1 /* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 * apr_uri.c: URI related utility things
26 #include "apr_general.h"
27 #include "apr_strings.h"
29 #define APR_WANT_STRFUNC
/* schemes_t pairs a URI scheme name with that scheme's default port.
 * NOTE(review): this listing skips original lines 35, 37, 39 and 42-43, so
 * the struct's opening line, the declaration of the name member and the
 * closing brace are not visible here.  `name` is compared with strcasecmp()
 * in apr_uri_port_of_scheme(), so it is presumably a C string -- confirm.
 */
34 typedef struct schemes_t schemes_t;
36 /** Structure to store various schemes and their default ports */
38 /** The name of the scheme */
40 /** The default port for the scheme */
41 apr_port_t default_port;
44 /* Some WWW schemes and their default ports; this is basically /etc/services */
45 /* This will become global when the protocol abstraction comes */
46 /* As the schemes are searched by a linear search, */
47 /* they are sorted by their expected frequency */
/* Each entry is {scheme name, default port}.  The table ends with a
 * {NULL, 0xFFFF} sentinel; the lookup loop in apr_uri_port_of_scheme()
 * stops as soon as it reaches the NULL name.  "wais", "z39.50r" and
 * "z39.50s" all map to APR_URI_WAIS_DEFAULT_PORT.
 * NOTE(review): original lines 49 and 71-72, presumably the initializer's
 * opening and closing braces, are absent from this listing.
 */
48 static schemes_t schemes[] =
50 {"http", APR_URI_HTTP_DEFAULT_PORT},
51 {"ftp", APR_URI_FTP_DEFAULT_PORT},
52 {"https", APR_URI_HTTPS_DEFAULT_PORT},
53 {"gopher", APR_URI_GOPHER_DEFAULT_PORT},
54 {"ldap", APR_URI_LDAP_DEFAULT_PORT},
55 {"nntp", APR_URI_NNTP_DEFAULT_PORT},
56 {"snews", APR_URI_SNEWS_DEFAULT_PORT},
57 {"imap", APR_URI_IMAP_DEFAULT_PORT},
58 {"pop", APR_URI_POP_DEFAULT_PORT},
59 {"sip", APR_URI_SIP_DEFAULT_PORT},
60 {"rtsp", APR_URI_RTSP_DEFAULT_PORT},
61 {"wais", APR_URI_WAIS_DEFAULT_PORT},
62 {"z39.50r", APR_URI_WAIS_DEFAULT_PORT},
63 {"z39.50s", APR_URI_WAIS_DEFAULT_PORT},
64 {"prospero", APR_URI_PROSPERO_DEFAULT_PORT},
65 {"nfs", APR_URI_NFS_DEFAULT_PORT},
66 {"tip", APR_URI_TIP_DEFAULT_PORT},
67 {"acap", APR_URI_ACAP_DEFAULT_PORT},
68 {"telnet", APR_URI_TELNET_DEFAULT_PORT},
69 {"ssh", APR_URI_SSH_DEFAULT_PORT},
70 { NULL, 0xFFFF } /* unknown port */
/* Look up the default port registered for a scheme name.
 *
 * scheme_str  scheme name to look for; compared case-insensitively with
 *             strcasecmp() against each entry of schemes[].
 *
 * The loop walks schemes[] until the entry whose name is NULL and returns
 * the default_port of the first matching entry.
 * NOTE(review): original lines 74-77 and 81-85 are missing from this
 * listing, so the declaration of `scheme`, any NULL guard on scheme_str and
 * the value returned when nothing matches are not visible here -- confirm
 * against the complete file.
 */
73 APU_DECLARE(apr_port_t) apr_uri_port_of_scheme(const char *scheme_str)
78 for (scheme = schemes; scheme->name != NULL; ++scheme) {
79 if (strcasecmp(scheme_str, scheme->name) == 0) {
80 return scheme->default_port;
/* Older name kept for existing callers: forwards scheme_str unchanged to
 * apr_uri_port_of_scheme() and returns that function's result.
 */
87 /** @deprecated @see apr_uri_port_of_scheme */
88 APU_DECLARE(apr_port_t) apr_uri_default_port_for_scheme(const char *scheme_str)
90 return apr_uri_port_of_scheme(scheme_str);
93 /* Unparse an apr_uri_t structure to a URI string.
94 * Optionally suppress the password for security reasons.
/* Rebuild a URI string from the components stored in *uptr.
 *
 * p      pool handle.  NOTE(review): the allocation calls are not visible
 *        in this listing; presumably the result is allocated from p --
 *        confirm.
 * uptr   parsed URI whose user, password, hostname, scheme, port_str, port,
 *        query and fragment fields are read below.
 * flags  bitwise OR of APR_URI_UNP_* values (its declaration line is
 *        missing from this listing).
 *
 * Behaviour established by the visible lines:
 *  - Without APR_URI_UNP_OMITSITEPART the site part is produced.  User and
 *    password are included unless APR_URI_UNP_OMITUSER or
 *    APR_URI_UNP_OMITPASSWORD is set, and the password text is replaced by
 *    "XXXXXXXX" unless APR_URI_UNP_REVEALPASSWORD is set.
 *  - A hostname containing ':' is treated as an IPv6 literal.  lbrk and rbrk
 *    start as "" and are presumably set to brackets in the elided lines.
 *  - The ":port" suffix is dropped when is_default_port is true, which
 *    includes port_str being NULL or port being equal to
 *    apr_uri_port_of_scheme(uptr->scheme).
 *  - In the branch without a scheme (its condition is elided) the authority
 *    is still emitted with its leading double slash but no scheme prefix.
 *  - Without APR_URI_UNP_OMITPATHINFO the path (per the existing comment),
 *    query and fragment are appended.
 *    NOTE(review): the fragment is gated by APR_URI_UNP_OMITQUERY as well;
 *    no separate fragment flag is tested here.
 */
96 APU_DECLARE(char *) apr_uri_unparse(apr_pool_t *p,
97 const apr_uri_t *uptr,
102 /* If suppressing the site part, omit both user name & scheme://hostname */
103 if (!(flags & APR_URI_UNP_OMITSITEPART)) {
105 /* Construct a "user:password@" string, honoring the passed
106 * APR_URI_UNP_ flags: */
107 if (uptr->user || uptr->password) {
109 (uptr->user && !(flags & APR_URI_UNP_OMITUSER))
111 (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD))
113 (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD))
114 ? ((flags & APR_URI_UNP_REVEALPASSWORD)
115 ? uptr->password : "XXXXXXXX")
117 ((uptr->user && !(flags & APR_URI_UNP_OMITUSER)) ||
118 (uptr->password && !(flags & APR_URI_UNP_OMITPASSWORD)))
123 /* Construct scheme://site string */
124 if (uptr->hostname) {
126 const char *lbrk = "", *rbrk = "";
128 if (strchr(uptr->hostname, ':')) { /* v6 literal */
134 (uptr->port_str == NULL ||
136 uptr->port == apr_uri_port_of_scheme(uptr->scheme));
140 uptr->scheme, "://", ret,
141 lbrk, uptr->hostname, rbrk,
142 is_default_port ? "" : ":",
143 is_default_port ? "" : uptr->port_str,
147 /* A violation of RFC2396, but it is clear from section 3.2
148 * that the : belongs above to the scheme, while // belongs
149 * to the authority, so include the authority prefix while
150 * omitting the "scheme:" that the user neglected to pass us.
153 "//", ret, lbrk, uptr->hostname, rbrk,
154 is_default_port ? "" : ":",
155 is_default_port ? "" : uptr->port_str,
161 /* Should we suppress all path info? */
162 if (!(flags & APR_URI_UNP_OMITPATHINFO)) {
163 /* Append path, query and fragment strings: */
168 (uptr->query && !(flags & APR_URI_UNP_OMITQUERY))
170 (uptr->query && !(flags & APR_URI_UNP_OMITQUERY))
172 (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY))
174 (uptr->fragment && !(flags & APR_URI_UNP_OMITQUERY))
175 ? uptr->fragment : NULL,
181 /* Here is the hand-optimized parse_uri_components(). There are some wild
182 * tricks we could pull in assembly language that we don't pull here... like we
183 * can do word-at-time scans for delimiter characters using the same technique
184 * that fast memchr()s use. But that would be way non-portable. -djg
187 /* We have a lookup table (uri_delims) that we can index by character and it tells us if the
188 * character is one of the interesting delimiters. Note that we even get
189 * compares for NUL for free -- it's just another delimiter.
/* Delimiter-class bit flags, one bit per delimiter character.  The NOTEND_*
 * masks are built by OR-ing these together and are tested against entries of
 * uri_delims[].  Bits 0x10 through 0x40 are not assigned here.
 */
192 #define T_COLON 0x01 /* ':' */
193 #define T_SLASH 0x02 /* '/' */
194 #define T_QUESTION 0x04 /* '?' */
195 #define T_HASH 0x08 /* '#' */
196 #define T_NUL 0x80 /* '\0' */
198 /* the uri_delims.h file is autogenerated by gen_uri_delims.c */
199 #include "uri_delims.h"
201 /* it works like this:
202 if ((uri_delims[ch] & NOTEND_foobar) == 0) {
203 then we're not at a delimiter for foobar
207 /* Note that we optimize the scheme scanning here, we cheat and let the
208 * compiler know that it doesn't have to do the & masking.
/* Stop masks for the three scanners in apr_uri_parse().  Each scan loop
 * advances while (uri_delims[c] & NOTEND_x) == 0, so it halts on the first
 * character whose table entry has any bit of the mask set.
 *   NOTEND_SCHEME   - every bit, so any flagged character stops the scan
 *   NOTEND_HOSTINFO - stops at a slash, '?', '#' or NUL
 *   NOTEND_PATH     - stops at '?', '#' or NUL
 */
210 #define NOTEND_SCHEME (0xff)
211 #define NOTEND_HOSTINFO (T_SLASH | T_QUESTION | T_HASH | T_NUL)
212 #define NOTEND_PATH (T_QUESTION | T_HASH | T_NUL)
214 /* parse_uri_components():
215 * Parse a given URI, fill in all supplied fields of a uri_components
216 * structure. This eliminates the necessity of extracting host, port,
217 * path, query info repeatedly in the modules.
219 * - fills in fields of uri_components *uptr
220 * - none on any of the r->* fields
/* Split the string `uri` into its components and store them in *uptr.
 * Every stored string is a copy made with apr_pstrmemdup() or apr_pstrdup()
 * on pool p.
 *
 * NOTE(review): this listing omits many original lines (the remaining
 * parameter line, several local declarations, the goto labels, most closing
 * braces and every return statement), so the status codes returned and the
 * exact branch conditions must be confirmed against the complete file.
 *
 * What the visible lines establish:
 *  - *uptr is zeroed and is_initialized set to 1 on every call.
 *  - Path scan: advance to the first '?', '#' or NUL; the text before it is
 *    copied to path, and what follows is stored as query and/or fragment.
 *  - Scheme scan: a scheme is accepted only when it is non-empty and is
 *    followed by ':' and two slashes; otherwise control jumps to
 *    deal_with_path and the input is handled as a bare path.
 *  - hostinfo runs up to the next slash, '?', '#' or NUL and is copied whole.
 *  - A backwards search locates the last '@'; the text before it is split at
 *    the first ':' into user and password.
 *  - A hostinfo starting with '[' is searched for ']' (IPv6 literal);
 *    otherwise the first ':' introduces the port.  v6_offset1 and v6_offset2
 *    adjust the hostname copy; their non-zero values are set in elided lines.
 *  - port_str is parsed with strtol() in base 10 and the code tests whether
 *    parsing stopped at the terminating NUL; one branch instead falls back
 *    to apr_uri_port_of_scheme(uptr->scheme).
 *    NOTE(review): no range check on the strtol() result is visible here.
 */
222 APU_DECLARE(apr_status_t) apr_uri_parse(apr_pool_t *p, const char *uri,
227 const char *hostinfo;
230 int v6_offset1 = 0, v6_offset2 = 0;
232 /* Initialize the structure. parse_uri() and parse_uri_components()
233 * can be called more than once per request.
235 memset (uptr, '\0', sizeof(*uptr));
236 uptr->is_initialized = 1;
238 /* We assume the processor has a branch predictor like most --
239 * it assumes forward branches are untaken and backwards are taken. That's
240 * the reason for the gotos. -djg
244 /* we expect uri to point to first character of path ... remember
245 * that the path could be empty -- http://foobar?query for example
248 while ((uri_delims[*(unsigned char *)s] & NOTEND_PATH) == 0) {
252 uptr->path = apr_pstrmemdup(p, uri, s - uri);
261 uptr->fragment = apr_pstrdup(p, s1 + 1);
262 uptr->query = apr_pstrmemdup(p, s, s1 - s);
265 uptr->query = apr_pstrdup(p, s);
269 /* otherwise it's a fragment */
270 uptr->fragment = apr_pstrdup(p, s + 1);
274 /* find the scheme: */
276 while ((uri_delims[*(unsigned char *)s] & NOTEND_SCHEME) == 0) {
279 /* scheme must be non-empty and followed by :// */
280 if (s == uri || s[0] != ':' || s[1] != '/' || s[2] != '/') {
281 goto deal_with_path; /* backwards predicted taken! */
284 uptr->scheme = apr_pstrmemdup(p, uri, s - uri);
287 while ((uri_delims[*(unsigned char *)s] & NOTEND_HOSTINFO) == 0) {
290 uri = s; /* whatever follows hostinfo is start of uri */
291 uptr->hostinfo = apr_pstrmemdup(p, hostinfo, uri - hostinfo);
293 /* If there's a username:password@host:port, the @ we want is the last @...
294 * too bad there's no memrchr()... For the C purists, note that hostinfo
295 * is definitely not the first character of the original uri so therefore
296 * &hostinfo[-1] < &hostinfo[0] ... and this loop is valid C.
300 } while (s >= hostinfo && *s != '@');
302 /* again we want the common case to be fall through */
304 /* We expect hostinfo to point to the first character of
305 * the hostname. If there's a port it is the first colon,
308 if (*hostinfo == '[') {
311 s = memchr(hostinfo, ']', uri - hostinfo);
316 s = NULL; /* no port */
320 s = memchr(hostinfo, ':', uri - hostinfo);
323 /* we expect the common case to have no port */
324 uptr->hostname = apr_pstrmemdup(p,
325 hostinfo + v6_offset1,
326 uri - hostinfo - v6_offset2);
329 uptr->hostname = apr_pstrmemdup(p,
330 hostinfo + v6_offset1,
331 s - hostinfo - v6_offset2);
333 uptr->port_str = apr_pstrmemdup(p, s, uri - s);
335 port = strtol(uptr->port_str, &endstr, 10);
337 if (*endstr == '\0') {
340 /* Invalid characters after ':' found */
343 uptr->port = apr_uri_port_of_scheme(uptr->scheme);
347 /* first colon delimits username:password */
348 s1 = memchr(hostinfo, ':', s - hostinfo);
350 uptr->user = apr_pstrmemdup(p, hostinfo, s1 - hostinfo);
352 uptr->password = apr_pstrmemdup(p, s1, s - s1);
355 uptr->user = apr_pstrmemdup(p, hostinfo, s - hostinfo);
361 /* Special case for CONNECT parsing: it comes with the hostinfo part only */
362 /* See the INTERNET-DRAFT document "Tunneling SSL Through a WWW Proxy"
363 * currently at http://www.mcom.com/newsref/std/tunneling_ssl.html
364 * for the format of the "CONNECT host:port HTTP/1.0" request
/* Parse a bare "host:port" string (the form used by CONNECT) into *uptr.
 *
 * Behaviour established by the visible lines:
 *  - *uptr is zeroed, is_initialized is set to 1 and hostinfo is copied into
 *    uptr->hostinfo with apr_pstrdup() on pool p.
 *  - If hostinfo begins with '[', a closing ']' is looked up with strchr()
 *    (IPv6 literal); otherwise the first ':' is located.
 *  - hostname is copied with apr_pstrndup(), port_str with apr_pstrdup(), and
 *    port is the strtol() base-10 value of port_str.
 *  - The *endstr == '\0' test checks that strtol() consumed the whole port
 *    string.
 * NOTE(review): the strtol() result is narrowed by an (unsigned short) cast
 * before the only validation visible here, so an out-of-range port such as
 * "65536" would be reduced to fit unsigned short instead of being rejected
 * -- confirm this is intended.
 * NOTE(review): the uptr parameter line, the local declarations, the bodies
 * of the error branches and all return statements are missing from this
 * listing; confirm the returned status codes against the complete file.
 */
366 APU_DECLARE(apr_status_t) apr_uri_parse_hostinfo(apr_pool_t *p,
367 const char *hostinfo,
375 /* Initialize the structure. parse_uri() and parse_uri_components()
376 * can be called more than once per request.
378 memset(uptr, '\0', sizeof(*uptr));
379 uptr->is_initialized = 1;
380 uptr->hostinfo = apr_pstrdup(p, hostinfo);
382 /* We expect hostinfo to point to the first character of
383 * the hostname. There must be a port, separated by a colon
385 if (*hostinfo == '[') {
386 if ((rsb = strchr(hostinfo, ']')) == NULL ||
390 /* literal IPv6 address */
396 s = strchr(hostinfo, ':');
401 uptr->hostname = apr_pstrndup(p, hostinfo, s - hostinfo - v6_offset1);
403 uptr->port_str = apr_pstrdup(p, s);
405 uptr->port = (unsigned short) strtol(uptr->port_str, &endstr, 10);
406 if (*endstr == '\0') {
409 /* Invalid characters after ':' found */