2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
19 * You can also choose to distribute this program under the terms of
20 * the Unmodified Binary Distribution Licence (as given in the file
21 * COPYING.UBDL), provided that you have satisfied its requirements.
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
28 * Uniform Resource Identifiers
37 #include <ipxe/vsprintf.h>
38 #include <ipxe/params.h>
42 * Decode URI field (in place)
46 * URI decoding can never increase the length of a string; we can
47 * therefore safely decode in place.
/* Decode the URI field stored at "string", rewriting it in place.
 *
 * Steps visible here: the text at "string" is copied into the local
 * "hexbuf" (snprintf() bounds the copy to sizeof ( hexbuf ), so at
 * most that many bytes including the NUL are taken), the copy is
 * parsed as a base-16 number with strtoul(), and the number of
 * characters strtoul() consumed is stored in "skip".
 */
49 static void uri_decode ( char *string ) {
57 /* Copy string, decoding escaped characters as necessary */
/* Work on a bounded copy so that strtoul() cannot consume more hex
 * digits than hexbuf can hold.
 */
61 snprintf ( hexbuf, sizeof ( hexbuf ), "%s", string );
62 decoded = strtoul ( hexbuf, &hexbuf_end, 16 );
63 skip = ( hexbuf_end - hexbuf );
73 * Check if character should be escaped within a URI field
76 * @v field URI field index
77 * @ret escaped Character should be escaped
/* Decide whether character "c" has to be escaped when it appears in
 * the URI field with index "field".
 *
 * The result is non-zero when "c" fails isprint(), when it is a
 * space, or when strchr() finds it in escaped[field].  The comments
 * in the return expression also state that '%' is always escaped.
 *
 * escaped[] is a static table indexed by the URI_* field constants.
 * Of the entries visible here, scheme, user, password, port and
 * fragment list "/#:@?", while query lists "#:@?" (leaving '/'
 * unescaped).
 */
79 static int uri_character_escaped ( char c, unsigned int field ) {
81 /* Non-printing characters and whitespace should always be
82 * escaped, since they cannot sensibly be displayed as part of
83 * a coherent URL string. (This test also catches control
84 * characters such as CR and LF, which could affect the
85 * operation of line-based protocols such as HTTP.)
87 * We should also escape characters which would alter the
88 * interpretation of the URL if not escaped, i.e. characters
89 * which have significance to the URL parser. We should not
90 * blindly escape all such characters, because this would lead
91 * to some very strange-looking URLs (e.g. if we were to
92 * always escape '/' as "%2F" even within the URI path).
94 * We do not need to be perfect. Our primary role is as a
95 * consumer of URIs rather than a producer; the main situation
96 * in which we produce a URI string is for display to a human
97 * user, who can probably tolerate some variance from the
98 * formal specification. The only situation in which we
99 * currently produce a URI string to be consumed by a computer
100 * is when constructing an HTTP request URI, which contains
101 * only the path and query fields.
103 * We can therefore sacrifice some correctness for the sake of
104 * code size. For example, colons within the URI host should
105 * be escaped unless they form part of an IPv6 literal
106 * address; doing this correctly would require the URI
107 * formatter to be aware of whether or not the URI host
108 * contained an IPv4 address, an IPv6 address, or a host name.
109 * We choose to simplify and never escape colons within the
110 * URI host field: in the event of a pathological hostname
111 * containing colons, this could potentially produce a URI
112 * string which could not be reparsed.
114 * After excluding non-printing characters, whitespace, and
115 * '%', the full set of characters with significance to the
116 * URL parser is "/#:@?". We choose for each URI field which
117 * of these require escaping in our use cases.
119 static const char *escaped[URI_FIELDS] = {
120 /* Scheme: escape everything */
121 [URI_SCHEME] = "/#:@?",
122 /* Opaque part: escape characters which would affect
123 * the reparsing of the URI, allowing everything else
124 * (e.g. ':', which will appear in iSCSI URIs).
127 /* User name: escape everything */
128 [URI_USER] = "/#:@?",
129 /* Password: escape everything */
130 [URI_PASSWORD] = "/#:@?",
131 /* Host name: escape everything except ':', which may
132 * appear as part of an IPv6 literal address.
135 /* Port number: escape everything */
136 [URI_PORT] = "/#:@?",
137 /* Path: escape everything except '/', which usually
138 * appears within paths.
141 /* Query: escape everything except '/', which
142 * sometimes appears within queries.
144 [URI_QUERY] = "#:@?",
145 /* Fragment: escape everything */
146 [URI_FRAGMENT] = "/#:@?",
149 return ( /* Always escape non-printing characters and whitespace */
150 ( ! isprint ( c ) ) || ( c == ' ' ) ||
151 /* Always escape '%' */
153 /* Escape field-specific characters */
154 strchr ( escaped[field], c ) );
161 * @v field URI field index
162 * @v buf Buffer to contain encoded string
163 * @v len Length of buffer
164 * @ret len Length of encoded string (excluding NUL)
/* Encode the NUL-terminated "string" for URI field "field", writing
 * the result into "buf", which has "len" bytes of space.
 *
 * Each input character is tested with uri_character_escaped().  A
 * character that needs escaping is formatted with "%%%02X", i.e. a
 * literal '%' followed by the character value as at least two
 * upper-case hex digits; the other ssnprintf() call emits a character
 * unchanged through "%c".  "remaining" starts at "len" and the
 * function returns ( len - remaining ), which the doc comment above
 * describes as the encoded length excluding the NUL.
 *
 * NOTE(review): "c" is passed straight to "%02X".  If "c" is a plain
 * (signed) char holding a value above 0x7f it would be sign-extended
 * and print more than two hex digits - confirm the declared type of
 * "c".
 */
166 size_t uri_encode ( const char *string, unsigned int field,
167 char *buf, ssize_t len ) {
168 ssize_t remaining = len;
172 /* Ensure encoded string is NUL-terminated even if empty */
176 /* Copy string, escaping as necessary */
177 while ( ( c = *(string++) ) ) {
178 if ( uri_character_escaped ( c, field ) ) {
179 used = ssnprintf ( buf, remaining, "%%%02X", c );
181 used = ssnprintf ( buf, remaining, "%c", c );
187 return ( len - remaining );
191 * Dump URI for debugging
/* Print the components of "uri" through the DBGC debug macro, one
 * labelled and quoted value per component: scheme, opaque, user,
 * password, host, port, path, query, fragment, and finally the name
 * of the attached parameter list.
 *
 * NOTE(review): the last call dereferences uri->params to reach
 * ->name, so it presumably runs only when uri->params is non-NULL;
 * the conditions guarding each call are not visible here - confirm.
 */
195 static void uri_dump ( const struct uri *uri ) {
200 DBGC ( uri, " scheme \"%s\"", uri->scheme );
202 DBGC ( uri, " opaque \"%s\"", uri->opaque );
204 DBGC ( uri, " user \"%s\"", uri->user );
206 DBGC ( uri, " password \"%s\"", uri->password );
208 DBGC ( uri, " host \"%s\"", uri->host );
210 DBGC ( uri, " port \"%s\"", uri->port );
212 DBGC ( uri, " path \"%s\"", uri->path );
214 DBGC ( uri, " query \"%s\"", uri->query );
216 DBGC ( uri, " fragment \"%s\"", uri->fragment );
218 DBGC ( uri, " params \"%s\"", uri->params->name );
224 * @v refcnt Reference count
/* Free routine for a URI.  This is the function handed to ref_init()
 * wherever this file creates a URI, and it receives the reference
 * counter embedded in the structure.
 *
 * container_of() recovers the enclosing struct uri from "refcnt", and
 * params_put() is then called on the URI's parameter list (presumably
 * dropping the reference the URI held - confirm against params_put()).
 */
226 static void uri_free ( struct refcnt *refcnt ) {
227 struct uri *uri = container_of ( refcnt, struct uri, refcnt );
229 params_put ( uri->params );
236 * @v uri_string URI as a string
239 * Splits a URI into its component parts. The returned URI structure is
240 * dynamically allocated and must eventually be freed by calling
/* Parse "uri_string" into a newly allocated struct uri.
 *
 * Outline of the steps visible here:
 *  - One zeroed allocation holds the struct uri followed by a private
 *    copy ("raw") of the string including its NUL; the reference
 *    counter is initialised with uri_free.
 *  - A "##params" marker is searched for in the copy.  The text after
 *    the marker (skipping one further character when anything
 *    follows) is passed to find_parameters(), and the result is
 *    passed through claim_parameters() into uri->params.
 *  - The fragment is located at the first '#', and a ':' marks the
 *    URI as absolute.
 *  - Within the path, the first '?' starts the query.
 *  - A path beginning with "//" is a net path whose authority starts
 *    after the two slashes.  When a '/' follows the authority, the
 *    authority is moved down one byte with memmove() to make room for
 *    a terminator.
 *  - An '@' in the authority separates user[:password] from the host,
 *    and the last ':' in the host starts the port unless the host
 *    ends in ']'.
 *  - Every field that is present is then decoded in place with
 *    uri_decode(), and the result is reported via DBGC.
 */
243 struct uri * parse_uri ( const char *uri_string ) {
245 struct parameters *params;
253 /* Allocate space for URI struct and a copy of the string */
254 raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
255 uri = zalloc ( sizeof ( *uri ) + raw_len );
258 ref_init ( &uri->refcnt, uri_free );
259 raw = ( ( ( void * ) uri ) + sizeof ( *uri ) );
261 /* Copy in the raw string */
262 memcpy ( raw, uri_string, raw_len );
264 /* Identify the parameter list, if present */
265 if ( ( tmp = strstr ( raw, "##params" ) ) ) {
267 tmp += 8 /* "##params" */;
268 params = find_parameters ( *tmp ? ( tmp + 1 ) : NULL );
270 uri->params = claim_parameters ( params );
272 /* Ignore non-existent submission blocks */
276 /* Chop off the fragment, if it exists */
277 if ( ( tmp = strchr ( raw, '#' ) ) ) {
282 /* Identify absolute/relative URI */
283 if ( ( tmp = strchr ( raw, ':' ) ) ) {
284 /* Absolute URI: identify hierarchical/opaque */
288 /* Absolute URI with hierarchical part */
291 /* Absolute URI with opaque part */
300 /* If we don't have a path (i.e. we have an absolute URI with
301 * an opaque portion, we're already finished processing
306 /* Chop off the query, if it exists */
307 if ( ( tmp = strchr ( path, '?' ) ) ) {
312 /* If we have no path remaining, then we're already finished
318 /* Identify net/absolute/relative path */
319 if ( strncmp ( path, "//", 2 ) == 0 ) {
320 /* Net path. If this is terminated by the first '/'
321 * of an absolute path, then we have no space for a
322 * terminator after the authority field, so shuffle
323 * the authority down by one byte, overwriting one of
326 authority = ( path + 2 );
327 if ( ( tmp = strchr ( authority, '/' ) ) ) {
330 memmove ( ( authority - 1 ), authority,
331 ( tmp - authority ) );
336 /* Absolute/relative path */
341 /* If we don't have an authority (i.e. we have a non-net
342 * path), we're already finished processing
347 /* Split authority into user[:password] and host[:port] portions */
348 if ( ( tmp = strchr ( authority, '@' ) ) ) {
349 /* Has user[:password] */
352 uri->user = authority;
353 if ( ( tmp = strchr ( authority, ':' ) ) ) {
359 /* No user:password */
360 uri->host = authority;
363 /* Split host into host[:port] */
/* A host ending in ']' is left unsplit, so the colons inside a
 * bracketed literal are not taken as a port separator.
 *
 * NOTE(review): if uri->host is an empty string, strlen() - 1 wraps
 * and the index refers to the byte before the host text - confirm
 * whether an empty host can reach this test.
 */
364 if ( ( uri->host[ strlen ( uri->host ) - 1 ] != ']' ) &&
365 ( tmp = strrchr ( uri->host, ':' ) ) ) {
370 /* Decode fields in-place */
371 for ( field = 0 ; field < URI_FIELDS ; field++ ) {
372 if ( uri_field ( uri, field ) )
373 uri_decode ( ( char * ) uri_field ( uri, field ) );
377 DBGC ( uri, "URI parsed \"%s\" to", uri_string );
387 * @v uri URI, or NULL
388 * @v default_port Default port to use if none specified in URI
/* Obtain the numeric port for "uri".
 *
 * The guard catches a NULL URI and a URI with no port string; the
 * statement it controls is not visible here, but per the parameter
 * documentation above it presumably yields "default_port".
 * Otherwise the port string is converted with strtoul() using base
 * 0, so the base is taken from the string's own prefix.
 */
391 unsigned int uri_port ( const struct uri *uri, unsigned int default_port ) {
393 if ( ( ! uri ) || ( ! uri->port ) )
396 return ( strtoul ( uri->port, NULL, 0 ) );
403 * @v buf Buffer to fill with URI string
404 * @v len Size of buffer
405 * @ret len Length of URI string
/* Format "uri" as a string into "buf", which has "len" bytes of space.
 *
 * A static table holds a single prefix character per field (':' for
 * the password and '#' for the fragment are visible here).  The loop
 * walks every field index, skips fields that are absent, chooses the
 * prefix (with special cases for a host that follows a user name and
 * for a path that already starts with '/'), and appends the field
 * through uri_encode().  After the scheme of a non-opaque URI an
 * extra suffix is appended.  "used" accumulates the lengths reported
 * by ssnprintf() and uri_encode().
 */
407 size_t format_uri ( const struct uri *uri, char *buf, size_t len ) {
408 static const char prefixes[URI_FIELDS] = {
410 [URI_PASSWORD] = ':',
414 [URI_FRAGMENT] = '#',
420 /* Ensure buffer is NUL-terminated */
424 /* Special-case NULL URI */
428 /* Generate fields */
429 for ( field = 0 ; field < URI_FIELDS ; field++ ) {
431 /* Skip non-existent fields */
432 if ( ! uri_field ( uri, field ) )
435 /* Prefix this field, if applicable */
436 prefix = prefixes[field];
437 if ( ( field == URI_HOST ) && ( uri->user != NULL ) )
439 if ( ( field == URI_PATH ) && ( uri->path[0] == '/' ) )
442 used += ssnprintf ( ( buf + used ), ( len - used ),
446 /* Encode this field */
447 used += uri_encode ( uri_field ( uri, field ), field,
448 ( buf + used ), ( len - used ) );
450 /* Suffix this field, if applicable */
451 if ( ( field == URI_SCHEME ) && ( ! uri->opaque ) ) {
452 used += ssnprintf ( ( buf + used ), ( len - used ),
458 DBGC ( uri, "URI formatted" );
/* NOTE(review): the marker is shown only when used > len.  If "used"
 * excludes the NUL, a result with used == len has also lost its last
 * character - confirm whether this should be >=.
 */
460 DBGC ( uri, " to \"%s%s\"\n", buf,
461 ( ( used > len ) ? "<TRUNCATED>" : "" ) );
471 * @ret string URI string, or NULL on failure
473 * The caller is responsible for eventually freeing the allocated
/* Format "uri" into a freshly allocated string.
 *
 * format_uri() is called first with a NULL buffer and zero length
 * purely to learn the formatted length; one byte is added for the
 * NUL, that many bytes are obtained from malloc(), and format_uri()
 * is called again to fill them.  Per the doc comment above, the
 * caller owns the returned string.
 */
476 char * format_uri_alloc ( const struct uri *uri ) {
480 len = ( format_uri ( uri, NULL, 0 ) + 1 /* NUL */ );
481 string = malloc ( len );
483 format_uri ( uri, string, len );
491 * @v dest Destination URI, or NULL to calculate length
492 * @ret len Length of raw URI
/* Copy the string fields of "src" into storage that directly follows
 * the struct uri at "dest".
 *
 * "len" starts at sizeof ( *dest ) and "out" points at the first byte
 * after that structure.  For each field present in "src", the field
 * length is computed, the text is copied to "out" with memcpy(), and
 * the matching field of "dest" is pointed at the copy.  Per the doc
 * comment above, "dest" may be NULL to calculate the length only.
 *
 * The arithmetic on ( void * ) dest relies on byte-granular void
 * pointer arithmetic, which is a compiler extension.
 */
494 static size_t uri_copy_fields ( const struct uri *src, struct uri *dest ) {
495 size_t len = sizeof ( *dest );
496 char *out = ( ( void * ) dest + len );
500 /* Copy existent fields */
501 for ( field = 0 ; field < URI_FIELDS ; field++ ) {
503 /* Skip non-existent fields */
504 if ( ! uri_field ( src, field ) )
507 /* Calculate field length */
508 field_len = ( strlen ( uri_field ( src, field ) )
512 /* Copy field, if applicable */
514 memcpy ( out, uri_field ( src, field ), field_len );
515 uri_field ( dest, field ) = out;
526 * @ret uri Duplicate URI
528 * Creates a modifiable copy of a URI.
/* Create an independent copy of "uri".
 *
 * uri_copy_fields() is first called with a NULL destination to obtain
 * the total size, a zeroed block of that size is allocated, and its
 * reference counter is initialised with uri_free.  A second
 * uri_copy_fields() call then copies the fields into the new block,
 * and the parameter list is carried across via params_get().
 */
530 struct uri * uri_dup ( const struct uri *uri ) {
534 /* Allocate new URI */
535 len = uri_copy_fields ( uri, NULL );
536 dup = zalloc ( len );
539 ref_init ( &dup->refcnt, uri_free );
542 uri_copy_fields ( uri, dup );
544 /* Copy parameters */
545 dup->params = params_get ( uri->params );
547 DBGC ( uri, "URI duplicated" );
555 * Resolve base+relative path
557 * @v base_path Base path
558 * @v relative_path Relative path
559 * @ret resolved_path Resolved path
561 * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz") and a relative
562 * path (e.g. "initrd.gz") and produces a new path
563 * (e.g. "/var/lib/tftpboot/initrd.gz"). Note that any non-directory
564 * portion of the base path will automatically be stripped; this
565 * matches the semantics used when resolving the path component of
/* Combine "base_path" and "relative_path" into a newly allocated path.
 *
 * A relative path that begins with '/' is already absolute and is
 * simply duplicated with strdup().  Otherwise the base path is copied
 * into a local variable-length array (dirname() needs a modifiable
 * string) and reduced to its directory part.  Leading "." and ".."
 * elements of the relative path are then examined one by one, with a
 * further dirname() applied for each ".." element.  Finally asprintf()
 * joins base and remainder, inserting a '/' unless the base already
 * ends with one.
 */
568 char * resolve_path ( const char *base_path,
569 const char *relative_path ) {
570 size_t base_len = ( strlen ( base_path ) + 1 );
571 char base_path_copy[base_len];
572 char *base_tmp = base_path_copy;
575 /* If relative path is absolute, just re-use it */
576 if ( relative_path[0] == '/' )
577 return strdup ( relative_path );
579 /* Create modifiable copy of path for dirname() */
580 memcpy ( base_tmp, base_path, base_len );
581 base_tmp = dirname ( base_tmp );
583 /* Process "./" and "../" elements */
584 while ( *relative_path == '.' ) {
586 if ( *relative_path == 0 ) {
588 } else if ( *relative_path == '/' ) {
590 } else if ( *relative_path == '.' ) {
592 if ( *relative_path == 0 ) {
593 base_tmp = dirname ( base_tmp );
594 } else if ( *relative_path == '/' ) {
595 base_tmp = dirname ( base_tmp );
607 /* Create and return new path */
608 if ( asprintf ( &resolved, "%s%s%s", base_tmp,
609 ( ( base_tmp[ strlen ( base_tmp ) - 1 ] == '/' ) ?
610 "" : "/" ), relative_path ) < 0 )
617 * Resolve base+relative URI
619 * @v base_uri Base URI, or NULL
620 * @v relative_uri Relative URI
621 * @ret resolved_uri Resolved URI
623 * Takes a base URI (e.g. "http://ipxe.org/kernels/vmlinuz") and a
624 * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
625 * (e.g. "http://ipxe.org/initrds/initrd.gz").
/* Resolve "relative_uri" against "base_uri".
 *
 * When the relative URI is absolute, or there is no base URI, the
 * result is simply uri_get ( relative_uri ).  Otherwise the base URI
 * is copied into the local "tmp_uri" and the most significant
 * component supplied by the relative URI decides what is overridden:
 *  - path: the path is resolved with resolve_path() (using "/" when
 *    the base has no path), and query, fragment and params all come
 *    from the relative URI;
 *  - query only: query, fragment and params are replaced;
 *  - fragment only: fragment and params are replaced;
 *  - params only: just params is replaced.
 * The combined result is then duplicated with uri_dup(), so the
 * returned URI owns copies of all its strings.
 */
627 struct uri * resolve_uri ( const struct uri *base_uri,
628 struct uri *relative_uri ) {
630 char *tmp_path = NULL;
633 /* If relative URI is absolute, just re-use it */
634 if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) )
635 return uri_get ( relative_uri );
638 memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
639 if ( relative_uri->path ) {
640 tmp_path = resolve_path ( ( base_uri->path ?
641 base_uri->path : "/" ),
642 relative_uri->path );
643 tmp_uri.path = tmp_path;
644 tmp_uri.query = relative_uri->query;
645 tmp_uri.fragment = relative_uri->fragment;
646 tmp_uri.params = relative_uri->params;
647 } else if ( relative_uri->query ) {
648 tmp_uri.query = relative_uri->query;
649 tmp_uri.fragment = relative_uri->fragment;
650 tmp_uri.params = relative_uri->params;
651 } else if ( relative_uri->fragment ) {
652 tmp_uri.fragment = relative_uri->fragment;
653 tmp_uri.params = relative_uri->params;
654 } else if ( relative_uri->params ) {
655 tmp_uri.params = relative_uri->params;
658 /* Create demangled URI */
659 new_uri = uri_dup ( &tmp_uri );
665 * Construct TFTP URI from next-server and filename
667 * @v next_server Next-server address
668 * @v port Port number, or zero to use the default port
669 * @v filename Filename
670 * @ret uri URI, or NULL on failure
672 * TFTP filenames specified via the DHCP next-server field often
673 * contain characters such as ':' or '#' which would confuse the
674 * generic URI parser. We provide a mechanism for directly
675 * constructing a TFTP URI from the next-server and filename.
/* Build a URI directly from a next-server address, an optional port
 * and a filename, bypassing the string parser.
 *
 * A local struct uri is zeroed, its host is set to the text form of
 * "next_server" produced by inet_ntoa(), the port is formatted into
 * the small local buffer "buf", and the result is returned as an
 * independent copy made by uri_dup().
 */
677 struct uri * tftp_uri ( struct in_addr next_server, unsigned int port,
678 const char *filename ) {
679 char buf[ 6 /* "65535" + NUL */ ];
682 memset ( &uri, 0, sizeof ( uri ) );
684 uri.host = inet_ntoa ( next_server );
/* NOTE(review): "port" is unsigned but is formatted with "%d", and
 * any value above 65535 would be cut short by the 6-byte buffer -
 * confirm that callers never pass such a value.
 */
686 snprintf ( buf, sizeof ( buf ), "%d", port );
690 return uri_dup ( &uri );