2 * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License as
6 * published by the Free Software Foundation; either version 2 of the
7 * License, or any later version.
9 * This program is distributed in the hope that it will be useful, but
10 * WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
20 FILE_LICENCE ( GPL2_OR_LATER );
24 * Uniform Resource Identifiers
33 #include <ipxe/vsprintf.h>
34 #include <ipxe/params.h>
38 * Decode URI field (in place)
42 * URI decoding can never increase the length of a string; we can
43 * therefore safely decode in place.
45 static void uri_decode ( char *string ) {
53 /* Copy string, decoding escaped characters as necessary */
57 snprintf ( hexbuf, sizeof ( hexbuf ), "%s", string );
58 decoded = strtoul ( hexbuf, &hexbuf_end, 16 );
59 skip = ( hexbuf_end - hexbuf );
69 * Check if character should be escaped within a URI field
72 * @v field URI field index
73 * @ret escaped Character should be escaped
75 static int uri_character_escaped ( char c, unsigned int field ) {
77 /* Non-printing characters and whitespace should always be
78 * escaped, since they cannot sensibly be displayed as part of
79 * a coherent URL string. (This test also catches control
80 * characters such as CR and LF, which could affect the
81 * operation of line-based protocols such as HTTP.)
83 * We should also escape characters which would alter the
84 * interpretation of the URL if not escaped, i.e. characters
85 * which have significance to the URL parser. We should not
86 * blindly escape all such characters, because this would lead
87 * to some very strange-looking URLs (e.g. if we were to
88 * always escape '/' as "%2F" even within the URI path).
90 * We do not need to be perfect. Our primary role is as a
91 * consumer of URIs rather than a producer; the main situation
92 * in which we produce a URI string is for display to a human
93 * user, who can probably tolerate some variance from the
94 * formal specification. The only situation in which we
95 * currently produce a URI string to be consumed by a computer
96 * is when constructing an HTTP request URI, which contains
97 * only the path and query fields.
99 * We can therefore sacrifice some correctness for the sake of
100 * code size. For example, colons within the URI host should
101 * be escaped unless they form part of an IPv6 literal
102 * address; doing this correctly would require the URI
103 * formatter to be aware of whether or not the URI host
104 * contained an IPv4 address, an IPv6 address, or a host name.
105 * We choose to simplify and never escape colons within the
106 * URI host field: in the event of a pathological hostname
107 * containing colons, this could potentially produce a URI
108 * string which could not be reparsed.
110 * After excluding non-printing characters, whitespace, and
111 * '%', the full set of characters with significance to the
112 * URL parser is "/#:@?". We choose for each URI field which
113 * of these require escaping in our use cases.
115 static const char *escaped[URI_FIELDS] = {
116 /* Scheme: escape everything */
117 [URI_SCHEME] = "/#:@?",
118 /* Opaque part: escape characters which would affect
119 * the reparsing of the URI, allowing everything else
120 * (e.g. ':', which will appear in iSCSI URIs).
123 /* User name: escape everything */
124 [URI_USER] = "/#:@?",
125 /* Password: escape everything */
126 [URI_PASSWORD] = "/#:@?",
127 /* Host name: escape everything except ':', which may
128 * appear as part of an IPv6 literal address.
131 /* Port number: escape everything */
132 [URI_PORT] = "/#:@?",
133 /* Path: escape everything except '/', which usually
134 * appears within paths.
137 /* Query: escape everything except '/', which
138 * sometimes appears within queries.
140 [URI_QUERY] = "#:@?",
141 /* Fragment: escape everything */
142 [URI_FRAGMENT] = "/#:@?",
145 return ( /* Always escape non-printing characters and whitespace */
146 ( ! isprint ( c ) ) || ( c == ' ' ) ||
147 /* Always escape '%' */
149 /* Escape field-specific characters */
150 strchr ( escaped[field], c ) );
157 * @v field URI field index
158 * @v buf Buffer to contain encoded string
159 * @v len Length of buffer
160 * @ret len Length of encoded string (excluding NUL)
162 size_t uri_encode ( const char *string, unsigned int field,
163 char *buf, ssize_t len ) {
164 ssize_t remaining = len;
168 /* Ensure encoded string is NUL-terminated even if empty */
172 /* Copy string, escaping as necessary */
173 while ( ( c = *(string++) ) ) {
174 if ( uri_character_escaped ( c, field ) ) {
175 used = ssnprintf ( buf, remaining, "%%%02X", c );
177 used = ssnprintf ( buf, remaining, "%c", c );
183 return ( len - remaining );
187 * Dump URI for debugging
191 static void uri_dump ( const struct uri *uri ) {
196 DBGC ( uri, " scheme \"%s\"", uri->scheme );
198 DBGC ( uri, " opaque \"%s\"", uri->opaque );
200 DBGC ( uri, " user \"%s\"", uri->user );
202 DBGC ( uri, " password \"%s\"", uri->password );
204 DBGC ( uri, " host \"%s\"", uri->host );
206 DBGC ( uri, " port \"%s\"", uri->port );
208 DBGC ( uri, " path \"%s\"", uri->path );
210 DBGC ( uri, " query \"%s\"", uri->query );
212 DBGC ( uri, " fragment \"%s\"", uri->fragment );
214 DBGC ( uri, " params \"%s\"", uri->params->name );
220 * @v refcnt Reference count
222 static void uri_free ( struct refcnt *refcnt ) {
223 struct uri *uri = container_of ( refcnt, struct uri, refcnt );
225 params_put ( uri->params );
232 * @v uri_string URI as a string
235 * Splits a URI into its component parts. The returned URI structure is
236 * dynamically allocated and must eventually be freed by calling
239 struct uri * parse_uri ( const char *uri_string ) {
241 struct parameters *params;
249 /* Allocate space for URI struct and a copy of the string */
250 raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
251 uri = zalloc ( sizeof ( *uri ) + raw_len );
254 ref_init ( &uri->refcnt, uri_free );
255 raw = ( ( ( void * ) uri ) + sizeof ( *uri ) );
257 /* Copy in the raw string */
258 memcpy ( raw, uri_string, raw_len );
260 /* Identify the parameter list, if present */
261 if ( ( tmp = strstr ( raw, "##params" ) ) ) {
263 tmp += 8 /* "##params" */;
264 params = find_parameters ( *tmp ? ( tmp + 1 ) : NULL );
266 uri->params = claim_parameters ( params );
268 /* Ignore non-existent submission blocks */
272 /* Chop off the fragment, if it exists */
273 if ( ( tmp = strchr ( raw, '#' ) ) ) {
278 /* Identify absolute/relative URI */
279 if ( ( tmp = strchr ( raw, ':' ) ) ) {
280 /* Absolute URI: identify hierarchical/opaque */
284 /* Absolute URI with hierarchical part */
287 /* Absolute URI with opaque part */
296 /* If we don't have a path (i.e. we have an absolute URI with
297 * an opaque portion), we're already finished processing
302 /* Chop off the query, if it exists */
303 if ( ( tmp = strchr ( path, '?' ) ) ) {
308 /* If we have no path remaining, then we're already finished
314 /* Identify net/absolute/relative path */
315 if ( strncmp ( path, "//", 2 ) == 0 ) {
316 /* Net path. If this is terminated by the first '/'
317 * of an absolute path, then we have no space for a
318 * terminator after the authority field, so shuffle
319 * the authority down by one byte, overwriting one of
322 authority = ( path + 2 );
323 if ( ( tmp = strchr ( authority, '/' ) ) ) {
326 memmove ( ( authority - 1 ), authority,
327 ( tmp - authority ) );
332 /* Absolute/relative path */
337 /* If we don't have an authority (i.e. we have a non-net
338 * path), we're already finished processing
343 /* Split authority into user[:password] and host[:port] portions */
344 if ( ( tmp = strchr ( authority, '@' ) ) ) {
345 /* Has user[:password] */
348 uri->user = authority;
349 if ( ( tmp = strchr ( authority, ':' ) ) ) {
355 /* No user:password */
356 uri->host = authority;
359 /* Split host into host[:port] */
360 if ( ( uri->host[ strlen ( uri->host ) - 1 ] != ']' ) &&
361 ( tmp = strrchr ( uri->host, ':' ) ) ) {
366 /* Decode fields in-place */
367 for ( field = 0 ; field < URI_FIELDS ; field++ ) {
368 if ( uri_field ( uri, field ) )
369 uri_decode ( ( char * ) uri_field ( uri, field ) );
373 DBGC ( uri, "URI parsed \"%s\" to", uri_string );
383 * @v uri URI, or NULL
384 * @v default_port Default port to use if none specified in URI
387 unsigned int uri_port ( const struct uri *uri, unsigned int default_port ) {
389 if ( ( ! uri ) || ( ! uri->port ) )
392 return ( strtoul ( uri->port, NULL, 0 ) );
399 * @v buf Buffer to fill with URI string
400 * @v len Length of buffer
401 * @ret len Length of URI string
403 size_t format_uri ( const struct uri *uri, char *buf, size_t len ) {
404 static const char prefixes[URI_FIELDS] = {
406 [URI_PASSWORD] = ':',
410 [URI_FRAGMENT] = '#',
416 /* Ensure buffer is NUL-terminated */
420 /* Special-case NULL URI */
424 /* Generate fields */
425 for ( field = 0 ; field < URI_FIELDS ; field++ ) {
427 /* Skip non-existent fields */
428 if ( ! uri_field ( uri, field ) )
431 /* Prefix this field, if applicable */
432 prefix = prefixes[field];
433 if ( ( field == URI_HOST ) && ( uri->user != NULL ) )
435 if ( ( field == URI_PATH ) && ( uri->path[0] == '/' ) )
438 used += ssnprintf ( ( buf + used ), ( len - used ),
442 /* Encode this field */
443 used += uri_encode ( uri_field ( uri, field ), field,
444 ( buf + used ), ( len - used ) );
446 /* Suffix this field, if applicable */
447 if ( ( field == URI_SCHEME ) && ( ! uri->opaque ) ) {
448 used += ssnprintf ( ( buf + used ), ( len - used ),
454 DBGC ( uri, "URI formatted" );
456 DBGC ( uri, " to \"%s%s\"\n", buf,
457 ( ( used > len ) ? "<TRUNCATED>" : "" ) );
467 * @ret string URI string, or NULL on failure
469 * The caller is responsible for eventually freeing the allocated
472 char * format_uri_alloc ( const struct uri *uri ) {
476 len = ( format_uri ( uri, NULL, 0 ) + 1 /* NUL */ );
477 string = malloc ( len );
479 format_uri ( uri, string, len );
487 * @v dest Destination URI, or NULL to calculate length
488 * @ret len Length of raw URI
490 static size_t uri_copy_fields ( const struct uri *src, struct uri *dest ) {
491 size_t len = sizeof ( *dest );
492 char *out = ( ( void * ) dest + len );
496 /* Copy existent fields */
497 for ( field = 0 ; field < URI_FIELDS ; field++ ) {
499 /* Skip non-existent fields */
500 if ( ! uri_field ( src, field ) )
503 /* Calculate field length */
504 field_len = ( strlen ( uri_field ( src, field ) )
508 /* Copy field, if applicable */
510 memcpy ( out, uri_field ( src, field ), field_len );
511 uri_field ( dest, field ) = out;
522 * @ret uri Duplicate URI
524 * Creates a modifiable copy of a URI.
526 struct uri * uri_dup ( const struct uri *uri ) {
530 /* Allocate new URI */
531 len = uri_copy_fields ( uri, NULL );
532 dup = zalloc ( len );
535 ref_init ( &dup->refcnt, uri_free );
538 uri_copy_fields ( uri, dup );
540 /* Copy parameters */
541 dup->params = params_get ( uri->params );
543 DBGC ( uri, "URI duplicated" );
551 * Resolve base+relative path
553 * @v base_path Base path
554 * @v relative_path Relative path
555 * @ret resolved_path Resolved path
557 * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz") and a relative
558 * path (e.g. "initrd.gz") and produces a new path
559 * (e.g. "/var/lib/tftpboot/initrd.gz"). Note that any non-directory
560 * portion of the base path will automatically be stripped; this
561 * matches the semantics used when resolving the path component of
564 char * resolve_path ( const char *base_path,
565 const char *relative_path ) {
566 size_t base_len = ( strlen ( base_path ) + 1 );
567 char base_path_copy[base_len];
568 char *base_tmp = base_path_copy;
571 /* If relative path is absolute, just re-use it */
572 if ( relative_path[0] == '/' )
573 return strdup ( relative_path );
575 /* Create modifiable copy of path for dirname() */
576 memcpy ( base_tmp, base_path, base_len );
577 base_tmp = dirname ( base_tmp );
579 /* Process "./" and "../" elements */
580 while ( *relative_path == '.' ) {
582 if ( *relative_path == 0 ) {
584 } else if ( *relative_path == '/' ) {
586 } else if ( *relative_path == '.' ) {
588 if ( *relative_path == 0 ) {
589 base_tmp = dirname ( base_tmp );
590 } else if ( *relative_path == '/' ) {
591 base_tmp = dirname ( base_tmp );
603 /* Create and return new path */
604 if ( asprintf ( &resolved, "%s%s%s", base_tmp,
605 ( ( base_tmp[ strlen ( base_tmp ) - 1 ] == '/' ) ?
606 "" : "/" ), relative_path ) < 0 )
613 * Resolve base+relative URI
615 * @v base_uri Base URI, or NULL
616 * @v relative_uri Relative URI
617 * @ret resolved_uri Resolved URI
619 * Takes a base URI (e.g. "http://ipxe.org/kernels/vmlinuz") and a
620 * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
621 * (e.g. "http://ipxe.org/initrds/initrd.gz").
623 struct uri * resolve_uri ( const struct uri *base_uri,
624 struct uri *relative_uri ) {
626 char *tmp_path = NULL;
629 /* If relative URI is absolute, just re-use it */
630 if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) )
631 return uri_get ( relative_uri );
634 memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
635 if ( relative_uri->path ) {
636 tmp_path = resolve_path ( ( base_uri->path ?
637 base_uri->path : "/" ),
638 relative_uri->path );
639 tmp_uri.path = tmp_path;
640 tmp_uri.query = relative_uri->query;
641 tmp_uri.fragment = relative_uri->fragment;
642 tmp_uri.params = relative_uri->params;
643 } else if ( relative_uri->query ) {
644 tmp_uri.query = relative_uri->query;
645 tmp_uri.fragment = relative_uri->fragment;
646 tmp_uri.params = relative_uri->params;
647 } else if ( relative_uri->fragment ) {
648 tmp_uri.fragment = relative_uri->fragment;
649 tmp_uri.params = relative_uri->params;
650 } else if ( relative_uri->params ) {
651 tmp_uri.params = relative_uri->params;
654 /* Create demangled URI */
655 new_uri = uri_dup ( &tmp_uri );
661 * Construct TFTP URI from next-server and filename
663 * @v next_server Next-server address
664 * @v filename Filename
665 * @ret uri URI, or NULL on failure
667 * TFTP filenames specified via the DHCP next-server field often
668 * contain characters such as ':' or '#' which would confuse the
669 * generic URI parser. We provide a mechanism for directly
670 * constructing a TFTP URI from the next-server and filename.
672 struct uri * tftp_uri ( struct in_addr next_server, const char *filename ) {
675 memset ( &uri, 0, sizeof ( uri ) );
677 uri.host = inet_ntoa ( next_server );
679 return uri_dup ( &uri );