qemu/roms/ipxe/src/core/uri.c

   1 /*
   2  * Copyright (C) 2007 Michael Brown <mbrown@fensystems.co.uk>.
   3  *
   4  * This program is free software; you can redistribute it and/or
   5  * modify it under the terms of the GNU General Public License as
   6  * published by the Free Software Foundation; either version 2 of the
   7  * License, or any later version.
   8  *
   9  * This program is distributed in the hope that it will be useful, but
  10  * WITHOUT ANY WARRANTY; without even the implied warranty of
  11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12  * General Public License for more details.
  13  *
  14  * You should have received a copy of the GNU General Public License
  15  * along with this program; if not, write to the Free Software
  16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  17  * 02110-1301, USA.
  18  */
  19
  20 FILE_LICENCE ( GPL2_OR_LATER );
  21
  22 /** @file
  23  *
  24  * Uniform Resource Identifiers
  25  *
  26  */
  27
  28 #include <stdint.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31 #include <libgen.h>
  32 #include <ctype.h>
  33 #include <ipxe/vsprintf.h>
  34 #include <ipxe/params.h>
  35 #include <ipxe/uri.h>
  36
  37 /**
  38  * Decode URI field (in place)
  39  *
  40  * @v string            String
  41  *
  42  * URI decoding can never increase the length of a string; we can
  43  * therefore safely decode in place.
  44  */
  45 static void uri_decode ( char *string ) {
  46         char *dest = string;
  47         char hexbuf[3];
  48         char *hexbuf_end;
  49         char c;
  50         char decoded;
  51         unsigned int skip;
  52
  53         /* Copy string, decoding escaped characters as necessary */
  54         do {
  55                 c = *(string++);
  56                 if ( c == '%' ) {
  57                         snprintf ( hexbuf, sizeof ( hexbuf ), "%s", string );
  58                         decoded = strtoul ( hexbuf, &hexbuf_end, 16 );
  59                         skip = ( hexbuf_end - hexbuf );
  60                         string += skip;
  61                         if ( skip )
  62                                 c = decoded;
  63                 }
  64                 *(dest++) = c;
  65         } while ( c );
  66 }
  67
  68 /**
  69  * Check if character should be escaped within a URI field
  70  *
  71  * @v c                 Character
  72  * @v field             URI field index
  73  * @ret escaped         Character should be escaped
  74  */
  75 static int uri_character_escaped ( char c, unsigned int field ) {
  76
  77         /* Non-printing characters and whitespace should always be
  78          * escaped, since they cannot sensibly be displayed as part of
  79          * a coherent URL string.  (This test also catches control
  80          * characters such as CR and LF, which could affect the
  81          * operation of line-based protocols such as HTTP.)
  82          *
  83          * We should also escape characters which would alter the
  84          * interpretation of the URL if not escaped, i.e. characters
  85          * which have significance to the URL parser.  We should not
  86          * blindly escape all such characters, because this would lead
  87          * to some very strange-looking URLs (e.g. if we were to
  88          * always escape '/' as "%2F" even within the URI path).
  89          *
  90          * We do not need to be perfect.  Our primary role is as a
  91          * consumer of URIs rather than a producer; the main situation
  92          * in which we produce a URI string is for display to a human
  93          * user, who can probably tolerate some variance from the
  94          * formal specification.  The only situation in which we
  95          * currently produce a URI string to be consumed by a computer
  96          * is when constructing an HTTP request URI, which contains
  97          * only the path and query fields.
  98          *
  99          * We can therefore sacrifice some correctness for the sake of
 100          * code size.  For example, colons within the URI host should
 101          * be escaped unless they form part of an IPv6 literal
 102          * address; doing this correctly would require the URI
 103          * formatter to be aware of whether or not the URI host
 104          * contained an IPv4 address, an IPv6 address, or a host name.
 105          * We choose to simplify and never escape colons within the
 106          * URI host field: in the event of a pathological hostname
 107          * containing colons, this could potentially produce a URI
 108          * string which could not be reparsed.
 109          *
 110          * After excluding non-printing characters, whitespace, and
 111          * '%', the full set of characters with significance to the
 112          * URL parser is "/#:@?".  We choose for each URI field which
 113          * of these require escaping in our use cases.
 114          */
 115         static const char *escaped[URI_FIELDS] = {
 116                 /* Scheme: escape everything */
 117                 [URI_SCHEME]    = "/#:@?",
 118                 /* Opaque part: escape characters which would affect
 119                  * the reparsing of the URI, allowing everything else
 120                  * (e.g. ':', which will appear in iSCSI URIs).
 121                  */
 122                 [URI_OPAQUE]    = "/#",
 123                 /* User name: escape everything */
 124                 [URI_USER]      = "/#:@?",
 125                 /* Password: escape everything */
 126                 [URI_PASSWORD]  = "/#:@?",
 127                 /* Host name: escape everything except ':', which may
 128                  * appear as part of an IPv6 literal address.
 129                  */
 130                 [URI_HOST]      = "/#@?",
 131                 /* Port number: escape everything */
 132                 [URI_PORT]      = "/#:@?",
 133                 /* Path: escape everything except '/', which usually
 134                  * appears within paths.
 135                  */
 136                 [URI_PATH]      = "#:@?",
 137                 /* Query: escape everything except '/', which
 138                  * sometimes appears within queries.
 139                  */
 140                 [URI_QUERY]     = "#:@?",
 141                 /* Fragment: escape everything */
 142                 [URI_FRAGMENT]  = "/#:@?",
 143         };
 144
 145         return ( /* Always escape non-printing characters and whitespace */
 146                  ( ! isprint ( c ) ) || ( c == ' ' ) ||
 147                  /* Always escape '%' */
 148                  ( c == '%' ) ||
 149                  /* Escape field-specific characters */
 150                  strchr ( escaped[field], c ) );
 151 }
 152
 153 /**
 154  * Encode URI field
 155  *
 156  * @v uri               URI
 157  * @v field             URI field index
 158  * @v buf               Buffer to contain encoded string
 159  * @v len               Length of buffer
 160  * @ret len             Length of encoded string (excluding NUL)
 161  */
 162 size_t uri_encode ( const char *string, unsigned int field,
 163                     char *buf, ssize_t len ) {
 164         ssize_t remaining = len;
 165         size_t used;
 166         char c;
 167
 168         /* Ensure encoded string is NUL-terminated even if empty */
 169         if ( len > 0 )
 170                 buf[0] = '\0';
 171
 172         /* Copy string, escaping as necessary */
 173         while ( ( c = *(string++) ) ) {
 174                 if ( uri_character_escaped ( c, field ) ) {
 175                         used = ssnprintf ( buf, remaining, "%%%02X", c );
 176                 } else {
 177                         used = ssnprintf ( buf, remaining, "%c", c );
 178                 }
 179                 buf += used;
 180                 remaining -= used;
 181         }
 182
 183         return ( len - remaining );
 184 }
 185
 186 /**
 187  * Dump URI for debugging
 188  *
 189  * @v uri               URI
 190  */
 191 static void uri_dump ( const struct uri *uri ) {
 192
 193         if ( ! uri )
 194                 return;
 195         if ( uri->scheme )
 196                 DBGC ( uri, " scheme \"%s\"", uri->scheme );
 197         if ( uri->opaque )
 198                 DBGC ( uri, " opaque \"%s\"", uri->opaque );
 199         if ( uri->user )
 200                 DBGC ( uri, " user \"%s\"", uri->user );
 201         if ( uri->password )
 202                 DBGC ( uri, " password \"%s\"", uri->password );
 203         if ( uri->host )
 204                 DBGC ( uri, " host \"%s\"", uri->host );
 205         if ( uri->port )
 206                 DBGC ( uri, " port \"%s\"", uri->port );
 207         if ( uri->path )
 208                 DBGC ( uri, " path \"%s\"", uri->path );
 209         if ( uri->query )
 210                 DBGC ( uri, " query \"%s\"", uri->query );
 211         if ( uri->fragment )
 212                 DBGC ( uri, " fragment \"%s\"", uri->fragment );
 213         if ( uri->params )
 214                 DBGC ( uri, " params \"%s\"", uri->params->name );
 215 }
 216
 217 /**
 218  * Free URI
 219  *
 220  * @v refcnt            Reference count
 221  */
 222 static void uri_free ( struct refcnt *refcnt ) {
 223         struct uri *uri = container_of ( refcnt, struct uri, refcnt );
 224
 225         params_put ( uri->params );
 226         free ( uri );
 227 }
 228
 229 /**
 230  * Parse URI
 231  *
 232  * @v uri_string        URI as a string
 233  * @ret uri             URI
 234  *
 235  * Splits a URI into its component parts.  The return URI structure is
 236  * dynamically allocated and must eventually be freed by calling
 237  * uri_put().
 238  */
 239 struct uri * parse_uri ( const char *uri_string ) {
 240         struct uri *uri;
 241         struct parameters *params;
 242         char *raw;
 243         char *tmp;
 244         char *path;
 245         char *authority;
 246         size_t raw_len;
 247         unsigned int field;
 248
 249         /* Allocate space for URI struct and a copy of the string */
 250         raw_len = ( strlen ( uri_string ) + 1 /* NUL */ );
 251         uri = zalloc ( sizeof ( *uri ) + raw_len );
 252         if ( ! uri )
 253                 return NULL;
 254         ref_init ( &uri->refcnt, uri_free );
 255         raw = ( ( ( void * ) uri ) + sizeof ( *uri ) );
 256
 257         /* Copy in the raw string */
 258         memcpy ( raw, uri_string, raw_len );
 259
 260         /* Identify the parameter list, if present */
 261         if ( ( tmp = strstr ( raw, "##params" ) ) ) {
 262                 *tmp = '\0';
 263                 tmp += 8 /* "##params" */;
 264                 params = find_parameters ( *tmp ? ( tmp + 1 ) : NULL );
 265                 if ( params ) {
 266                         uri->params = claim_parameters ( params );
 267                 } else {
 268                         /* Ignore non-existent submission blocks */
 269                 }
 270         }
 271
 272         /* Chop off the fragment, if it exists */
 273         if ( ( tmp = strchr ( raw, '#' ) ) ) {
 274                 *(tmp++) = '\0';
 275                 uri->fragment = tmp;
 276         }
 277
 278         /* Identify absolute/relative URI */
 279         if ( ( tmp = strchr ( raw, ':' ) ) ) {
 280                 /* Absolute URI: identify hierarchical/opaque */
 281                 uri->scheme = raw;
 282                 *(tmp++) = '\0';
 283                 if ( *tmp == '/' ) {
 284                         /* Absolute URI with hierarchical part */
 285                         path = tmp;
 286                 } else {
 287                         /* Absolute URI with opaque part */
 288                         uri->opaque = tmp;
 289                         path = NULL;
 290                 }
 291         } else {
 292                 /* Relative URI */
 293                 path = raw;
 294         }
 295
 296         /* If we don't have a path (i.e. we have an absolute URI with
 297          * an opaque portion, we're already finished processing
 298          */
 299         if ( ! path )
 300                 goto done;
 301
 302         /* Chop off the query, if it exists */
 303         if ( ( tmp = strchr ( path, '?' ) ) ) {
 304                 *(tmp++) = '\0';
 305                 uri->query = tmp;
 306         }
 307
 308         /* If we have no path remaining, then we're already finished
 309          * processing.
 310          */
 311         if ( ! path[0] )
 312                 goto done;
 313
 314         /* Identify net/absolute/relative path */
 315         if ( strncmp ( path, "//", 2 ) == 0 ) {
 316                 /* Net path.  If this is terminated by the first '/'
 317                  * of an absolute path, then we have no space for a
 318                  * terminator after the authority field, so shuffle
 319                  * the authority down by one byte, overwriting one of
 320                  * the two slashes.
 321                  */
 322                 authority = ( path + 2 );
 323                 if ( ( tmp = strchr ( authority, '/' ) ) ) {
 324                         /* Shuffle down */
 325                         uri->path = tmp;
 326                         memmove ( ( authority - 1 ), authority,
 327                                   ( tmp - authority ) );
 328                         authority--;
 329                         *(--tmp) = '\0';
 330                 }
 331         } else {
 332                 /* Absolute/relative path */
 333                 uri->path = path;
 334                 authority = NULL;
 335         }
 336
 337         /* If we don't have an authority (i.e. we have a non-net
 338          * path), we're already finished processing
 339          */
 340         if ( ! authority )
 341                 goto done;
 342
 343         /* Split authority into user[:password] and host[:port] portions */
 344         if ( ( tmp = strchr ( authority, '@' ) ) ) {
 345                 /* Has user[:password] */
 346                 *(tmp++) = '\0';
 347                 uri->host = tmp;
 348                 uri->user = authority;
 349                 if ( ( tmp = strchr ( authority, ':' ) ) ) {
 350                         /* Has password */
 351                         *(tmp++) = '\0';
 352                         uri->password = tmp;
 353                 }
 354         } else {
 355                 /* No user:password */
 356                 uri->host = authority;
 357         }
 358
 359         /* Split host into host[:port] */
 360         if ( ( uri->host[ strlen ( uri->host ) - 1 ] != ']' ) &&
 361              ( tmp = strrchr ( uri->host, ':' ) ) ) {
 362                 *(tmp++) = '\0';
 363                 uri->port = tmp;
 364         }
 365
 366         /* Decode fields in-place */
 367         for ( field = 0 ; field < URI_FIELDS ; field++ ) {
 368                 if ( uri_field ( uri, field ) )
 369                         uri_decode ( ( char * ) uri_field ( uri, field ) );
 370         }
 371
 372  done:
 373         DBGC ( uri, "URI parsed \"%s\" to", uri_string );
 374         uri_dump ( uri );
 375         DBGC ( uri, "\n" );
 376
 377         return uri;
 378 }
 379
 380 /**
 381  * Get port from URI
 382  *
 383  * @v uri               URI, or NULL
 384  * @v default_port      Default port to use if none specified in URI
 385  * @ret port            Port
 386  */
 387 unsigned int uri_port ( const struct uri *uri, unsigned int default_port ) {
 388
 389         if ( ( ! uri ) || ( ! uri->port ) )
 390                 return default_port;
 391
 392         return ( strtoul ( uri->port, NULL, 0 ) );
 393 }
 394
 395 /**
 396  * Format URI
 397  *
 398  * @v uri               URI
 399  * @v buf               Buffer to fill with URI string
 400  * @v size              Size of buffer
 401  * @ret len             Length of URI string
 402  */
 403 size_t format_uri ( const struct uri *uri, char *buf, size_t len ) {
 404         static const char prefixes[URI_FIELDS] = {
 405                 [URI_OPAQUE] = ':',
 406                 [URI_PASSWORD] = ':',
 407                 [URI_PORT] = ':',
 408                 [URI_PATH] = '/',
 409                 [URI_QUERY] = '?',
 410                 [URI_FRAGMENT] = '#',
 411         };
 412         char prefix;
 413         size_t used = 0;
 414         unsigned int field;
 415
 416         /* Ensure buffer is NUL-terminated */
 417         if ( len )
 418                 buf[0] = '\0';
 419
 420         /* Special-case NULL URI */
 421         if ( ! uri )
 422                 return 0;
 423
 424         /* Generate fields */
 425         for ( field = 0 ; field < URI_FIELDS ; field++ ) {
 426
 427                 /* Skip non-existent fields */
 428                 if ( ! uri_field ( uri, field ) )
 429                         continue;
 430
 431                 /* Prefix this field, if applicable */
 432                 prefix = prefixes[field];
 433                 if ( ( field == URI_HOST ) && ( uri->user != NULL ) )
 434                         prefix = '@';
 435                 if ( ( field == URI_PATH ) && ( uri->path[0] == '/' ) )
 436                         prefix = '\0';
 437                 if ( prefix ) {
 438                         used += ssnprintf ( ( buf + used ), ( len - used ),
 439                                             "%c", prefix );
 440                 }
 441
 442                 /* Encode this field */
 443                 used += uri_encode ( uri_field ( uri, field ), field,
 444                                      ( buf + used ), ( len - used ) );
 445
 446                 /* Suffix this field, if applicable */
 447                 if ( ( field == URI_SCHEME ) && ( ! uri->opaque ) ) {
 448                         used += ssnprintf ( ( buf + used ), ( len - used ),
 449                                             "://" );
 450                 }
 451         }
 452
 453         if ( len ) {
 454                 DBGC ( uri, "URI formatted" );
 455                 uri_dump ( uri );
 456                 DBGC ( uri, " to \"%s%s\"\n", buf,
 457                        ( ( used > len ) ? "<TRUNCATED>" : "" ) );
 458         }
 459
 460         return used;
 461 }
 462
 463 /**
 464  * Format URI
 465  *
 466  * @v uri               URI
 467  * @ret string          URI string, or NULL on failure
 468  *
 469  * The caller is responsible for eventually freeing the allocated
 470  * memory.
 471  */
 472 char * format_uri_alloc ( const struct uri *uri ) {
 473         size_t len;
 474         char *string;
 475
 476         len = ( format_uri ( uri, NULL, 0 ) + 1 /* NUL */ );
 477         string = malloc ( len );
 478         if ( string )
 479                 format_uri ( uri, string, len );
 480         return string;
 481 }
 482
 483 /**
 484  * Copy URI fields
 485  *
 486  * @v src               Source URI
 487  * @v dest              Destination URI, or NULL to calculate length
 488  * @ret len             Length of raw URI
 489  */
 490 static size_t uri_copy_fields ( const struct uri *src, struct uri *dest ) {
 491         size_t len = sizeof ( *dest );
 492         char *out = ( ( void * ) dest + len );
 493         unsigned int field;
 494         size_t field_len;
 495
 496         /* Copy existent fields */
 497         for ( field = 0 ; field < URI_FIELDS ; field++ ) {
 498
 499                 /* Skip non-existent fields */
 500                 if ( ! uri_field ( src, field ) )
 501                         continue;
 502
 503                 /* Calculate field length */
 504                 field_len = ( strlen ( uri_field ( src, field ) )
 505                               + 1 /* NUL */ );
 506                 len += field_len;
 507
 508                 /* Copy field, if applicable */
 509                 if ( dest ) {
 510                         memcpy ( out, uri_field ( src, field ), field_len );
 511                         uri_field ( dest, field ) = out;
 512                         out += field_len;
 513                 }
 514         }
 515         return len;
 516 }
 517
 518 /**
 519  * Duplicate URI
 520  *
 521  * @v uri               URI
 522  * @ret uri             Duplicate URI
 523  *
 524  * Creates a modifiable copy of a URI.
 525  */
 526 struct uri * uri_dup ( const struct uri *uri ) {
 527         struct uri *dup;
 528         size_t len;
 529
 530         /* Allocate new URI */
 531         len = uri_copy_fields ( uri, NULL );
 532         dup = zalloc ( len );
 533         if ( ! dup )
 534                 return NULL;
 535         ref_init ( &dup->refcnt, uri_free );
 536
 537         /* Copy fields */
 538         uri_copy_fields ( uri, dup );
 539
 540         /* Copy parameters */
 541         dup->params = params_get ( uri->params );
 542
 543         DBGC ( uri, "URI duplicated" );
 544         uri_dump ( uri );
 545         DBGC ( uri, "\n" );
 546
 547         return dup;
 548 }
 549
 550 /**
 551  * Resolve base+relative path
 552  *
 553  * @v base_uri          Base path
 554  * @v relative_uri      Relative path
 555  * @ret resolved_uri    Resolved path
 556  *
 557  * Takes a base path (e.g. "/var/lib/tftpboot/vmlinuz" and a relative
 558  * path (e.g. "initrd.gz") and produces a new path
 559  * (e.g. "/var/lib/tftpboot/initrd.gz").  Note that any non-directory
 560  * portion of the base path will automatically be stripped; this
 561  * matches the semantics used when resolving the path component of
 562  * URIs.
 563  */
 564 char * resolve_path ( const char *base_path,
 565                       const char *relative_path ) {
 566         size_t base_len = ( strlen ( base_path ) + 1 );
 567         char base_path_copy[base_len];
 568         char *base_tmp = base_path_copy;
 569         char *resolved;
 570
 571         /* If relative path is absolute, just re-use it */
 572         if ( relative_path[0] == '/' )
 573                 return strdup ( relative_path );
 574
 575         /* Create modifiable copy of path for dirname() */
 576         memcpy ( base_tmp, base_path, base_len );
 577         base_tmp = dirname ( base_tmp );
 578
 579         /* Process "./" and "../" elements */
 580         while ( *relative_path == '.' ) {
 581                 relative_path++;
 582                 if ( *relative_path == 0 ) {
 583                         /* Do nothing */
 584                 } else if ( *relative_path == '/' ) {
 585                         relative_path++;
 586                 } else if ( *relative_path == '.' ) {
 587                         relative_path++;
 588                         if ( *relative_path == 0 ) {
 589                                 base_tmp = dirname ( base_tmp );
 590                         } else if ( *relative_path == '/' ) {
 591                                 base_tmp = dirname ( base_tmp );
 592                                 relative_path++;
 593                         } else {
 594                                 relative_path -= 2;
 595                                 break;
 596                         }
 597                 } else {
 598                         relative_path--;
 599                         break;
 600                 }
 601         }
 602
 603         /* Create and return new path */
 604         if ( asprintf ( &resolved, "%s%s%s", base_tmp,
 605                         ( ( base_tmp[ strlen ( base_tmp ) - 1 ] == '/' ) ?
 606                           "" : "/" ), relative_path ) < 0 )
 607                 return NULL;
 608
 609         return resolved;
 610 }
 611
 612 /**
 613  * Resolve base+relative URI
 614  *
 615  * @v base_uri          Base URI, or NULL
 616  * @v relative_uri      Relative URI
 617  * @ret resolved_uri    Resolved URI
 618  *
 619  * Takes a base URI (e.g. "http://ipxe.org/kernels/vmlinuz" and a
 620  * relative URI (e.g. "../initrds/initrd.gz") and produces a new URI
 621  * (e.g. "http://ipxe.org/initrds/initrd.gz").
 622  */
 623 struct uri * resolve_uri ( const struct uri *base_uri,
 624                            struct uri *relative_uri ) {
 625         struct uri tmp_uri;
 626         char *tmp_path = NULL;
 627         struct uri *new_uri;
 628
 629         /* If relative URI is absolute, just re-use it */
 630         if ( uri_is_absolute ( relative_uri ) || ( ! base_uri ) )
 631                 return uri_get ( relative_uri );
 632
 633         /* Mangle URI */
 634         memcpy ( &tmp_uri, base_uri, sizeof ( tmp_uri ) );
 635         if ( relative_uri->path ) {
 636                 tmp_path = resolve_path ( ( base_uri->path ?
 637                                             base_uri->path : "/" ),
 638                                           relative_uri->path );
 639                 tmp_uri.path = tmp_path;
 640                 tmp_uri.query = relative_uri->query;
 641                 tmp_uri.fragment = relative_uri->fragment;
 642                 tmp_uri.params = relative_uri->params;
 643         } else if ( relative_uri->query ) {
 644                 tmp_uri.query = relative_uri->query;
 645                 tmp_uri.fragment = relative_uri->fragment;
 646                 tmp_uri.params = relative_uri->params;
 647         } else if ( relative_uri->fragment ) {
 648                 tmp_uri.fragment = relative_uri->fragment;
 649                 tmp_uri.params = relative_uri->params;
 650         } else if ( relative_uri->params ) {
 651                 tmp_uri.params = relative_uri->params;
 652         }
 653
 654         /* Create demangled URI */
 655         new_uri = uri_dup ( &tmp_uri );
 656         free ( tmp_path );
 657         return new_uri;
 658 }
 659
 660 /**
 661  * Construct TFTP URI from next-server and filename
 662  *
 663  * @v next_server       Next-server address
 664  * @v filename          Filename
 665  * @ret uri             URI, or NULL on failure
 666  *
 667  * TFTP filenames specified via the DHCP next-server field often
 668  * contain characters such as ':' or '#' which would confuse the
 669  * generic URI parser.  We provide a mechanism for directly
 670  * constructing a TFTP URI from the next-server and filename.
 671  */
 672 struct uri * tftp_uri ( struct in_addr next_server, const char *filename ) {
 673         struct uri uri;
 674
 675         memset ( &uri, 0, sizeof ( uri ) );
 676         uri.scheme = "tftp";
 677         uri.host = inet_ntoa ( next_server );
 678         uri.path = filename;
 679         return uri_dup ( &uri );
 680 }