1 /* Copyright 2000-2005 The Apache Software Foundation or its licensors, as
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
18 #include "apr_strings.h"
20 #define APR_WANT_STDIO /* for sprintf() */
21 #define APR_WANT_STRFUNC
26 #include "apu_config.h"
28 #ifdef APR_HAVE_OLD_EXPAT
34 #define DEBUG_CR "\r\n"
36 /* errors related to namespace processing */
37 #define APR_XML_NS_ERROR_UNKNOWN_PREFIX (-1000)
38 #define APR_XML_NS_ERROR_INVALID_DECL (-1001)
/*
 * Test for a name that begins with the reserved prefix [Xx][Mm][Ll].
 *
 * Evaluates to non-zero when the first three characters of `name` spell
 * "xml" in any mix of upper and lower case. The && chain short-circuits on
 * the first mismatch, so a string shorter than three characters is never
 * read past its terminating NUL.
 *
 * `name` is parenthesized at every use so that any pointer expression
 * (for example `p + 2`) may be passed. It is evaluated more than once, so
 * the argument must be free of side effects.
 */
#define APR_XML_NS_IS_RESERVED(name) \
    ( ((name)[0] == 'X' || (name)[0] == 'x') && \
      ((name)[1] == 'M' || (name)[1] == 'm') && \
      ((name)[2] == 'L' || (name)[2] == 'l') )
/*
 * Parser context shared by the Expat callbacks in this file.
 * apr_xml_parser_create() installs it as the Expat user data, and each
 * handler recovers it by assigning its `userdata` argument to an
 * apr_xml_parser pointer.
 */
47 /* the real (internal) definition of the parser context */
48 struct apr_xml_parser {
49 apr_xml_doc *doc; /* the doc we're parsing */
50 apr_pool_t *p; /* the pool we allocate from */
51 apr_xml_elem *cur_elem; /* current element */
/* Zero while parsing is healthy. do_parse() reports APR_EGENERAL for any
 * non-zero value and apr_xml_parser_geterror() maps it to a message. */
53 int error; /* an error has occurred */
54 #define APR_XML_ERROR_EXPAT 1
55 #define APR_XML_ERROR_PARSE_DONE 2
56 /* also: public APR_XML_NS_ERROR_* values (if any) */
/* do_parse() treats a NULL `xp` as "parser no longer active" and records
 * APR_XML_ERROR_PARSE_DONE instead of calling Expat. */
58 XML_Parser xp; /* the actual (Expat) XML parser */
59 enum XML_Error xp_err; /* stored Expat error code */
/*
 * One namespace declaration in effect on an element. start_handler()
 * pushes new entries onto the front of the element's ns_scope list, and
 * find_prefix() searches those lists from the current element upward
 * through its parents.
 */
62 /* struct for scoping namespace declarations */
63 typedef struct apr_xml_ns_scope {
64 const char *prefix; /* prefix used for this ns */
65 int ns; /* index into namespace table */
66 int emptyURI; /* the namespace URI is the empty string */
67 struct apr_xml_ns_scope *next; /* next scoped namespace */
/*
 * find_prefix: resolve a namespace prefix against the scopes in effect.
 *
 * parser - parsing context; the search starts at parser->cur_elem
 * prefix - prefix to look up, compared with strcmp(); "" asks for the
 *          default namespace
 *
 * Walks from the current element up through its parents and scans each
 * element's ns_scope list. A matching scope flagged emptyURI yields
 * APR_XML_NS_NONE. When no scope matches, an empty prefix also yields
 * APR_XML_NS_NONE; otherwise the result is APR_XML_NS_ERROR_UNKNOWN_PREFIX.
 *
 * NOTE(review): the return for a match with a non-empty URI is not visible
 * in this excerpt -- presumably the scope's `ns` index; confirm.
 */
71 /* return namespace table index for a given prefix */
72 static int find_prefix(apr_xml_parser *parser, const char *prefix)
74 apr_xml_elem *elem = parser->cur_elem;
77 ** Walk up the tree, looking for a namespace scope that defines this
80 for (; elem; elem = elem->parent) {
81 apr_xml_ns_scope *ns_scope = elem->ns_scope;
83 for (ns_scope = elem->ns_scope; ns_scope; ns_scope = ns_scope->next) {
84 if (strcmp(prefix, ns_scope->prefix) == 0) {
85 if (ns_scope->emptyURI) {
87 ** It is possible to set the default namespace to an
88 ** empty URI string; this resets the default namespace
89 ** to mean "no namespace." We just found the prefix
90 ** refers to an empty URI, so return "no namespace."
92 return APR_XML_NS_NONE;
101 * If the prefix is empty (""), this means that a prefix was not
102 * specified in the element/attribute. The search that was performed
103 * just above did not locate a default namespace URI (which is stored
104 * into ns_scope with an empty prefix). This means the element/attribute
105 * has "no namespace". We have a reserved value for this.
107 if (*prefix == '\0') {
108 return APR_XML_NS_NONE;
112 return APR_XML_NS_ERROR_UNKNOWN_PREFIX;
/*
 * start_handler: Expat start-element callback (registered through
 * XML_SetElementHandler in apr_xml_parser_create()).
 *
 * userdata - the apr_xml_parser context
 * name     - element name as reported by Expat
 * attrs    - attribute strings, consumed two at a time as name then value
 *
 * Visible steps, in order:
 *  1. Allocate a zeroed element from parser->p and duplicate its name.
 *  2. Copy each attribute name/value into a new apr_xml_attr and push it on
 *     the front of elem->attr, so the list ends up in reverse order.
 *  3. Link the element into the tree: it becomes the document root when
 *     there is no current element, otherwise it is appended to the current
 *     element's child list. It then becomes parser->cur_elem.
 *  4. Scan the attributes. "xmlns" and "xmlns:foo" declarations have their
 *     URI quoted, registered with apr_xml_insert_uri(), recorded in a new
 *     ns_scope pushed on elem->ns_scope, and are unlinked from the attribute
 *     list. "xmlns:foo" with an empty value sets
 *     APR_XML_NS_ERROR_INVALID_DECL. A name that merely starts with "xmlns"
 *     stays an ordinary attribute. "xml:lang" is stored quoted in elem->lang
 *     and unlinked as well.
 *  5. With no xml:lang of its own, the element shares its parent's lang
 *     pointer (elem_size() relies on that pointer equality).
 *  6. Resolve the element's namespace: find_prefix(parser, "") on one
 *     branch, APR_XML_NS_NONE for a reserved "xml" name, otherwise
 *     find_prefix() on the name, after which elem->name points just past
 *     the colon. An error value is copied into parser->error.
 *  7. Do the same for each remaining attribute, with APR_XML_NS_NONE on the
 *     first branch because attributes do not use the default namespace.
 *
 * NOTE(review): the tests that select the first branch in steps 6 and 7
 * (presumably `colon == NULL`), and the code that ends the prefix at the
 * colon before calling find_prefix(), are not visible in this excerpt;
 * confirm.
 */
115 static void start_handler(void *userdata, const char *name, const char **attrs)
117 apr_xml_parser *parser = userdata;
125 /* punt once we find an error */
129 elem = apr_pcalloc(parser->p, sizeof(*elem));
131 /* prep the element */
132 elem->name = elem_name = apr_pstrdup(parser->p, name);
134 /* fill in the attributes (note: ends up in reverse order) */
136 attr = apr_palloc(parser->p, sizeof(*attr));
137 attr->name = apr_pstrdup(parser->p, *attrs++);
138 attr->value = apr_pstrdup(parser->p, *attrs++);
139 attr->next = elem->attr;
143 /* hook the element into the tree */
144 if (parser->cur_elem == NULL) {
145 /* no current element; this also becomes the root */
146 parser->cur_elem = parser->doc->root = elem;
149 /* this element appeared within the current elem */
150 elem->parent = parser->cur_elem;
152 /* set up the child/sibling links */
153 if (elem->parent->last_child == NULL) {
154 /* no first child either */
155 elem->parent->first_child = elem->parent->last_child = elem;
158 /* hook onto the end of the parent's children */
159 elem->parent->last_child->next = elem;
160 elem->parent->last_child = elem;
163 /* this element is now the current element */
164 parser->cur_elem = elem;
167 /* scan the attributes for namespace declarations */
168 for (prev = NULL, attr = elem->attr;
171 if (strncmp(attr->name, "xmlns", 5) == 0) {
172 const char *prefix = &attr->name[5];
173 apr_xml_ns_scope *ns_scope;
175 /* test for xmlns:foo= form and xmlns= form */
176 if (*prefix == ':') {
177 /* a namespace prefix declaration must have a
179 if (attr->value[0] == '\0') {
180 parser->error = APR_XML_NS_ERROR_INVALID_DECL;
185 else if (*prefix != '\0') {
186 /* advance "prev" since "attr" is still present */
191 /* quote the URI before we ever start working with it */
192 quoted = apr_xml_quote_string(parser->p, attr->value, 1);
194 /* build and insert the new scope */
195 ns_scope = apr_pcalloc(parser->p, sizeof(*ns_scope));
196 ns_scope->prefix = prefix;
197 ns_scope->ns = apr_xml_insert_uri(parser->doc->namespaces, quoted);
198 ns_scope->emptyURI = *quoted == '\0';
199 ns_scope->next = elem->ns_scope;
200 elem->ns_scope = ns_scope;
202 /* remove this attribute from the element */
204 elem->attr = attr->next;
206 prev->next = attr->next;
208 /* Note: prev will not be advanced since we just removed "attr" */
210 else if (strcmp(attr->name, "xml:lang") == 0) {
211 /* save away the language (in quoted form) */
212 elem->lang = apr_xml_quote_string(parser->p, attr->value, 1);
214 /* remove this attribute from the element */
216 elem->attr = attr->next;
218 prev->next = attr->next;
220 /* Note: prev will not be advanced since we just removed "attr" */
223 /* advance "prev" since "attr" is still present */
229 ** If an xml:lang attribute didn't exist (lang==NULL), then copy the
230 ** language from the parent element (if present).
232 ** NOTE: elem_size() *depends* upon this pointer equality.
234 if (elem->lang == NULL && elem->parent != NULL)
235 elem->lang = elem->parent->lang;
237 /* adjust the element's namespace */
238 colon = strchr(elem_name, ':');
241 * The element is using the default namespace, which will always
242 * be found. Either it will be "no namespace", or a default
243 * namespace URI has been specified at some point.
245 elem->ns = find_prefix(parser, "");
247 else if (APR_XML_NS_IS_RESERVED(elem->name)) {
248 elem->ns = APR_XML_NS_NONE;
252 elem->ns = find_prefix(parser, elem->name);
253 elem->name = colon + 1;
255 if (APR_XML_NS_IS_ERROR(elem->ns)) {
256 parser->error = elem->ns;
261 /* adjust all remaining attributes' namespaces */
262 for (attr = elem->attr; attr; attr = attr->next) {
264 * apr_xml_attr defines this as "const" but we dup'd it, so we
265 * know that we can change it. a bit hacky, but the existing
266 * structure def is best.
268 char *attr_name = (char *)attr->name;
270 colon = strchr(attr_name, ':');
273 * Attributes do NOT use the default namespace. Therefore,
274 * we place them into the "no namespace" category.
276 attr->ns = APR_XML_NS_NONE;
278 else if (APR_XML_NS_IS_RESERVED(attr->name)) {
279 attr->ns = APR_XML_NS_NONE;
283 attr->ns = find_prefix(parser, attr->name);
284 attr->name = colon + 1;
286 if (APR_XML_NS_IS_ERROR(attr->ns)) {
287 parser->error = attr->ns;
/*
 * end_handler: Expat end-element callback (registered together with
 * start_handler). Makes the parent of the current element the current
 * element again; after the root closes, cur_elem is that element's parent
 * pointer. `name` is not referenced in the lines shown here.
 */
294 static void end_handler(void *userdata, const char *name)
296 apr_xml_parser *parser = userdata;
298 /* punt once we find an error */
302 /* pop up one level */
303 parser->cur_elem = parser->cur_elem->parent;
/*
 * cdata_handler: Expat character-data callback (registered through
 * XML_SetCharacterDataHandler).
 *
 * userdata - the apr_xml_parser context
 * data/len - the text run; copied with apr_pstrndup(), so the copy is
 *            NUL-terminated and owned by parser->p
 *
 * The copy is appended to the current element's first_cdata when that
 * element has no children yet, otherwise to the following_cdata of its
 * last child, which preserves the position of the text relative to child
 * elements.
 */
306 static void cdata_handler(void *userdata, const char *data, int len)
308 apr_xml_parser *parser = userdata;
310 apr_text_header *hdr;
313 /* punt once we find an error */
317 elem = parser->cur_elem;
318 s = apr_pstrndup(parser->p, data, len);
320 if (elem->last_child == NULL) {
321 /* no children yet. this cdata follows the start tag */
322 hdr = &elem->first_cdata;
325 /* child elements exist. this cdata follows the last child. */
326 hdr = &elem->last_child->following_cdata;
329 apr_text_append(parser->p, hdr, s);
/*
 * cleanup_parser: pool cleanup that releases the Expat parser.
 *
 * Registered on the pool by apr_xml_parser_create() and run explicitly by
 * apr_xml_parser_done(). `ctx` is the apr_xml_parser.
 *
 * NOTE(review): do_parse() detects a finished parser by testing
 * parser->xp == NULL; the reset of `xp` is presumably on a line not visible
 * in this excerpt -- confirm, since otherwise a later pool cleanup would
 * free the Expat parser a second time.
 */
332 static apr_status_t cleanup_parser(void *ctx)
334 apr_xml_parser *parser = ctx;
336 XML_ParserFree(parser->xp);
342 #if XML_MAJOR_VERSION > 1
/*
 * entity_declaration: Expat entity-declaration callback, compiled only when
 * XML_MAJOR_VERSION > 1 and installed with XML_SetEntityDeclHandler() in
 * apr_xml_parser_create().
 *
 * The declaration details are not examined in the lines shown; the handler
 * calls XML_StopParser() with XML_FALSE, so any entity declaration ends the
 * parse.
 */
343 /* Stop the parser if an entity declaration is hit. */
344 static void entity_declaration(void *userData, const XML_Char *entityName,
345 int is_parameter_entity, const XML_Char *value,
346 int value_length, const XML_Char *base,
347 const XML_Char *systemId, const XML_Char *publicId,
348 const XML_Char *notationName)
350 apr_xml_parser *parser = userData;
352 XML_StopParser(parser->xp, XML_FALSE);
/*
 * default_handler: installed with XML_SetDefaultHandler() in
 * apr_xml_parser_create(). According to the comment there, with older
 * Expat versions the presence of a default handler makes internal entities
 * expand to the empty string.
 */
355 /* A noop default_handler. */
356 static void default_handler(void *userData, const XML_Char *s, int len)
/*
 * apr_xml_parser_create: build a parser context allocated from `pool`.
 *
 * - The context and its document are zero-allocated from `pool`.
 * - The namespace table starts with room for 5 entries and has "DAV:"
 *   inserted first, into an empty array.
 * - An Expat parser is created with XML_ParserCreate(NULL). When that
 *   fails, the pool's abort function is invoked with APR_ENOMEM.
 * - cleanup_parser() is registered on the pool so the Expat parser is
 *   released with it.
 * - The element, character-data and entity-protection handlers are
 *   installed, with the context as Expat user data.
 *
 * NOTE(review): the abort function is called without checking that one is
 * registered on the pool; if apr_pool_get_abort() can return NULL this is a
 * call through a null pointer -- confirm.
 * NOTE(review): the assignment of parser->p and the return statement are
 * not visible in this excerpt. apr_xml_parse_file() tests the result
 * against NULL.
 */
361 APU_DECLARE(apr_xml_parser *) apr_xml_parser_create(apr_pool_t *pool)
363 apr_xml_parser *parser = apr_pcalloc(pool, sizeof(*parser));
366 parser->doc = apr_pcalloc(pool, sizeof(*parser->doc));
368 parser->doc->namespaces = apr_array_make(pool, 5, sizeof(const char *));
370 /* ### is there a way to avoid hard-coding this? */
371 apr_xml_insert_uri(parser->doc->namespaces, "DAV:");
373 parser->xp = XML_ParserCreate(NULL);
374 if (parser->xp == NULL) {
375 (*apr_pool_get_abort(pool))(APR_ENOMEM);
379 apr_pool_cleanup_register(pool, parser, cleanup_parser,
380 apr_pool_cleanup_null);
382 XML_SetUserData(parser->xp, parser);
383 XML_SetElementHandler(parser->xp, start_handler, end_handler);
384 XML_SetCharacterDataHandler(parser->xp, cdata_handler);
386 /* Prevent the "billion laughs" attack against expat by disabling
387 * internal entity expansion. With 2.x, forcibly stop the parser
388 * if an entity is declared - this is safer and a more obvious
389 * failure mode. With older versions, installing a noop
390 * DefaultHandler means that internal entities will be expanded as
391 * the empty string, which is also sufficient to prevent the
393 #if XML_MAJOR_VERSION > 1
394 XML_SetEntityDeclHandler(parser->xp, entity_declaration);
396 XML_SetDefaultHandler(parser->xp, default_handler);
/*
 * do_parse: hand one buffer to Expat and translate the outcome.
 *
 * parser   - parsing context
 * data/len - bytes to parse
 * is_final - passed straight through as the last XML_Parse() argument
 *
 * When the Expat parser is already gone (xp == NULL) the error is set to
 * APR_XML_ERROR_PARSE_DONE. Otherwise XML_Parse() is called; on the failure
 * path the error becomes APR_XML_ERROR_EXPAT and Expat's error code is
 * saved in xp_err. Returns APR_EGENERAL whenever parser->error is non-zero
 * (including errors recorded earlier by the handlers), else APR_SUCCESS.
 *
 * NOTE(review): `len` is an apr_size_t handed to XML_Parse(), whose length
 * parameter is an int in the Expat API; a very large buffer would be
 * narrowed -- confirm callers bound their chunk size.
 * NOTE(review): the test applied to `rv` is not visible in this excerpt.
 */
402 static apr_status_t do_parse(apr_xml_parser *parser,
403 const char *data, apr_size_t len,
406 if (parser->xp == NULL) {
407 parser->error = APR_XML_ERROR_PARSE_DONE;
410 int rv = XML_Parse(parser->xp, data, len, is_final);
413 parser->error = APR_XML_ERROR_EXPAT;
414 parser->xp_err = XML_GetErrorCode(parser->xp);
418 /* ### better error code? */
419 return parser->error ? APR_EGENERAL : APR_SUCCESS;
/*
 * apr_xml_parser_feed: feed the next chunk of the document to the parser.
 * Thin wrapper over do_parse() with is_final set to 0; returns do_parse()'s
 * status unchanged.
 */
422 APU_DECLARE(apr_status_t) apr_xml_parser_feed(apr_xml_parser *parser,
426 return do_parse(parser, data, len, 0 /* is_final */);
/*
 * apr_xml_parser_done: finish the parse and release the Expat parser.
 *
 * Issues a final do_parse() call with zero bytes of data and is_final set
 * to 1 (`&end` only supplies a valid address), then runs cleanup_parser()
 * through apr_pool_cleanup_run() regardless of the parse status.
 *
 * NOTE(review): how `status` is returned and how the document is handed
 * back to the caller are not visible in this excerpt.
 */
429 APU_DECLARE(apr_status_t) apr_xml_parser_done(apr_xml_parser *parser,
433 apr_status_t status = do_parse(parser, &end, 0, 1 /* is_final */);
435 /* get rid of the parser */
436 (void) apr_pool_cleanup_run(parser->p, parser, cleanup_parser);
/*
 * apr_xml_parser_geterror: describe the parser's recorded error.
 *
 * parser     - parsing context whose `error` field is read
 * errbufsize - capacity of the caller's buffer, passed to apr_snprintf()
 *              and apr_cpystrn() as the bound
 *
 * The two namespace errors, APR_XML_ERROR_PARSE_DONE, and a catch-all
 * message for any other value are fixed strings copied with apr_cpystrn().
 * APR_XML_ERROR_EXPAT is formatted with apr_snprintf() from Expat's error
 * string and the numeric code saved in xp_err.
 *
 * NOTE(review): per the comment below, the stored error is cleared as part
 * of this call; that statement and the return are not visible here.
 */
446 APU_DECLARE(char *) apr_xml_parser_geterror(apr_xml_parser *parser,
448 apr_size_t errbufsize)
450 int error = parser->error;
453 /* clear our record of an error */
461 case APR_XML_NS_ERROR_UNKNOWN_PREFIX:
462 msg = "An undefined namespace prefix was used.";
465 case APR_XML_NS_ERROR_INVALID_DECL:
466 msg = "A namespace prefix was defined with an empty URI.";
469 case APR_XML_ERROR_EXPAT:
470 (void) apr_snprintf(errbuf, errbufsize,
471 "XML parser error code: %s (%d)",
472 XML_ErrorString(parser->xp_err), parser->xp_err);
475 case APR_XML_ERROR_PARSE_DONE:
476 msg = "The parser is not active.";
480 msg = "There was an unknown error within the XML body.";
484 (void) apr_cpystrn(errbuf, msg, errbufsize);
/*
 * apr_xml_parse_file: parse a whole file through a freshly created parser.
 *
 * p             - pool for the parser and the read buffer
 * parser        - receives the parser created with apr_xml_parser_create()
 * buffer_length - size of the read buffer and of each read request
 *
 * Reads the file in chunks of up to buffer_length bytes. Each successful
 * read is passed to apr_xml_parser_feed() with the number of bytes actually
 * read (`length` is reset to buffer_length before every apr_file_read()).
 * The loop ends when a read or a feed stops returning APR_SUCCESS; the
 * parse is then completed with apr_xml_parser_done(*parser, ppdoc).
 *
 * NOTE(review): the handling of the read status that ends the loop and of
 * a failed feed is not visible in this excerpt.
 */
488 APU_DECLARE(apr_status_t) apr_xml_parse_file(apr_pool_t *p,
489 apr_xml_parser **parser,
492 apr_size_t buffer_length)
498 *parser = apr_xml_parser_create(p);
499 if (*parser == NULL) {
500 /* FIXME: returning an error code would be nice,
501 * but we don't get one ;( */
504 buffer = apr_palloc(p, buffer_length);
505 length = buffer_length;
507 rv = apr_file_read(xmlfd, buffer, &length);
509 while (rv == APR_SUCCESS) {
510 rv = apr_xml_parser_feed(*parser, buffer, length);
511 if (rv != APR_SUCCESS) {
515 length = buffer_length;
516 rv = apr_file_read(xmlfd, buffer, &length);
521 rv = apr_xml_parser_done(*parser, ppdoc);
/*
 * apr_text_append: add one text item to the end of a text list.
 *
 * p   - pool the new apr_text node is allocated from
 * hdr - list header; when hdr->first is NULL the new node becomes both
 *       first and last, otherwise it is appended after the current last
 *
 * NOTE(review): the statements that fill in the node and link it after
 * hdr->last are not visible in this excerpt.
 */
526 APU_DECLARE(void) apr_text_append(apr_pool_t * p, apr_text_header *hdr,
529 apr_text *t = apr_palloc(p, sizeof(*t));
534 if (hdr->first == NULL) {
535 /* no text elements yet */
536 hdr->first = hdr->last = t;
539 /* append to the last text element */
546 /* ---------------------------------------------------------------
548 ** XML UTILITY FUNCTIONS
552 ** apr_xml_quote_string: quote an XML string
554 ** Replace '<', '>', and '&' with '&lt;', '&gt;', and '&amp;'.
555 ** If quotes is true, then replace '"' with '&quot;'.
557 ** quotes is typically set to true for XML strings that will occur within
558 ** double quotes -- attribute values.
/*
 * apr_xml_quote_string: escape XML metacharacters in `s`.
 *
 * p      - pool the escaped copy is allocated from
 * s      - NUL-terminated input string
 * quotes - when non-zero, '"' is escaped as well
 *
 * Works in two passes. The first walks `s` once, counting its length and
 * the extra bytes each escaped character needs. The second allocates
 * len + extra + 1 bytes from `p` and writes the escaped text into them.
 *
 * NOTE(review): the early exit for a string that needs no escaping and the
 * return statements are not visible in this excerpt.
 */
560 APU_DECLARE(const char *) apr_xml_quote_string(apr_pool_t *p, const char *s,
565 apr_size_t extra = 0;
570 for (scan = s; (c = *scan) != '\0'; ++scan, ++len) {
571 if (c == '<' || c == '>')
572 extra += 3; /* &lt; or &gt; : four bytes replace one */
574 extra += 4; /* &amp; : five bytes replace one */
575 else if (quotes && c == '"')
576 extra += 5; /* &quot; : six bytes replace one */
583 qstr = apr_palloc(p, len + extra + 1);
584 for (scan = s, qscan = qstr; (c = *scan) != '\0'; ++scan) {
604 else if (quotes && c == '"') {
/*
 * Number of decimal digits needed to print the namespace index `ns`.
 *
 * Any value below 10 (negative values included) yields 1, and anything from
 * 1000000000 upward yields 10. The argument is evaluated several times, so
 * it must not have side effects.
 */
#define APR_XML_NS_LEN(ns) \
    ((ns) >= 1000000000 ? 10 : \
     (ns) >= 100000000  ?  9 : \
     (ns) >= 10000000   ?  8 : \
     (ns) >= 1000000    ?  7 : \
     (ns) >= 100000     ?  6 : \
     (ns) >= 10000      ?  5 : \
     (ns) >= 1000       ?  4 : \
     (ns) >= 100        ?  3 : \
     (ns) >= 10         ?  2 : 1)
/*
 * text_size: total strlen() of every item in the text list starting at `t`
 * (followed through ->next). A NULL `t` skips the loop entirely.
 */
627 static apr_size_t text_size(const apr_text *t)
631 for (; t; t = t->next)
632 size += strlen(t->text);
/*
 * elem_size: number of bytes write_elem() needs for `elem` in `style`.
 *
 * elem       - element to measure
 * style      - one of the APR_XML_X2T_* styles
 * namespaces - namespace URI table; read only for APR_XML_X2T_FULL_NS_LANG
 * ns_map     - optional table translating an element/attribute ns index
 *              into the index that is printed; NULL means print it as is
 *
 * For the FULL styles the size covers the tags, with the namespaced form
 * written as ns%d:name, and every attribute. It also covers an xml:lang
 * attribute when the element's lang pointer differs from its parent's
 * (pointer comparison, relying on the sharing set up while parsing).
 * APR_XML_X2T_FULL_NS_LANG additionally counts one xmlns:ns%d="..."
 * declaration per entry in `namespaces` and an xml:lang attribute when lang
 * is set. APR_XML_X2T_LANG_INNER counts only the lang string plus a NUL, or
 * a lone NUL when no lang is set.
 *
 * In every style the element's leading text is added, then each child
 * (always measured as APR_XML_X2T_FULL with no namespace table) together
 * with the text that follows it.
 *
 * NOTE(review): the sizes added for the empty-element and closing-tag
 * cases are on lines not visible in this excerpt.
 */
636 static apr_size_t elem_size(const apr_xml_elem *elem, int style,
637 apr_array_header_t *namespaces, int *ns_map)
641 if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
642 const apr_xml_attr *attr;
646 if (style == APR_XML_X2T_FULL_NS_LANG) {
650 ** The outer element will contain xmlns:ns%d="%s" attributes
651 ** and an xml:lang attribute, if applicable.
654 for (i = namespaces->nelts; i--;) {
655 /* compute size of: ' xmlns:ns%d="%s"' */
656 size += (9 + APR_XML_NS_LEN(i) + 2 +
657 strlen(APR_XML_GET_URI_ITEM(namespaces, i)) + 1);
660 if (elem->lang != NULL) {
661 /* compute size of: ' xml:lang="%s"' */
662 size += 11 + strlen(elem->lang) + 1;
666 if (elem->ns == APR_XML_NS_NONE) {
667 /* compute size of: <%s> */
668 size += 1 + strlen(elem->name) + 1;
671 int ns = ns_map ? ns_map[elem->ns] : elem->ns;
673 /* compute size of: <ns%d:%s> */
674 size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(elem->name) + 1;
677 if (APR_XML_ELEM_IS_EMPTY(elem)) {
678 /* insert a closing "/" */
683 * two of above plus "/":
684 * <ns%d:%s> ... </ns%d:%s>
690 for (attr = elem->attr; attr; attr = attr->next) {
691 if (attr->ns == APR_XML_NS_NONE) {
692 /* compute size of: ' %s="%s"' */
693 size += 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
696 /* compute size of: ' ns%d:%s="%s"' */
697 int ns = ns_map ? ns_map[attr->ns] : attr->ns;
698 size += 3 + APR_XML_NS_LEN(ns) + 1 + strlen(attr->name) + 2 + strlen(attr->value) + 1;
703 ** If the element has an xml:lang value that is *different* from
704 ** its parent, then add the thing in: ' xml:lang="%s"'.
706 ** NOTE: we take advantage of the pointer equality established by
707 ** the parsing for "inheriting" the xml:lang values from parents.
709 if (elem->lang != NULL &&
710 (elem->parent == NULL || elem->lang != elem->parent->lang)) {
711 size += 11 + strlen(elem->lang) + 1;
714 else if (style == APR_XML_X2T_LANG_INNER) {
716 * This style prepends the xml:lang value plus a null terminator.
717 * If a lang value is not present, then we insert a null term.
719 size = elem->lang ? strlen(elem->lang) + 1 : 1;
724 size += text_size(elem->first_cdata.first);
726 for (elem = elem->first_child; elem; elem = elem->next) {
727 /* the size of the child element plus the CDATA that follows it */
728 size += (elem_size(elem, APR_XML_X2T_FULL, NULL, ns_map) +
729 text_size(elem->following_cdata.first));
/*
 * write_text: copy every item of the text list `t` into the buffer at `s`
 * with memcpy(), without NUL terminators.
 *
 * Callers use the result as the next write position
 * (`s = write_text(s, ...)` in write_elem()).
 * NOTE(review): the pointer advance and the return statement are not
 * visible in this excerpt.
 */
735 static char *write_text(char *s, const apr_text *t)
737 for (; t; t = t->next) {
738 apr_size_t len = strlen(t->text);
739 memcpy(s, t->text, len);
/*
 * write_elem: serialize `elem` into the buffer at `s` in the given style.
 *
 * s          - write position; output is produced with unbounded sprintf()
 *              and memcpy(), so the buffer must already be large enough.
 *              apr_xml_to_text() guarantees that by allocating
 *              elem_size() + 1 bytes for the same arguments.
 * elem       - element to write
 * style      - one of the APR_XML_X2T_* styles
 * namespaces - namespace URI table; read only for APR_XML_X2T_FULL_NS_LANG
 * ns_map     - optional translation of ns indexes, as in elem_size()
 *
 * For the FULL styles the output is the open tag, the attributes, an
 * xml:lang attribute and the closing tag. The xml:lang attribute is written
 * when the style is FULL_NS_LANG, when there is no parent, or when the lang
 * pointer differs from the parent's. FULL_NS_LANG also writes one
 * xmlns:ns%d declaration per entry in `namespaces`.
 * APR_XML_X2T_LANG_INNER writes the lang string first instead of any tags.
 * In between come the element's leading text and each child (always
 * written as APR_XML_X2T_FULL) followed by its trailing text.
 *
 * NOTE(review): the pointer advances after each sprintf(), the handling of
 * empty elements and the return statement are not visible in this excerpt.
 */
745 static char *write_elem(char *s, const apr_xml_elem *elem, int style,
746 apr_array_header_t *namespaces, int *ns_map)
748 const apr_xml_elem *child;
752 if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
753 int empty = APR_XML_ELEM_IS_EMPTY(elem);
754 const apr_xml_attr *attr;
756 if (elem->ns == APR_XML_NS_NONE) {
757 len = sprintf(s, "<%s", elem->name);
760 ns = ns_map ? ns_map[elem->ns] : elem->ns;
761 len = sprintf(s, "<ns%d:%s", ns, elem->name);
765 for (attr = elem->attr; attr; attr = attr->next) {
766 if (attr->ns == APR_XML_NS_NONE)
767 len = sprintf(s, " %s=\"%s\"", attr->name, attr->value);
769 ns = ns_map ? ns_map[attr->ns] : attr->ns;
770 len = sprintf(s, " ns%d:%s=\"%s\"", ns, attr->name, attr->value);
775 /* add the xml:lang value if necessary */
776 if (elem->lang != NULL &&
777 (style == APR_XML_X2T_FULL_NS_LANG ||
778 elem->parent == NULL ||
779 elem->lang != elem->parent->lang)) {
780 len = sprintf(s, " xml:lang=\"%s\"", elem->lang);
784 /* add namespace definitions, if required */
785 if (style == APR_XML_X2T_FULL_NS_LANG) {
788 for (i = namespaces->nelts; i--;) {
789 len = sprintf(s, " xmlns:ns%d=\"%s\"", i,
790 APR_XML_GET_URI_ITEM(namespaces, i));
795 /* no more to do. close it up and go. */
805 else if (style == APR_XML_X2T_LANG_INNER) {
806 /* prepend the xml:lang value */
807 if (elem->lang != NULL) {
808 len = strlen(elem->lang);
809 memcpy(s, elem->lang, len);
815 s = write_text(s, elem->first_cdata.first);
817 for (child = elem->first_child; child; child = child->next) {
818 s = write_elem(s, child, APR_XML_X2T_FULL, NULL, ns_map);
819 s = write_text(s, child->following_cdata.first);
822 if (style == APR_XML_X2T_FULL || style == APR_XML_X2T_FULL_NS_LANG) {
823 if (elem->ns == APR_XML_NS_NONE) {
824 len = sprintf(s, "</%s>", elem->name);
827 ns = ns_map ? ns_map[elem->ns] : elem->ns;
828 len = sprintf(s, "</ns%d:%s>", ns, elem->name);
/*
 * apr_xml_quote_elem: XML-escape all text held by `elem` and, recursively,
 * by its children.
 *
 * p    - pool used by apr_xml_quote_string() for any escaped copies
 * elem - element whose content is rewritten in place
 *
 * Text items in first_cdata and following_cdata are escaped without quote
 * handling (quotes = 0); attribute values are escaped with quotes = 1
 * because they are emitted inside double quotes. Each pointer is replaced
 * by whatever apr_xml_quote_string() returns.
 */
836 APU_DECLARE(void) apr_xml_quote_elem(apr_pool_t *p, apr_xml_elem *elem)
839 apr_xml_attr *scan_attr;
840 apr_xml_elem *scan_elem;
842 /* convert the element's text */
843 for (scan_txt = elem->first_cdata.first;
845 scan_txt = scan_txt->next) {
846 scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
848 for (scan_txt = elem->following_cdata.first;
850 scan_txt = scan_txt->next) {
851 scan_txt->text = apr_xml_quote_string(p, scan_txt->text, 0);
854 /* convert the attribute values */
855 for (scan_attr = elem->attr;
857 scan_attr = scan_attr->next) {
858 scan_attr->value = apr_xml_quote_string(p, scan_attr->value, 1);
861 /* convert the child elements */
862 for (scan_elem = elem->first_child;
864 scan_elem = scan_elem->next) {
865 apr_xml_quote_elem(p, scan_elem);
/*
 * apr_xml_to_text: serialize `elem` into a buffer allocated from `p`.
 *
 * The buffer is sized by elem_size() for the same style, namespaces and
 * ns_map, plus one byte for a terminating NUL, and is then filled by
 * write_elem(). The sizing and writing passes must stay in agreement,
 * because write_elem() does no bounds checking.
 *
 * NOTE(review): the termination of the buffer and the stores through the
 * output parameters are not visible in this excerpt.
 */
869 /* convert an element to a text string */
870 APU_DECLARE(void) apr_xml_to_text(apr_pool_t * p, const apr_xml_elem *elem,
871 int style, apr_array_header_t *namespaces,
872 int *ns_map, const char **pbuf,
875 /* get the exact size, plus a null terminator */
876 apr_size_t size = elem_size(elem, style, namespaces, ns_map) + 1;
877 char *s = apr_palloc(p, size);
879 (void) write_elem(s, elem, style, namespaces, ns_map);
/*
 * apr_xml_empty_elem: format `elem` as a self-closing tag followed by
 * DEBUG_CR, in a string allocated from `p`.
 *
 * An element with no namespace is written as <name/>; any other element is
 * written as <ns%d:name/> using elem->ns directly (no ns_map translation is
 * applied here, unlike elem_size()/write_elem()).
 */
887 APU_DECLARE(const char *) apr_xml_empty_elem(apr_pool_t * p,
888 const apr_xml_elem *elem)
890 if (elem->ns == APR_XML_NS_NONE) {
892 * The prefix (xml...) is already within the prop name, or
893 * the element simply has no prefix.
895 return apr_psprintf(p, "<%s/>" DEBUG_CR, elem->name);
898 return apr_psprintf(p, "<ns%d:%s/>" DEBUG_CR, elem->ns, elem->name);
/*
 * apr_xml_insert_uri: look up `uri` in the namespace table, adding it when
 * absent.
 *
 * uri_array - array of `const char *` URIs
 * uri       - URI to find or add; the pointer itself is stored (no copy is
 *             made), so it must outlive the array
 *
 * An empty URI is never stored and maps to APR_XML_NS_NONE. Existing
 * entries are compared with strcmp(), scanning from the last entry down to
 * the first. A new URI is pushed on the end and its index (nelts - 1) is
 * returned.
 *
 * NOTE(review): the empty-URI test and the return for an existing entry
 * (presumably its index `i`) are not visible in this excerpt.
 */
901 /* return the URI's (existing) index, or insert it and return a new index */
902 APU_DECLARE(int) apr_xml_insert_uri(apr_array_header_t *uri_array,
908 /* never insert an empty URI; this index is always APR_XML_NS_NONE */
910 return APR_XML_NS_NONE;
912 for (i = uri_array->nelts; i--;) {
913 if (strcmp(uri, APR_XML_GET_URI_ITEM(uri_array, i)) == 0)
917 pelt = apr_array_push(uri_array);
918 *pelt = uri; /* assume uri is const or in a pool */
919 return uri_array->nelts - 1;