[libxml2] Make xmlParseContent and xmlParseElement non-recursive



commit 62150ed2ab19a4dd76c15acc62c7d923d9f3b2cc
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Mon Sep 23 14:46:41 2019 +0200

    Make xmlParseContent and xmlParseElement non-recursive
    
    Split xmlParseElement into subfunctions. Use nameNsPush to store prefix,
    URI and nsNr on the heap, similar to the push parser.
    
    Closes #84.

 parser.c                     | 121 +++++++++++++++++++++++++------------------
 result/errors/754947.xml.err |   2 +-
 result/errors/759398.xml.err |   4 +-
 3 files changed, 74 insertions(+), 53 deletions(-)
---
diff --git a/parser.c b/parser.c
index 5b8df8c5..cad0a9d4 100644
--- a/parser.c
+++ b/parser.c
@@ -96,6 +96,12 @@ xmlCreateEntityParserCtxtInternal(const xmlChar *URL, const xmlChar *ID,
 
 static void xmlHaltParser(xmlParserCtxtPtr ctxt);
 
+static int
+xmlParseElementStart(xmlParserCtxtPtr ctxt);
+
+static void
+xmlParseElementEnd(xmlParserCtxtPtr ctxt);
+
 /************************************************************************
  *                                                                     *
  *     Arbitrary limits set in the parser. See XML_PARSE_HUGE          *
@@ -1822,7 +1828,6 @@ nodePop(xmlParserCtxtPtr ctxt)
     return (ret);
 }
 
-#ifdef LIBXML_PUSH_ENABLED
 /**
  * nameNsPush:
  * @ctxt:  an XML parser context
@@ -1858,6 +1863,11 @@ nameNsPush(xmlParserCtxtPtr ctxt, const xmlChar * value,
            goto mem_error;
         }
        ctxt->pushTab = tmp2;
+    } else if (ctxt->pushTab == NULL) {
+        ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
+                                            sizeof(ctxt->pushTab[0]));
+        if (ctxt->pushTab == NULL)
+            goto mem_error;
     }
     ctxt->nameTab[ctxt->nameNr] = value;
     ctxt->name = value;
@@ -1869,6 +1879,7 @@ mem_error:
     xmlErrMemory(ctxt, NULL);
     return (-1);
 }
+#ifdef LIBXML_PUSH_ENABLED
 /**
  * nameNsPop:
  * @ctxt: an XML parser context
@@ -9812,9 +9823,10 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) {
 
 void
 xmlParseContent(xmlParserCtxtPtr ctxt) {
+    int nameNr = ctxt->nameNr;
+
     GROW;
     while ((RAW != 0) &&
-          ((RAW != '<') || (NXT(1) != '/')) &&
           (ctxt->instate != XML_PARSER_EOF)) {
        const xmlChar *test = CUR_PTR;
        unsigned int cons = ctxt->input->consumed;
@@ -9848,7 +9860,13 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
         * Fourth case :  a sub-element.
         */
        else if (*cur == '<') {
-           xmlParseElement(ctxt);
+            if (NXT(1) == '/') {
+                if (ctxt->nameNr <= nameNr)
+                    break;
+               xmlParseElementEnd(ctxt);
+            } else {
+               xmlParseElementStart(ctxt);
+            }
        }
 
        /*
@@ -9883,7 +9901,7 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
  * xmlParseElement:
  * @ctxt:  an XML parser context
  *
- * parse an XML element, this is highly recursive
+ * parse an XML element
  *
  * [39] element ::= EmptyElemTag | STag content ETag
  *
@@ -9895,6 +9913,23 @@ xmlParseContent(xmlParserCtxtPtr ctxt) {
 
 void
 xmlParseElement(xmlParserCtxtPtr ctxt) {
+    if (xmlParseElementStart(ctxt) != 0)
+        return;
+    xmlParseContent(ctxt);
+    if (ctxt->instate == XML_PARSER_EOF)
+       return;
+    xmlParseElementEnd(ctxt);
+}
+
+/**
+ * xmlParseElementStart:
+ * @ctxt:  an XML parser context
+ *
+ * Parse the start of an XML element. Returns -1 in case of error, 0 if an
+ * opening tag was parsed, 1 if an empty element was parsed.
+ */
+static int
+xmlParseElementStart(xmlParserCtxtPtr ctxt) {
     const xmlChar *name;
     const xmlChar *prefix = NULL;
     const xmlChar *URI = NULL;
@@ -9909,7 +9944,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
                 "Excessive depth in document: %d use XML_PARSE_HUGE option\n",
                          xmlParserMaxDepth);
        xmlHaltParser(ctxt);
-       return;
+       return(-1);
     }
 
     /* Capture start position */
@@ -9936,12 +9971,17 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
        name = xmlParseStartTag(ctxt);
 #endif /* LIBXML_SAX1_ENABLED */
     if (ctxt->instate == XML_PARSER_EOF)
-       return;
+       return(-1);
     if (name == NULL) {
        spacePop(ctxt);
-        return;
+        return(-1);
     }
-    namePush(ctxt, name);
+    if (ctxt->sax2)
+        nameNsPush(ctxt, name, prefix, URI, ctxt->nsNr - nsNr);
+#ifdef LIBXML_SAX1_ENABLED
+    else
+        namePush(ctxt, name);
+#endif /* LIBXML_SAX1_ENABLED */
     ret = ctxt->node;
 
 #ifdef LIBXML_VALID_ENABLED
@@ -9982,7 +10022,7 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
           node_info.node = ret;
           xmlParserAddNodeInfo(ctxt, &node_info);
        }
-       return;
+       return(1);
     }
     if (RAW == '>') {
         NEXT1;
@@ -10010,41 +10050,39 @@ xmlParseElement(xmlParserCtxtPtr ctxt) {
           node_info.node = ret;
           xmlParserAddNodeInfo(ctxt, &node_info);
        }
-       return;
+       return(-1);
     }
 
-    /*
-     * Parse the content of the element:
-     */
-    xmlParseContent(ctxt);
-    if (ctxt->instate == XML_PARSER_EOF)
-       return;
-    if (!IS_BYTE_CHAR(RAW)) {
-        xmlFatalErrMsgStrIntStr(ctxt, XML_ERR_TAG_NOT_FINISHED,
-        "Premature end of data in tag %s line %d\n",
-                               name, line, NULL);
+    return(0);
+}
 
-       /*
-        * end of parsing of this node.
-        */
-       nodePop(ctxt);
-       namePop(ctxt);
-       spacePop(ctxt);
-       if (nsNr != ctxt->nsNr)
-           nsPop(ctxt, ctxt->nsNr - nsNr);
-       return;
-    }
+/**
+ * xmlParseElementEnd:
+ * @ctxt:  an XML parser context
+ *
+ * Parse the end of an XML element.
+ */
+static void
+xmlParseElementEnd(xmlParserCtxtPtr ctxt) {
+    xmlParserNodeInfo node_info;
+    xmlNodePtr ret = ctxt->node;
+
+    if (ctxt->nameNr <= 0)
+        return;
 
     /*
      * parse the end of tag: '</' should be here.
      */
     if (ctxt->sax2) {
-       xmlParseEndTag2(ctxt, prefix, URI, line, ctxt->nsNr - nsNr, tlen);
+        const xmlChar *prefix = ctxt->pushTab[ctxt->nameNr * 3 - 3];
+        const xmlChar *URI = ctxt->pushTab[ctxt->nameNr * 3 - 2];
+        int nsNr = (ptrdiff_t) ctxt->pushTab[ctxt->nameNr * 3 - 1];
+       xmlParseEndTag2(ctxt, prefix, URI, 0, nsNr, 0);
        namePop(ctxt);
     }
 #ifdef LIBXML_SAX1_ENABLED
-      else
-       xmlParseEndTag1(ctxt, line);
+    else
+       xmlParseEndTag1(ctxt, 0);
 #endif /* LIBXML_SAX1_ENABLED */
 
     /*
@@ -12361,13 +12399,6 @@ xmlCreatePushParserCtxt(xmlSAXHandlerPtr sax, void *user_data,
        return(NULL);
     }
     ctxt->dictNames = 1;
-    ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 * sizeof(xmlChar *));
-    if (ctxt->pushTab == NULL) {
-        xmlErrMemory(ctxt, NULL);
-       xmlFreeParserInputBuffer(buf);
-       xmlFreeParserCtxt(ctxt);
-       return(NULL);
-    }
     if (sax != NULL) {
 #ifdef LIBXML_SAX1_ENABLED
        if (ctxt->sax != (xmlSAXHandlerPtr) &xmlDefaultSAXHandler)
@@ -14949,16 +14980,6 @@ xmlCtxtResetPush(xmlParserCtxtPtr ctxt, const char *chunk,
 
     xmlCtxtReset(ctxt);
 
-    if (ctxt->pushTab == NULL) {
-        ctxt->pushTab = (void **) xmlMalloc(ctxt->nameMax * 3 *
-                                           sizeof(xmlChar *));
-        if (ctxt->pushTab == NULL) {
-           xmlErrMemory(ctxt, NULL);
-            xmlFreeParserInputBuffer(buf);
-            return(1);
-        }
-    }
-
     if (filename == NULL) {
         ctxt->directory = NULL;
     } else {
diff --git a/result/errors/754947.xml.err b/result/errors/754947.xml.err
index f45cb5a2..51e9b4ed 100644
--- a/result/errors/754947.xml.err
+++ b/result/errors/754947.xml.err
@@ -2,6 +2,6 @@
 Bytes: 0xEE 0x5D 0x5D 0x3E
 <d><![CDATA[0000000000000�]]>
                          ^
-./test/errors/754947.xml:1: parser error : Premature end of data in tag d line 1
+./test/errors/754947.xml:1: parser error : EndTag: '</' not found
 <d><![CDATA[0000000000000�]]>
                              ^
diff --git a/result/errors/759398.xml.err b/result/errors/759398.xml.err
index f6036a3b..bc9e5e03 100644
--- a/result/errors/759398.xml.err
+++ b/result/errors/759398.xml.err
@@ -1,10 +1,10 @@
 ./test/errors/759398.xml:210: parser error : StartTag: invalid element name
 need to worry about parsers whi<! don't expand PErefs finding
                                 ^
-./test/errors/759398.xml:309: parser error : Opening and ending tag mismatch: 
№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№��
 �№№�
 
��№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№
 №№��
 
�№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№�
 ��№№
 
№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№m
 line 308 and termdef
+./test/errors/759398.xml:309: parser error : Opening and ending tag mismatch: 
№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№��
 �№№�
 
��№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№
 №№��
 
�№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№�
 ��№№
 
№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№№m
 line 205 and termdef
 and provide access to their content and structure.</termdef> <termdef
                                                             ^
-./test/errors/759398.xml:314: parser error : Opening and ending tag mismatch: spec line 50 and p
+./test/errors/759398.xml:314: parser error : Opening and ending tag mismatch: spec line 205 and p
 data and the information it must provide to the application.</p>
                                                                 ^
 ./test/errors/759398.xml:316: parser error : Extra content at the end of the document


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]