[libxml2] Fix quadratic runtime when push parsing HTML entity refs



commit 6995eed077899c64d34fe8f0d0b34d214cf586af
Author: Nick Wellnhofer <wellnhofer aevum de>
Date:   Sun Jul 19 13:54:52 2020 +0200

    Fix quadratic runtime when push parsing HTML entity refs
    
    The HTML push parser would look ahead for characters in "; >/" to
    terminate an entity reference, but actual parsing could stop earlier,
    potentially resulting in quadratic runtime.
    
    Parse char data and references alternately in htmlParseTryOrFinish
    and only look ahead once for a terminating '<' character.
    
    Found by OSS-Fuzz.

 HTMLparser.c | 77 +++++++-----------------------------------------------------
 1 file changed, 9 insertions(+), 68 deletions(-)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index d10cf11fa..ad9d7ccca 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -5249,61 +5249,6 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first,
     return (-1);
 }
 
-/**
- * htmlParseLookupChars:
- * @ctxt: an HTML parser context
- * @stop: Array of chars, which stop the lookup.
- * @stopLen: Length of stop-Array
- *
- * Try to find if any char of the stop-Array is available in the input
- * stream.
- * This function has a side effect of (possibly) incrementing ctxt->checkIndex
- * to avoid rescanning sequences of bytes, it DOES change the state of the
- * parser, do not use liberally.
- *
- * Returns the index to the current parsing point if a stopChar
- *      is available, -1 otherwise.
- */
-static int
-htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
-                     int stopLen)
-{
-    int base, len;
-    htmlParserInputPtr in;
-    const xmlChar *buf;
-    int i;
-
-    in = ctxt->input;
-    if (in == NULL)
-        return (-1);
-
-    base = in->cur - in->base;
-    if (base < 0)
-        return (-1);
-
-    if (ctxt->checkIndex > base)
-        base = ctxt->checkIndex;
-
-    if (in->buf == NULL) {
-        buf = in->base;
-        len = in->length;
-    } else {
-        buf = xmlBufContent(in->buf->buffer);
-        len = xmlBufUse(in->buf->buffer);
-    }
-
-    for (; base < len; base++) {
-        for (i = 0; i < stopLen; ++i) {
-            if (buf[base] == stop[i]) {
-                ctxt->checkIndex = 0;
-                return (base - (in->cur - in->base));
-            }
-        }
-    }
-    ctxt->checkIndex = base;
-    return (-1);
-}
-
 /**
  * htmlParseTryOrFinish:
  * @ctxt:  an HTML parser context
@@ -5893,17 +5838,6 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
                                "HPP: entering START_TAG\n");
 #endif
                        break;
-                   } else if (cur == '&') {
-                       if ((!terminate) &&
-                           (htmlParseLookupChars(ctxt,
-                                                  BAD_CAST "; >/", 4) < 0))
-                           goto done;
-#ifdef DEBUG_PUSH
-                       xmlGenericError(xmlGenericErrorContext,
-                               "HPP: Parsing Reference\n");
-#endif
-                       /* TODO: check generation of subtrees if noent !!! */
-                       htmlParseReference(ctxt);
                    } else {
                        /*
                         * check that the text sequence is complete
@@ -5912,14 +5846,21 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
                         * data detection.
                         */
                        if ((!terminate) &&
-                            (htmlParseLookupChars(ctxt, BAD_CAST "<&", 2) < 0))
+                            (htmlParseLookupSequence(ctxt, '<', 0, 0, 0) < 0))
                            goto done;
                        ctxt->checkIndex = 0;
 #ifdef DEBUG_PUSH
                        xmlGenericError(xmlGenericErrorContext,
                                "HPP: Parsing char data\n");
 #endif
-                       htmlParseCharData(ctxt);
+                        while ((cur != '<') && (cur != 0)) {
+                            if (cur == '&') {
+                               htmlParseReference(ctxt);
+                            } else {
+                               htmlParseCharData(ctxt);
+                            }
+                            cur = in->cur[0];
+                        }
                    }
                }
                if (cons == ctxt->nbChars) {


[Date Prev][Date Next]   [Thread Prev][Thread Next]   [Thread Index] [Date Index] [Author Index]