[libxml2] Fix regression when parsing invalid HTML tags in push mode
- From: Nick Wellnhofer <nwellnhof src gnome org>
- To: commits-list gnome org
- Cc:
- Subject: [libxml2] Fix regression when parsing invalid HTML tags in push mode
- Date: Mon, 10 Jan 2022 14:21:21 +0000 (UTC)
commit 094fc08a09a75feb694837b580bad0401d1e6a0a
Author: Nick Wellnhofer <wellnhofer aevum de>
Date: Mon Jan 10 14:02:10 2022 +0100
Fix regression when parsing invalid HTML tags in push mode
Revert part of commit 173a0830 that changed behavior when parsing
malformed start tags with the push parser. This reintroduces quadratic
behavior in recovery mode which will be worked around in the next
commit.
Fixes #312.
HTMLparser.c | 28 ++++------------------------
1 file changed, 4 insertions(+), 24 deletions(-)
---
diff --git a/HTMLparser.c b/HTMLparser.c
index 02d476f9..d9d8d00d 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -5992,32 +5992,12 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
} else if (cur == '<') {
if ((!terminate) && (next == 0))
goto done;
- /*
- * Only switch to START_TAG if the next character
- * starts a valid name. Otherwise, htmlParseStartTag
- * might return without consuming all characters
- * up to the final '>'.
- */
- if ((IS_ASCII_LETTER(next)) ||
- (next == '_') || (next == ':') || (next == '.')) {
- ctxt->instate = XML_PARSER_START_TAG;
- ctxt->checkIndex = 0;
+ ctxt->instate = XML_PARSER_START_TAG;
+ ctxt->checkIndex = 0;
#ifdef DEBUG_PUSH
- xmlGenericError(xmlGenericErrorContext,
- "HPP: entering START_TAG\n");
+ xmlGenericError(xmlGenericErrorContext,
+ "HPP: entering START_TAG\n");
#endif
- } else {
- htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
- "htmlParseTryOrFinish: "
- "invalid element name\n",
- NULL, NULL);
- htmlCheckParagraph(ctxt);
- if ((ctxt->sax != NULL) &&
- (ctxt->sax->characters != NULL))
- ctxt->sax->characters(ctxt->userData,
- in->cur, 1);
- NEXT;
- }
break;
} else {
/*
[Date Prev] [Date Next] [Thread Prev] [Thread Next] [Thread Index] [Date Index] [Author Index]