From 477c7f6affcb665305b333f92ce0a782325b4156 Mon Sep 17 00:00:00 2001 From: Nick Wellnhofer Date: Sun, 28 Jun 2020 15:54:23 +0200 Subject: [PATCH] Fix quadratic runtime in HTML parser Commit eeb99329 removed an important optimization avoiding quadratic runtime when repeatedly scanning the input buffer for terminating characters in the HTML push parser. The related bug is https://bugzilla.gnome.org/show_bug.cgi?id=444994 Make sure that ctxt->checkIndex is always written and store additional parser state in ctxt->inSubset which is unused in the HTML parser. Found by OSS-Fuzz. --- HTMLparser.c | 15 +++++++-- result/HTML/chunked_attr.html | 46 +++++++++++++++++++++++++++ result/HTML/chunked_attr.html.err | 0 result/HTML/chunked_attr.html.sax | 41 ++++++++++++++++++++++++ test/HTML/chunked_attr.html | 53 +++++++++++++++++++++++++++++++ 5 files changed, 152 insertions(+), 3 deletions(-) create mode 100644 result/HTML/chunked_attr.html create mode 100644 result/HTML/chunked_attr.html.err create mode 100644 result/HTML/chunked_attr.html.sax create mode 100644 test/HTML/chunked_attr.html diff --git a/HTMLparser.c b/HTMLparser.c index 7fba9429..468db107 100644 --- a/HTMLparser.c +++ b/HTMLparser.c @@ -5158,8 +5158,12 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, if (base < 0) return (-1); - if (ctxt->checkIndex > base) + if (ctxt->checkIndex > base) { base = ctxt->checkIndex; + /* Abuse inSubset member to restore current state. */ + incomment = ctxt->inSubset & 1 ? 1 : 0; + invalue = ctxt->inSubset & 2 ? 1 : 0; + } if (in->buf == NULL) { buf = in->base; @@ -5235,8 +5239,13 @@ htmlParseLookupSequence(htmlParserCtxtPtr ctxt, xmlChar first, return (base - (in->cur - in->base)); } } - if ((!incomment) && (!invalue)) - ctxt->checkIndex = base; + ctxt->checkIndex = base; + /* Abuse inSubset member to track current state. */ + ctxt->inSubset = 0; + if (incomment) + ctxt->inSubset |= 1; + if (invalue) + ctxt->inSubset |= 2; #ifdef DEBUG_PUSH if (next == 0) xmlGenericError(xmlGenericErrorContext, diff --git a/result/HTML/chunked_attr.html b/result/HTML/chunked_attr.html new file mode 100644 index 00000000..2fd71a6e --- /dev/null +++ b/result/HTML/chunked_attr.html @@ -0,0 +1,46 @@ + + + + +

+Filler bytes follow: + + 100 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 200 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 300 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 400 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 500 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 600 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 700 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 800 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 900 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 1000 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 100 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 200 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 300 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 400 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 500 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 600 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 700 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 800 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 900 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 2000 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 100 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 200 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 300 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 400 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 500 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 600 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 700 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 800 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 +xxx +

+
+
+ + diff --git a/result/HTML/chunked_attr.html.err b/result/HTML/chunked_attr.html.err new file mode 100644 index 00000000..e69de29b diff --git a/result/HTML/chunked_attr.html.sax b/result/HTML/chunked_attr.html.sax new file mode 100644 index 00000000..5f9e9702 --- /dev/null +++ b/result/HTML/chunked_attr.html.sax @@ -0,0 +1,41 @@ +SAX.setDocumentLocator() +SAX.startDocument() +SAX.startElement(html) +SAX.characters( +, 1) +SAX.comment( +This tests internal state tracking of the push parser and assumes a chunk +size of 4096 (or a divisor of 4096) and an initial chunk of size 4. +Make sure that the first '<' in the attribute value ends up near +offset 4100. +) +SAX.characters( +, 1) +SAX.startElement(body) +SAX.characters( +, 1) +SAX.startElement(p) +SAX.characters( +Filler bytes follow: + + 1, 1000) +SAX.characters(89 123456789 123456789 + 1, 1000) +SAX.characters(89 123456789 123456789 + 1, 827) +SAX.endElement(p) +SAX.characters( +, 1) +SAX.startElement(div, fill1='123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789', onmouseover='x<b>text</b>x', fill2='123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789', fill3='123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789', fill4='123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789', fill5='123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789', fill6='123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789') +SAX.characters( +, 1) +SAX.endElement(div) +SAX.characters( +, 1) +SAX.endElement(body) +SAX.characters( +, 1) +SAX.endElement(html) +SAX.characters( +, 1) +SAX.endDocument() diff --git a/test/HTML/chunked_attr.html b/test/HTML/chunked_attr.html new file mode 100644 index 00000000..84d81796 --- /dev/null +++ b/test/HTML/chunked_attr.html @@ -0,0 +1,53 @@ + + + +

+Filler bytes follow: + + 100 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 200 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 300 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 400 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 500 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 600 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 700 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 800 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 900 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 1000 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 100 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 200 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 300 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 400 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 500 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 600 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 700 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 800 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 900 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 2000 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 100 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 200 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 300 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 400 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 500 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 600 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 700 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 + 800 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 123456789 +xxx +

+
+
+ + -- 2.27.0