From 500789224b59fa70d6837be5cd1edb8e2f1eccb6 Mon Sep 17 00:00:00 2001
From: Nick Wellnhofer
Date: Sun, 12 Jul 2020 20:28:47 +0200
Subject: [PATCH 083/139] Fix quadratic runtime when parsing HTML script content

If htmlParseScript returns upon hitting an invalid character,
htmlParseLookupSequence will be called again with checkIndex reset to
zero, potentially resulting in quadratic runtime.

Make sure that htmlParseScript consumes all input in one go and simply
skips over invalid characters similar to htmlParseCharDataInternal.

Found by OSS-Fuzz.
---
 HTMLparser.c | 17 +++++++----------
 1 file changed, 7 insertions(+), 10 deletions(-)

diff --git a/HTMLparser.c b/HTMLparser.c
index 1dea794..26ed124 100644
--- a/HTMLparser.c
+++ b/HTMLparser.c
@@ -2928,7 +2928,7 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {
 
     SHRINK;
     cur = CUR_CHAR(l);
-    while (IS_CHAR_CH(cur)) {
+    while (cur != 0) {
         if ((cur == '<') && (NXT(1) == '/')) {
             /*
              * One should break here, the specification is clear:
@@ -2959,7 +2959,12 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {
                 }
             }
         }
-        COPY_BUF(l,buf,nbchar,cur);
+        if (IS_CHAR_CH(cur)) {
+            COPY_BUF(l,buf,nbchar,cur);
+        } else {
+            htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
+                            "Invalid char in CDATA 0x%X\n", cur);
+        }
         if (nbchar >= HTML_PARSER_BIG_BUFFER_SIZE) {
             buf[nbchar] = 0;
             if (ctxt->sax->cdataBlock!= NULL) {
@@ -2977,14 +2982,6 @@ htmlParseScript(htmlParserCtxtPtr ctxt) {
         cur = CUR_CHAR(l);
     }
 
-    if ((!(IS_CHAR_CH(cur))) && (!((cur == 0) && (ctxt->progressive)))) {
-        htmlParseErrInt(ctxt, XML_ERR_INVALID_CHAR,
-                    "Invalid char in CDATA 0x%X\n", cur);
-        if (ctxt->input->cur < ctxt->input->end) {
-            NEXT;
-        }
-    }
-
     if ((nbchar != 0) && (ctxt->sax != NULL) && (!ctxt->disableSAX)) {
         buf[nbchar] = 0;
         if (ctxt->sax->cdataBlock!= NULL) {
-- 
1.8.3.1