diff --git a/backport-001-CVE-2023-52425.patch b/backport-001-CVE-2023-52425.patch new file mode 100644 index 0000000..1487385 --- /dev/null +++ b/backport-001-CVE-2023-52425.patch @@ -0,0 +1,223 @@ +From 6cc9677838ce4e68680f7877d71032ca6481ee56 Mon Sep 17 00:00:00 2001 +From: Snild Dolkow +Date: Thu, 17 Aug 2023 16:25:26 +0200 +Subject: [PATCH] Skip parsing after repeated partials on the same token + +Reference: https://github.com/libexpat/libexpat/pull/789/commits/9cdf9b8d77d5c2c2a27d15fb68dd3f83cafb45a1 +Conflict: remove basic_tests.c + change xmlparse.c + +MIME-Version: 1.0 +Content-Type: text/plain; charset=utf-8 +Content-Transfer-Encoding: 8bit + +When the parse buffer contains the starting bytes of a token but not +all of them, we cannot parse the token to completion. We call this a +partial token. When this happens, the parse position is reset to the +start of the token, and the parse() call returns. The client is then +expected to provide more data and call parse() again. + +In extreme cases, this means that the bytes of a token may be parsed +many times: once for every buffer refill required before the full token +is present in the buffer. + +Math: + Assume there's a token of T bytes + Assume the client fills the buffer in chunks of X bytes + We'll try to parse X, 2X, 3X, 4X ... until mX == T (technically >=) + That's (m²+m)X/2 = (T²/X+T)/2 bytes parsed (arithmetic progression) + While it is alleviated by larger refills, this amounts to O(T²) + +Expat grows its internal buffer by doubling it when necessary, but has +no way to inform the client about how much space is available. Instead, +we add a heuristic that skips parsing when we've repeatedly stopped on +an incomplete token.
Specifically: + + * Only try to parse if we have a certain amount of data buffered + * Every time we stop on an incomplete token, double the threshold + * As soon as any token completes, the threshold is reset + +This means that when we get stuck on an incomplete token, the threshold +grows exponentially, effectively making the client perform larger buffer +fills, limiting how many times we can end up re-parsing the same bytes. + +Math: + Assume there's a token of T bytes + Assume the client fills the buffer in chunks of X bytes + We'll try to parse X, 2X, 4X, 8X ... until (2^k)X == T (or larger) + That's (2^(k+1)-1)X bytes parsed -- e.g. 15X if T = 8X + This is equal to 2T-X, which amounts to O(T) + +We could've chosen a faster growth rate, e.g. 4 or 8. Those seem to +increase performance further, at the cost of further increasing the +risk of growing the buffer more than necessary. This can easily be +adjusted in the future, if desired. + +This is all completely transparent to the client, except for: +1. possible delay of some callbacks (when our heuristic overshoots) +2. apps that never do isFinal=XML_TRUE could miss data at the end + +For the affected testdata, this change shows a 100-400x speedup. +The recset.xml benchmark shows no clear change either way. + +Before: +benchmark -n ../testdata/largefiles/recset.xml 65535 3 + 3 loops, with buffer size 65535. Average time per loop: 0.270223 +benchmark -n ../testdata/largefiles/aaaaaa_attr.xml 4096 3 + 3 loops, with buffer size 4096. Average time per loop: 15.033048 +benchmark -n ../testdata/largefiles/aaaaaa_cdata.xml 4096 3 + 3 loops, with buffer size 4096. Average time per loop: 0.018027 +benchmark -n ../testdata/largefiles/aaaaaa_comment.xml 4096 3 + 3 loops, with buffer size 4096. Average time per loop: 11.775362 +benchmark -n ../testdata/largefiles/aaaaaa_tag.xml 4096 3 + 3 loops, with buffer size 4096. 
Average time per loop: 11.711414 +benchmark -n ../testdata/largefiles/aaaaaa_text.xml 4096 3 + 3 loops, with buffer size 4096. Average time per loop: 0.019362 + +After: +./run.sh benchmark -n ../testdata/largefiles/recset.xml 65535 3 + 3 loops, with buffer size 65535. Average time per loop: 0.269030 +./run.sh benchmark -n ../testdata/largefiles/aaaaaa_attr.xml 4096 3 + 3 loops, with buffer size 4096. Average time per loop: 0.044794 +./run.sh benchmark -n ../testdata/largefiles/aaaaaa_cdata.xml 4096 3 + 3 loops, with buffer size 4096. Average time per loop: 0.016377 +./run.sh benchmark -n ../testdata/largefiles/aaaaaa_comment.xml 4096 3 + 3 loops, with buffer size 4096. Average time per loop: 0.027022 +./run.sh benchmark -n ../testdata/largefiles/aaaaaa_tag.xml 4096 3 + 3 loops, with buffer size 4096. Average time per loop: 0.099360 +./run.sh benchmark -n ../testdata/largefiles/aaaaaa_text.xml 4096 3 + 3 loops, with buffer size 4096. Average time per loop: 0.017956 +--- + lib/xmlparse.c | 58 +++++++++++++++++++++++++++++--------------- + 1 file changed, 39 insertions(+), 19 deletions(-) + +diff --git a/lib/xmlparse.c b/lib/xmlparse.c +index 5ba56eae..32df1eb9 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -65,6 +65,7 @@ + # endif + #endif + ++#include <stdbool.h> + #include <stddef.h> + #include <string.h> /* memset(), memcpy() */ + #include <assert.h> +@@ -613,6 +614,7 @@ struct XML_ParserStruct { + const char *m_bufferLim; + XML_Index m_parseEndByteIndex; + const char *m_parseEndPtr; ++ size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ + XML_Char *m_dataBuf; + XML_Char *m_dataBufEnd; + XML_StartElementHandler m_startElementHandler; +@@ -944,6 +946,32 @@ get_hash_secret_salt(XML_Parser parser) { + return parser->m_hash_secret_salt; + } + ++static enum XML_Error ++callProcessor(XML_Parser parser, const char *start, const char *end, ++ const char **endPtr) { ++ const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); ++ ++ if (!
parser->m_parsingStatus.finalBuffer) { ++ // Heuristic: don't try to parse a partial token again until the amount of ++ // available data has increased significantly. ++ const size_t had_before = parser->m_partialTokenBytesBefore; ++ const bool enough = (have_now >= 2 * had_before); ++ ++ if (! enough) { ++ *endPtr = start; // callers may expect this to be set ++ return XML_ERROR_NONE; ++ } ++ } ++ const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); ++ // if we consumed nothing, remember what we had on this parse attempt. ++ if (*endPtr == start) { ++ parser->m_partialTokenBytesBefore = have_now; ++ } else { ++ parser->m_partialTokenBytesBefore = 0; ++ } ++ return ret; ++} ++ + static XML_Bool /* only valid for root parser */ + startParsing(XML_Parser parser) { + /* hash functions must be initialized before setContext() is called */ +@@ -1117,6 +1145,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { + parser->m_bufferEnd = parser->m_buffer; + parser->m_parseEndByteIndex = 0; + parser->m_parseEndPtr = NULL; ++ parser->m_partialTokenBytesBefore = 0; + parser->m_declElementType = NULL; + parser->m_declAttributeId = NULL; + parser->m_declEntity = NULL; +@@ -1849,29 +1878,20 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + to detect errors based on that fact. + */ + parser->m_errorCode +- = parser->m_processor(parser, parser->m_bufferPtr, +- parser->m_parseEndPtr, &parser->m_bufferPtr); ++ = callProcessor(parser, parser->m_bufferPtr, parser->m_parseEndPtr, ++ &parser->m_bufferPtr); + + if (parser->m_errorCode == XML_ERROR_NONE) { + switch (parser->m_parsingStatus.parsing) { + case XML_SUSPENDED: +- /* It is hard to be certain, but it seems that this case +- * cannot occur. This code is cleaning up a previous parse +- * with no new data (since len == 0). 
Changing the parsing +- * state requires getting to execute a handler function, and +- * there doesn't seem to be an opportunity for that while in +- * this circumstance. +- * +- * Given the uncertainty, we retain the code but exclude it +- * from coverage tests. +- * +- * LCOV_EXCL_START +- */ ++ /* While we added no new data, the finalBuffer flag may have caused ++ * us to parse previously-unparsed data in the internal buffer. ++ * If that triggered a callback to the application, it would have ++ * had an opportunity to suspend parsing. */ + XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr, + parser->m_bufferPtr, &parser->m_position); + parser->m_positionPtr = parser->m_bufferPtr; + return XML_STATUS_SUSPENDED; +- /* LCOV_EXCL_STOP */ + case XML_INITIALIZED: + case XML_PARSING: + parser->m_parsingStatus.parsing = XML_FINISHED; +@@ -1901,7 +1921,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) { + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + + parser->m_errorCode +- = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end); ++ = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end); + + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; +@@ -2004,8 +2024,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) { + parser->m_parseEndByteIndex += len; + parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal; + +- parser->m_errorCode = parser->m_processor( +- parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr); ++ parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr, ++ &parser->m_bufferPtr); + + if (parser->m_errorCode != XML_ERROR_NONE) { + parser->m_eventEndPtr = parser->m_eventPtr; +@@ -2192,7 +2212,7 @@ XML_ResumeParser(XML_Parser parser) { + } + parser->m_parsingStatus.parsing = XML_PARSING; + +- parser->m_errorCode = parser->m_processor( ++ parser->m_errorCode = callProcessor( + parser, parser->m_bufferPtr, 
parser->m_parseEndPtr, &parser->m_bufferPtr); + + if (parser->m_errorCode != XML_ERROR_NONE) { +-- +2.33.0 + + diff --git a/backport-002-CVE-2023-52425.patch b/backport-002-CVE-2023-52425.patch new file mode 100644 index 0000000..6ed28d3 --- /dev/null +++ b/backport-002-CVE-2023-52425.patch @@ -0,0 +1,40 @@ +From c3a4816e175ede7da1a692a50d6251efdfe41a45 Mon Sep 17 00:00:00 2001 +From: Snild Dolkow +Date: Mon, 4 Sep 2023 17:21:14 +0200 +Subject: [PATCH] Don't update partial token heuristic on error + +Reference: https://github.com/libexpat/libexpat/pull/789/commits/1b9d398517befeb944cbbadadf10992b07e96fa2 +Conflict: no + +Suggested-by: Sebastian Pipping +--- + lib/xmlparse.c | 12 +++++++----- + 1 file changed, 7 insertions(+), 5 deletions(-) + +diff --git a/lib/xmlparse.c b/lib/xmlparse.c +index 32df1eb9..a8414dd7 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -963,11 +963,13 @@ callProcessor(XML_Parser parser, const char *start, const char *end, + } + } + const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr); +- // if we consumed nothing, remember what we had on this parse attempt. +- if (*endPtr == start) { +- parser->m_partialTokenBytesBefore = have_now; +- } else { +- parser->m_partialTokenBytesBefore = 0; ++ if (ret == XML_ERROR_NONE) { ++ // if we consumed nothing, remember what we had on this parse attempt. 
++ if (*endPtr == start) { ++ parser->m_partialTokenBytesBefore = have_now; ++ } else { ++ parser->m_partialTokenBytesBefore = 0; ++ } + } + return ret; + } +-- +2.33.0 + + diff --git a/backport-003-CVE-2023-52425.patch b/backport-003-CVE-2023-52425.patch new file mode 100644 index 0000000..bacc4df --- /dev/null +++ b/backport-003-CVE-2023-52425.patch @@ -0,0 +1,62 @@ +From af7d2acf60b2d42506c7fb7e61ed3dbc7989dd01 Mon Sep 17 00:00:00 2001 +From: Snild Dolkow +Date: Thu, 31 Aug 2023 12:36:43 +0200 +Subject: [PATCH] Always consume BOM bytes when found in prolog + +Reference: https://github.com/libexpat/libexpat/commit/b1e955449cea6bb5862cd249e659c2123bd95a9e +Conflict: change xmlparse.c + +The byte order mark is not correctly consumed when followed by an +incomplete token in a non-final parse. This results in the BOM staying +in the buffer, causing an invalid token error later. + +This was not detected by existing tests because they either parse +everything in one call, or add a single byte at a time. + +By moving forward when we find a BOM, we make sure that the BOM +bytes are properly consumed in all cases. 
+--- + lib/xmlparse.c | 18 +++++++++--------- + 1 file changed, 9 insertions(+), 9 deletions(-) + +diff --git a/lib/xmlparse.c b/lib/xmlparse.c +index daceacf..184997d 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -4502,15 +4502,15 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, + parser->m_processor = entityValueProcessor; + return entityValueProcessor(parser, next, end, nextPtr); + } +- /* If we are at the end of the buffer, this would cause XmlPrologTok to +- return XML_TOK_NONE on the next call, which would then cause the +- function to exit with *nextPtr set to s - that is what we want for other +- tokens, but not for the BOM - we would rather like to skip it; +- then, when this routine is entered the next time, XmlPrologTok will +- return XML_TOK_INVALID, since the BOM is still in the buffer ++ /* XmlPrologTok has now set the encoding based on the BOM it found, and we ++ must move s and nextPtr forward to consume the BOM. ++ ++ If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we ++ would leave the BOM in the buffer and return. On the next call to this ++ function, our XmlPrologTok call would return XML_TOK_INVALID, since it ++ is not valid to have multiple BOMs. + */ +- else if (tok == XML_TOK_BOM && next == end +- && ! parser->m_parsingStatus.finalBuffer) { ++ else if (tok == XML_TOK_BOM) { + # if defined(XML_DTD) || XML_GE == 1 + if (! accountingDiffTolerated(parser, tok, s, next, __LINE__, + XML_ACCOUNT_DIRECT)) { +@@ -4520,7 +4520,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end, + # endif + + *nextPtr = next; +- return XML_ERROR_NONE; ++ s = next; + } + /* If we get this token, we have the start of what might be a + normal tag, but not a declaration (i.e. 
it doesn't begin with +-- +2.33.0 + + diff --git a/backport-004-CVE-2023-52425.patch b/backport-004-CVE-2023-52425.patch new file mode 100644 index 0000000..e2f9b8d --- /dev/null +++ b/backport-004-CVE-2023-52425.patch @@ -0,0 +1,41 @@ +From a7b9a07cd50a4422194f64eb50181fcaec3ef0cf Mon Sep 17 00:00:00 2001 +From: Snild Dolkow +Date: Thu, 24 Aug 2023 09:31:31 +0200 +Subject: [PATCH] tests: Move triplet_start_checker flag check after isFinal=1 + call + +Reference: https://github.com/libexpat/libexpat/pull/745/commits/d52b4141496bd26bd716d88c67af8f2250bd0da6 +Conflict: remove ns_tests.c + change runtests.c + +There is no guarantee that the callback will happen before the parse +call with isFinal=XML_TRUE. Let's move the assertion to a location +where we know it must have happened. +--- + tests/runtests.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tests/runtests.c b/tests/runtests.c +index 45ba5d59..8f1d11f0 100644 +--- a/tests/runtests.c ++++ b/tests/runtests.c +@@ -6527,13 +6527,13 @@ START_TEST(test_return_ns_triplet) { + if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) + == XML_STATUS_ERROR) + xml_failure(g_parser); +- if (! triplet_start_flag) +- fail("triplet_start_checker not invoked"); + /* Check that unsetting "return triplets" fails while still parsing */ + XML_SetReturnNSTriplet(g_parser, XML_FALSE); + if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); ++ if (! triplet_start_flag) ++ fail("triplet_start_checker not invoked"); + if (! 
triplet_end_flag) + fail("triplet_end_checker not invoked"); + if (dummy_handler_flags +-- +2.33.0 + + diff --git a/backport-005-CVE-2023-52425.patch b/backport-005-CVE-2023-52425.patch new file mode 100644 index 0000000..506d542 --- /dev/null +++ b/backport-005-CVE-2023-52425.patch @@ -0,0 +1,32 @@ +From 1b728cf8376a166d21eae818dfa66c55b6209bc4 Mon Sep 17 00:00:00 2001 +From: Snild Dolkow +Date: Thu, 24 Aug 2023 14:10:58 +0200 +Subject: [PATCH] tests: Set isFinal in test_column_number_after_parse + +Reference: https://github.com/libexpat/libexpat/pull/745/commits/2cee1061e2fec10633c3f02a961dabf95e85910a +Conflict: remove basic_tests.c + change runtests.c + +Without this, parsing of the end tag may be deferred, yielding an +unexpected column number. +--- + tests/runtests.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tests/runtests.c b/tests/runtests.c +index 8f1d11f0..9931d85e 100644 +--- a/tests/runtests.c ++++ b/tests/runtests.c +@@ -1071,7 +1071,7 @@ START_TEST(test_column_number_after_parse) { + const char *text = "<tag></tag>"; + XML_Size colno; + +- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) ++ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + == XML_STATUS_ERROR) + xml_failure(g_parser); + colno = XML_GetCurrentColumnNumber(g_parser); +-- +2.33.0 + + diff --git a/backport-006-CVE-2023-52425.patch b/backport-006-CVE-2023-52425.patch new file mode 100644 index 0000000..6f21070 --- /dev/null +++ b/backport-006-CVE-2023-52425.patch @@ -0,0 +1,39 @@ +From 25749ff3dad2216dfd7596498b592747a3d9305e Mon Sep 17 00:00:00 2001 +From: Snild Dolkow +Date: Thu, 31 Aug 2023 16:14:38 +0200 +Subject: [PATCH] tests: Set isFinal=1 in line/column-number-after-error tests + +Reference: https://github.com/libexpat/libexpat/pull/745/commits/d4105a9080271a8d4996d2454f89be9992cb268a +Conflict: remove basic_tests.c + change runtests.c + +--- + tests/runtests.c | 4 ++-- + 1 file changed, 2 insertions(+), 2
deletions(-) + +diff --git a/tests/runtests.c b/tests/runtests.c +index 45ba5d59..d367271f 100644 +--- a/tests/runtests.c ++++ b/tests/runtests.c +@@ -1139,7 +1139,7 @@ START_TEST(test_line_number_after_error) { + " <b>\n" + " </a>"; /* missing </b> */ + XML_Size lineno; +- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) ++ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + != XML_STATUS_ERROR) + fail("Expected a parse error"); + +@@ -1158,7 +1158,7 @@ START_TEST(test_column_number_after_error) { + " <b>\n" + " </a>"; /* missing </b> */ + XML_Size colno; +- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE) ++ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) + != XML_STATUS_ERROR) + fail("Expected a parse error"); + +-- +2.33.0 + + diff --git a/backport-007-CVE-2023-52425.patch b/backport-007-CVE-2023-52425.patch new file mode 100644 index 0000000..0712b74 --- /dev/null +++ b/backport-007-CVE-2023-52425.patch @@ -0,0 +1,368 @@ +From 1d784ef14933ee775fc20ba4435b8def6b70eae3 Mon Sep 17 00:00:00 2001 +From: caixiaomeng 00662745 +Date: Mon, 4 Mar 2024 11:00:25 +0800 +Subject: [PATCH] tests: Adapt test default current cases for 2.4.1 + +Reference: https://github.com/libexpat/libexpat/commit/7474fe3d3f686a4d76f1df48c5db0eced295059b +Conflict: yes + +--- + tests/runtests.c | 303 +++++++++++++++++++++++++++++++---------- + 1 file changed, 234 insertions(+), 69 deletions(-) + +diff --git a/tests/runtests.c b/tests/runtests.c +index c0aa1773..e97a7c51 100644 +--- a/tests/runtests.c ++++ b/tests/runtests.c +@@ -2536,34 +2536,75 @@ START_TEST(test_memory_allocation) { + } + END_TEST + ++/* Handlers that record their arg and a single identifying character */ ++ ++struct handler_record_entry { ++ const char *name; ++ int arg; ++}; ++struct handler_record_list { ++ int count; ++ struct handler_record_entry entries[50]; // arbitrary big-enough max count ++}; ++ ++# define handler_record_get(storage, index) \
++ _handler_record_get((storage), (index), __FILE__, __LINE__) ++ ++# define assert_record_handler_called(storage, index, expected_name, \ ++ expected_arg) \ ++ do { \ ++ const struct handler_record_entry *e \ ++ = handler_record_get(storage, index); \ ++ assert(strcmp(e->name, expected_name) == 0); \ ++ assert(e->arg == (expected_arg)); \ ++ } while (0) ++ ++/* Handlers that record their function name and int arg. */ ++ ++static void ++record_call(struct handler_record_list *const rec, const char *funcname, ++ const int arg) { ++ const int max_entries = sizeof(rec->entries) / sizeof(rec->entries[0]); ++ assert(rec->count < max_entries); ++ struct handler_record_entry *const e = &rec->entries[rec->count++]; ++ e->name = funcname; ++ e->arg = arg; ++} ++ + static void XMLCALL + record_default_handler(void *userData, const XML_Char *s, int len) { + UNUSED_P(s); +- UNUSED_P(len); +- CharData_AppendXMLChars((CharData *)userData, XCS("D"), 1); ++ record_call((struct handler_record_list *)userData, __func__, len); + } + + static void XMLCALL + record_cdata_handler(void *userData, const XML_Char *s, int len) { + UNUSED_P(s); +- UNUSED_P(len); +- CharData_AppendXMLChars((CharData *)userData, XCS("C"), 1); ++ record_call((struct handler_record_list *)userData, __func__, len); + XML_DefaultCurrent(g_parser); + } + + static void XMLCALL + record_cdata_nodefault_handler(void *userData, const XML_Char *s, int len) { + UNUSED_P(s); +- UNUSED_P(len); +- CharData_AppendXMLChars((CharData *)userData, XCS("c"), 1); ++ record_call((struct handler_record_list *)userData, __func__, len); + } + + static void XMLCALL + record_skip_handler(void *userData, const XML_Char *entityName, + int is_parameter_entity) { + UNUSED_P(entityName); +- CharData_AppendXMLChars((CharData *)userData, +- is_parameter_entity ? 
XCS("E") : XCS("e"), 1); ++ record_call((struct handler_record_list *)userData, __func__, ++ is_parameter_entity); ++} ++ ++static const struct handler_record_entry * ++_handler_record_get(const struct handler_record_list *storage, int index, ++ const char *file, int line) { ++ if (storage->count <= index) { ++ fail("too few handler calls"); ++ } ++ return &storage->entries[index]; + } + + /* Test XML_DefaultCurrent() passes handling on correctly */ +@@ -2573,78 +2614,202 @@ START_TEST(test_default_current) { + "\n" + "]>\n" + "&entity;"; +- CharData storage; + +- XML_SetDefaultHandler(g_parser, record_default_handler); +- XML_SetCharacterDataHandler(g_parser, record_cdata_handler); +- CharData_Init(&storage); +- XML_SetUserData(g_parser, &storage); +- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) +- == XML_STATUS_ERROR) +- xml_failure(g_parser); +- CharData_CheckXMLChars(&storage, XCS("DCDCDCDCDCDD")); ++ { ++ struct handler_record_list storage; ++ storage.count = 0; ++ XML_SetDefaultHandler(g_parser, record_default_handler); ++ XML_SetCharacterDataHandler(g_parser, record_cdata_handler); ++ XML_SetUserData(g_parser, &storage); ++ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) ++ == XML_STATUS_ERROR) ++ xml_failure(g_parser); ++ int i = 0; ++ assert_record_handler_called(&storage, i++, "record_default_handler", 5); ++ // we should have gotten one or more cdata callbacks, totaling 5 chars ++ int cdata_len_remaining = 5; ++ while (cdata_len_remaining > 0) { ++ const struct handler_record_entry *c_entry ++ = handler_record_get(&storage, i++); ++ assert(strcmp(c_entry->name, "record_cdata_handler") == 0); ++ assert(c_entry->arg > 0); ++ assert(c_entry->arg <= cdata_len_remaining); ++ cdata_len_remaining -= c_entry->arg; ++ // default handler must follow, with the exact same len argument. 
++ assert_record_handler_called(&storage, i++, "record_default_handler", ++ c_entry->arg); ++ } ++ assert_record_handler_called(&storage, i++, "record_default_handler", 6); ++ assert(storage.count == i); ++ } + + /* Again, without the defaulting */ +- XML_ParserReset(g_parser, NULL); +- XML_SetDefaultHandler(g_parser, record_default_handler); +- XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); +- CharData_Init(&storage); +- XML_SetUserData(g_parser, &storage); +- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) +- == XML_STATUS_ERROR) +- xml_failure(g_parser); +- CharData_CheckXMLChars(&storage, XCS("DcccccD")); ++ { ++ struct handler_record_list storage; ++ storage.count = 0; ++ XML_ParserReset(g_parser, NULL); ++ XML_SetDefaultHandler(g_parser, record_default_handler); ++ XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); ++ XML_SetUserData(g_parser, &storage); ++ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE) ++ == XML_STATUS_ERROR) ++ xml_failure(g_parser); ++ int i = 0; ++ assert_record_handler_called(&storage, i++, "record_default_handler", 5); ++ // we should have gotten one or more cdata callbacks, totaling 5 chars ++ int cdata_len_remaining = 5; ++ while (cdata_len_remaining > 0) { ++ const struct handler_record_entry *c_entry ++ = handler_record_get(&storage, i++); ++ assert(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0); ++ assert(c_entry->arg > 0); ++ assert(c_entry->arg <= cdata_len_remaining); ++ cdata_len_remaining -= c_entry->arg; ++ } ++ assert_record_handler_called(&storage, i++, "record_default_handler", 6); ++ assert(storage.count == i); ++ } + + /* Now with an internal entity to complicate matters */ +- XML_ParserReset(g_parser, NULL); +- XML_SetDefaultHandler(g_parser, record_default_handler); +- XML_SetCharacterDataHandler(g_parser, record_cdata_handler); +- CharData_Init(&storage); +- XML_SetUserData(g_parser, &storage); +- if 
(_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), +- XML_TRUE) +- == XML_STATUS_ERROR) +- xml_failure(g_parser); +- /* The default handler suppresses the entity */ +- CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDDD")); ++ { ++ struct handler_record_list storage; ++ storage.count = 0; ++ XML_ParserReset(g_parser, NULL); ++ XML_SetDefaultHandler(g_parser, record_default_handler); ++ XML_SetCharacterDataHandler(g_parser, record_cdata_handler); ++ XML_SetUserData(g_parser, &storage); ++ if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), ++ XML_TRUE) ++ == XML_STATUS_ERROR) ++ xml_failure(g_parser); ++ /* The default handler suppresses the entity */ ++ assert_record_handler_called(&storage, 0, "record_default_handler", 9); ++ assert_record_handler_called(&storage, 1, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 2, "record_default_handler", 3); ++ assert_record_handler_called(&storage, 3, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 4, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 5, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 6, "record_default_handler", 8); ++ assert_record_handler_called(&storage, 7, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 8, "record_default_handler", 6); ++ assert_record_handler_called(&storage, 9, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 10, "record_default_handler", 7); ++ assert_record_handler_called(&storage, 11, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 12, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 13, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 14, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 15, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 16, "record_default_handler", 5); ++ 
assert_record_handler_called(&storage, 17, "record_default_handler", 8); ++ assert_record_handler_called(&storage, 18, "record_default_handler", 6); ++ assert(storage.count == 19); ++ } + + /* Again, with a skip handler */ +- XML_ParserReset(g_parser, NULL); +- XML_SetDefaultHandler(g_parser, record_default_handler); +- XML_SetCharacterDataHandler(g_parser, record_cdata_handler); +- XML_SetSkippedEntityHandler(g_parser, record_skip_handler); +- CharData_Init(&storage); +- XML_SetUserData(g_parser, &storage); +- if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), +- XML_TRUE) +- == XML_STATUS_ERROR) +- xml_failure(g_parser); +- /* The default handler suppresses the entity */ +- CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDeD")); ++ { ++ struct handler_record_list storage; ++ storage.count = 0; ++ XML_ParserReset(g_parser, NULL); ++ XML_SetDefaultHandler(g_parser, record_default_handler); ++ XML_SetCharacterDataHandler(g_parser, record_cdata_handler); ++ XML_SetSkippedEntityHandler(g_parser, record_skip_handler); ++ XML_SetUserData(g_parser, &storage); ++ if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), ++ XML_TRUE) ++ == XML_STATUS_ERROR) ++ xml_failure(g_parser); ++ /* The default handler suppresses the entity */ ++ assert_record_handler_called(&storage, 0, "record_default_handler", 9); ++ assert_record_handler_called(&storage, 1, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 2, "record_default_handler", 3); ++ assert_record_handler_called(&storage, 3, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 4, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 5, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 6, "record_default_handler", 8); ++ assert_record_handler_called(&storage, 7, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 8, "record_default_handler", 6); ++ 
assert_record_handler_called(&storage, 9, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 10, "record_default_handler", 7); ++ assert_record_handler_called(&storage, 11, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 12, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 13, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 14, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 15, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 16, "record_default_handler", 5); ++ assert_record_handler_called(&storage, 17, "record_skip_handler", 0); ++ assert_record_handler_called(&storage, 18, "record_default_handler", 6); ++ assert(storage.count == 19); ++ } + + /* This time, allow the entity through */ +- XML_ParserReset(g_parser, NULL); +- XML_SetDefaultHandlerExpand(g_parser, record_default_handler); +- XML_SetCharacterDataHandler(g_parser, record_cdata_handler); +- CharData_Init(&storage); +- XML_SetUserData(g_parser, &storage); +- if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), +- XML_TRUE) +- == XML_STATUS_ERROR) +- xml_failure(g_parser); +- CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDCDD")); ++ { ++ struct handler_record_list storage; ++ storage.count = 0; ++ XML_ParserReset(g_parser, NULL); ++ XML_SetDefaultHandlerExpand(g_parser, record_default_handler); ++ XML_SetCharacterDataHandler(g_parser, record_cdata_handler); ++ XML_SetUserData(g_parser, &storage); ++ if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), ++ XML_TRUE) ++ == XML_STATUS_ERROR) ++ xml_failure(g_parser); ++ assert_record_handler_called(&storage, 0, "record_default_handler", 9); ++ assert_record_handler_called(&storage, 1, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 2, "record_default_handler", 3); ++ assert_record_handler_called(&storage, 3, "record_default_handler", 1); ++ 
assert_record_handler_called(&storage, 4, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 5, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 6, "record_default_handler", 8); ++ assert_record_handler_called(&storage, 7, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 8, "record_default_handler", 6); ++ assert_record_handler_called(&storage, 9, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 10, "record_default_handler", 7); ++ assert_record_handler_called(&storage, 11, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 12, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 13, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 14, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 15, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 16, "record_default_handler", 5); ++ assert_record_handler_called(&storage, 17, "record_cdata_handler", 1); ++ assert_record_handler_called(&storage, 18, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 19, "record_default_handler", 6); ++ assert(storage.count == 20); ++ } + + /* Finally, without passing the cdata to the default handler */ +- XML_ParserReset(g_parser, NULL); +- XML_SetDefaultHandlerExpand(g_parser, record_default_handler); +- XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler); +- CharData_Init(&storage); +- XML_SetUserData(g_parser, &storage); +- if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), +- XML_TRUE) +- == XML_STATUS_ERROR) +- xml_failure(g_parser); +- CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDcD")); ++ { ++ struct handler_record_list storage; ++ storage.count = 0; ++ XML_ParserReset(g_parser, NULL); ++ XML_SetDefaultHandlerExpand(g_parser, record_default_handler); ++ XML_SetCharacterDataHandler(g_parser, 
record_cdata_nodefault_handler); ++ XML_SetUserData(g_parser, &storage); ++ if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text), ++ XML_TRUE) ++ == XML_STATUS_ERROR) ++ xml_failure(g_parser); ++ assert_record_handler_called(&storage, 0, "record_default_handler", 9); ++ assert_record_handler_called(&storage, 1, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 2, "record_default_handler", 3); ++ assert_record_handler_called(&storage, 3, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 4, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 5, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 6, "record_default_handler", 8); ++ assert_record_handler_called(&storage, 7, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 8, "record_default_handler", 6); ++ assert_record_handler_called(&storage, 9, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 10, "record_default_handler", 7); ++ assert_record_handler_called(&storage, 11, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 12, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 13, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 14, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 15, "record_default_handler", 1); ++ assert_record_handler_called(&storage, 16, "record_default_handler", 5); ++ assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler", ++ 1); ++ assert_record_handler_called(&storage, 18, "record_default_handler", 6); ++ assert(storage.count == 19); ++ } + } + END_TEST + +-- +2.33.0 + + diff --git a/backport-008-CVE-2023-52425.patch b/backport-008-CVE-2023-52425.patch new file mode 100644 index 0000000..0a799fc --- /dev/null +++ b/backport-008-CVE-2023-52425.patch @@ -0,0 +1,54 @@ +From 7f54667c59c5a884beba5dce17003715d7cbaffa Mon Sep 17 00:00:00 
2001 +From: Snild Dolkow +Date: Mon, 18 Sep 2023 20:32:55 +0200 +Subject: [PATCH] tests: Run both with and without partial token heuristic + +If we always run with the heuristic enabled, it may hide some bugs by +grouping up input into bigger parse attempts. + +CI-fighting-assistance-by: Sebastian Pipping +--- + lib/internal.h | 2 ++ + lib/xmlparse.c | 5 ++++- + 2 files changed, 6 insertions(+), 1 deletion(-) + +diff --git a/lib/internal.h b/lib/internal.h +index 444eba0f..dda42d88 100644 +--- a/lib/internal.h ++++ b/lib/internal.h +@@ -158,6 +158,8 @@ unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser); + const char *unsignedCharToPrintable(unsigned char c); + #endif + ++extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c ++ + #ifdef __cplusplus + } + #endif +diff --git a/lib/xmlparse.c b/lib/xmlparse.c +index 32df1eb9..e30e76aa 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -599,6 +599,8 @@ static unsigned long getDebugLevel(const char *variableName, + ? 0 \ + : ((*((pool)->ptr)++ = c), 1)) + ++XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c ++ + struct XML_ParserStruct { + /* The first member must be m_userData so that the XML_GetUserData + macro works. */ +@@ -951,7 +953,8 @@ callProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); + +- if (! parser->m_parsingStatus.finalBuffer) { ++ if (g_reparseDeferralEnabledDefault ++ && ! parser->m_parsingStatus.finalBuffer) { + // Heuristic: don't try to parse a partial token again until the amount of + // available data has increased significantly. 
+ const size_t had_before = parser->m_partialTokenBytesBefore; +-- +2.33.0 + + diff --git a/backport-009-CVE-2023-52425.patch b/backport-009-CVE-2023-52425.patch new file mode 100644 index 0000000..1583300 --- /dev/null +++ b/backport-009-CVE-2023-52425.patch @@ -0,0 +1,122 @@ +From 3194d762dc6a80bca5d374fe5084888386fbadcd Mon Sep 17 00:00:00 2001 +From: Snild Dolkow +Date: Mon, 11 Sep 2023 15:31:24 +0200 +Subject: [PATCH] Add app setting for enabling/disabling reparse heuristic + +Suggested-by: Sebastian Pipping +CI-fighting-assistance-by: Sebastian Pipping +--- + doc/reference.html | 24 +++++++++++++++++++++++- + lib/expat.h | 4 ++++ + lib/xmlparse.c | 13 ++++++++++++- + 3 files changed, 39 insertions(+), 2 deletions(-) + +diff --git a/doc/reference.html b/doc/reference.html +index 309cb241..1ded3bbe 100644 +--- a/doc/reference.html ++++ b/doc/reference.html +@@ -149,10 +149,11 @@ interface.

+ + +
  • +- Billion Laughs Attack Protection ++ Attack Protection + +
  • +
  • Miscellaneous Functions +@@ -2172,6 +2173,27 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p, +

    + + ++

    XML_SetReparseDeferralEnabled

    ++
    ++/* Added in Expat 2.6.0. */
    ++XML_Bool XMLCALL
    ++XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
    ++
    ++
    ++

    ++ Large tokens may require many parse calls before enough data is available for Expat to parse it in full. ++ If Expat retried parsing the token on every parse call, parsing could take quadratic time. ++ To avoid this, Expat only retries once a significant amount of new data is available. ++ This function allows disabling this behavior. ++

    ++

    ++ The enabled argument should be XML_TRUE or XML_FALSE. ++

    ++

    ++ Returns XML_TRUE on success, and XML_FALSE on error. ++

    ++
    ++ +

    Miscellaneous functions

    + +

    The functions in this section either obtain state information from +diff --git a/lib/expat.h b/lib/expat.h +index b7d6d354..a4033742 100644 +--- a/lib/expat.h ++++ b/lib/expat.h +@@ -1036,6 +1036,10 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( + XML_Parser parser, unsigned long long activationThresholdBytes); + #endif + ++/* Added in Expat 2.6.0. */ ++XMLPARSEAPI(XML_Bool) ++XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled); ++ + /* Expat follows the semantic versioning convention. + See http://semver.org. + */ +diff --git a/lib/xmlparse.c b/lib/xmlparse.c +index e30e76aa..d95b054b 100644 +--- a/lib/xmlparse.c ++++ b/lib/xmlparse.c +@@ -617,6 +617,7 @@ struct XML_ParserStruct { + XML_Index m_parseEndByteIndex; + const char *m_parseEndPtr; + size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */ ++ XML_Bool m_reparseDeferralEnabled; + XML_Char *m_dataBuf; + XML_Char *m_dataBufEnd; + XML_StartElementHandler m_startElementHandler; +@@ -953,7 +954,7 @@ callProcessor(XML_Parser parser, const char *start, const char *end, + const char **endPtr) { + const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start); + +- if (g_reparseDeferralEnabledDefault ++ if (parser->m_reparseDeferralEnabled + && ! parser->m_parsingStatus.finalBuffer) { + // Heuristic: don't try to parse a partial token again until the amount of + // available data has increased significantly. 
+@@ -1149,6 +1150,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) { + parser->m_parseEndByteIndex = 0; + parser->m_parseEndPtr = NULL; + parser->m_partialTokenBytesBefore = 0; ++ parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault; + parser->m_declElementType = NULL; + parser->m_declAttributeId = NULL; + parser->m_declEntity = NULL; +@@ -2568,6 +2570,15 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold( + } + #endif /* defined(XML_DTD) || XML_GE == 1 */ + ++XML_Bool XMLCALL ++XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) { ++ if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) { ++ parser->m_reparseDeferralEnabled = enabled; ++ return XML_TRUE; ++ } ++ return XML_FALSE; ++} ++ + /* Initially tag->rawName always points into the parse buffer; + for those TAG instances opened while the current parse buffer was + processed, and not yet closed, we need to store tag->rawName in a more +-- +2.33.0 + + diff --git a/expat.spec b/expat.spec index 4221902..b4ec4af 100644 --- a/expat.spec +++ b/expat.spec @@ -1,7 +1,7 @@ %define Rversion %(echo %{version} | sed -e 's/\\./_/g' -e 's/^/R_/') Name: expat Version: 2.5.0 -Release: 2 +Release: 3 Summary: An XML parser library License: MIT URL: https://libexpat.github.io/ @@ -15,6 +15,15 @@ Patch05: backport-001-CVE-2023-52426.patch Patch06: backport-002-CVE-2023-52426.patch Patch07: backport-003-CVE-2023-52426.patch Patch08: backport-004-CVE-2023-52426.patch +Patch09: backport-001-CVE-2023-52425.patch +Patch10: backport-002-CVE-2023-52425.patch +Patch11: backport-003-CVE-2023-52425.patch +Patch12: backport-004-CVE-2023-52425.patch +Patch13: backport-005-CVE-2023-52425.patch +Patch14: backport-006-CVE-2023-52425.patch +Patch15: backport-007-CVE-2023-52425.patch +Patch16: backport-008-CVE-2023-52425.patch +Patch17: backport-009-CVE-2023-52425.patch BuildRequires: sed,autoconf,automake,gcc-c++,libtool,xmlto @@ -68,6 +77,9 @@ make check %{_mandir}/man1/* 
%changelog +* Wed Jun 12 2024 wangjiang - 2.5.0-3 +- fix CVE-2023-52425 + * Mon Apr 8 2024 caixiaomeng - 2.5.0-2 - fix cve-2024-28757 and cve-2023-52426