fix CVE-2023-52425

This commit is contained in:
wangjiang 2024-06-12 16:31:25 +08:00
parent fbd79d907d
commit 875f85950c
10 changed files with 994 additions and 1 deletions

View File

@ -0,0 +1,223 @@
From 6cc9677838ce4e68680f7877d71032ca6481ee56 Mon Sep 17 00:00:00 2001
From: Snild Dolkow <snild@sony.com>
Date: Thu, 17 Aug 2023 16:25:26 +0200
Subject: [PATCH] Skip parsing after repeated partials on the same token
Reference: https://github.com/libexpat/libexpat/pull/789/commits/9cdf9b8d77d5c2c2a27d15fb68dd3f83cafb45a1
Conflict: remove basic_tests.c
change xmlparse.c
MIME-Version: 1.0
Content-Type: text/plain; charset=utf-8
Content-Transfer-Encoding: 8bit
When the parse buffer contains the starting bytes of a token but not
all of them, we cannot parse the token to completion. We call this a
partial token. When this happens, the parse position is reset to the
start of the token, and the parse() call returns. The client is then
expected to provide more data and call parse() again.
In extreme cases, this means that the bytes of a token may be parsed
many times: once for every buffer refill required before the full token
is present in the buffer.
Math:
Assume there's a token of T bytes
Assume the client fills the buffer in chunks of X bytes
We'll try to parse X, 2X, 3X, 4X ... until mX == T (technically >=)
That's (m²+m)X/2 = (T²/X+T)/2 bytes parsed (arithmetic progression)
While it is alleviated by larger refills, this amounts to O(T²)
Expat grows its internal buffer by doubling it when necessary, but has
no way to inform the client about how much space is available. Instead,
we add a heuristic that skips parsing when we've repeatedly stopped on
an incomplete token. Specifically:
* Only try to parse if we have a certain amount of data buffered
* Every time we stop on an incomplete token, double the threshold
* As soon as any token completes, the threshold is reset
This means that when we get stuck on an incomplete token, the threshold
grows exponentially, effectively making the client perform larger buffer
fills, limiting how many times we can end up re-parsing the same bytes.
Math:
Assume there's a token of T bytes
Assume the client fills the buffer in chunks of X bytes
We'll try to parse X, 2X, 4X, 8X ... until (2^k)X == T (or larger)
That's (2^(k+1)-1)X bytes parsed -- e.g. 15X if T = 8X
This is equal to 2T-X, which amounts to O(T)
We could've chosen a faster growth rate, e.g. 4 or 8. Those seem to
increase performance further, at the cost of further increasing the
risk of growing the buffer more than necessary. This can easily be
adjusted in the future, if desired.
This is all completely transparent to the client, except for:
1. possible delay of some callbacks (when our heuristic overshoots)
2. apps that never do isFinal=XML_TRUE could miss data at the end
For the affected testdata, this change shows a 100-400x speedup.
The recset.xml benchmark shows no clear change either way.
Before:
benchmark -n ../testdata/largefiles/recset.xml 65535 3
3 loops, with buffer size 65535. Average time per loop: 0.270223
benchmark -n ../testdata/largefiles/aaaaaa_attr.xml 4096 3
3 loops, with buffer size 4096. Average time per loop: 15.033048
benchmark -n ../testdata/largefiles/aaaaaa_cdata.xml 4096 3
3 loops, with buffer size 4096. Average time per loop: 0.018027
benchmark -n ../testdata/largefiles/aaaaaa_comment.xml 4096 3
3 loops, with buffer size 4096. Average time per loop: 11.775362
benchmark -n ../testdata/largefiles/aaaaaa_tag.xml 4096 3
3 loops, with buffer size 4096. Average time per loop: 11.711414
benchmark -n ../testdata/largefiles/aaaaaa_text.xml 4096 3
3 loops, with buffer size 4096. Average time per loop: 0.019362
After:
./run.sh benchmark -n ../testdata/largefiles/recset.xml 65535 3
3 loops, with buffer size 65535. Average time per loop: 0.269030
./run.sh benchmark -n ../testdata/largefiles/aaaaaa_attr.xml 4096 3
3 loops, with buffer size 4096. Average time per loop: 0.044794
./run.sh benchmark -n ../testdata/largefiles/aaaaaa_cdata.xml 4096 3
3 loops, with buffer size 4096. Average time per loop: 0.016377
./run.sh benchmark -n ../testdata/largefiles/aaaaaa_comment.xml 4096 3
3 loops, with buffer size 4096. Average time per loop: 0.027022
./run.sh benchmark -n ../testdata/largefiles/aaaaaa_tag.xml 4096 3
3 loops, with buffer size 4096. Average time per loop: 0.099360
./run.sh benchmark -n ../testdata/largefiles/aaaaaa_text.xml 4096 3
3 loops, with buffer size 4096. Average time per loop: 0.017956
---
lib/xmlparse.c | 58 +++++++++++++++++++++++++++++---------------
1 file changed, 39 insertions(+), 19 deletions(-)
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 5ba56eae..32df1eb9 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -65,6 +65,7 @@
# endif
#endif
+#include <stdbool.h>
#include <stddef.h>
#include <string.h> /* memset(), memcpy() */
#include <assert.h>
@@ -613,6 +614,7 @@ struct XML_ParserStruct {
const char *m_bufferLim;
XML_Index m_parseEndByteIndex;
const char *m_parseEndPtr;
+ size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
XML_Char *m_dataBuf;
XML_Char *m_dataBufEnd;
XML_StartElementHandler m_startElementHandler;
@@ -944,6 +946,32 @@ get_hash_secret_salt(XML_Parser parser) {
return parser->m_hash_secret_salt;
}
+static enum XML_Error
+callProcessor(XML_Parser parser, const char *start, const char *end,
+ const char **endPtr) {
+ const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
+
+ if (! parser->m_parsingStatus.finalBuffer) {
+ // Heuristic: don't try to parse a partial token again until the amount of
+ // available data has increased significantly.
+ const size_t had_before = parser->m_partialTokenBytesBefore;
+ const bool enough = (have_now >= 2 * had_before);
+
+ if (! enough) {
+ *endPtr = start; // callers may expect this to be set
+ return XML_ERROR_NONE;
+ }
+ }
+ const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
+ // if we consumed nothing, remember what we had on this parse attempt.
+ if (*endPtr == start) {
+ parser->m_partialTokenBytesBefore = have_now;
+ } else {
+ parser->m_partialTokenBytesBefore = 0;
+ }
+ return ret;
+}
+
static XML_Bool /* only valid for root parser */
startParsing(XML_Parser parser) {
/* hash functions must be initialized before setContext() is called */
@@ -1117,6 +1145,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
parser->m_bufferEnd = parser->m_buffer;
parser->m_parseEndByteIndex = 0;
parser->m_parseEndPtr = NULL;
+ parser->m_partialTokenBytesBefore = 0;
parser->m_declElementType = NULL;
parser->m_declAttributeId = NULL;
parser->m_declEntity = NULL;
@@ -1849,29 +1878,20 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
to detect errors based on that fact.
*/
parser->m_errorCode
- = parser->m_processor(parser, parser->m_bufferPtr,
- parser->m_parseEndPtr, &parser->m_bufferPtr);
+ = callProcessor(parser, parser->m_bufferPtr, parser->m_parseEndPtr,
+ &parser->m_bufferPtr);
if (parser->m_errorCode == XML_ERROR_NONE) {
switch (parser->m_parsingStatus.parsing) {
case XML_SUSPENDED:
- /* It is hard to be certain, but it seems that this case
- * cannot occur. This code is cleaning up a previous parse
- * with no new data (since len == 0). Changing the parsing
- * state requires getting to execute a handler function, and
- * there doesn't seem to be an opportunity for that while in
- * this circumstance.
- *
- * Given the uncertainty, we retain the code but exclude it
- * from coverage tests.
- *
- * LCOV_EXCL_START
- */
+ /* While we added no new data, the finalBuffer flag may have caused
+ * us to parse previously-unparsed data in the internal buffer.
+ * If that triggered a callback to the application, it would have
+ * had an opportunity to suspend parsing. */
XmlUpdatePosition(parser->m_encoding, parser->m_positionPtr,
parser->m_bufferPtr, &parser->m_position);
parser->m_positionPtr = parser->m_bufferPtr;
return XML_STATUS_SUSPENDED;
- /* LCOV_EXCL_STOP */
case XML_INITIALIZED:
case XML_PARSING:
parser->m_parsingStatus.parsing = XML_FINISHED;
@@ -1901,7 +1921,7 @@ XML_Parse(XML_Parser parser, const char *s, int len, int isFinal) {
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
parser->m_errorCode
- = parser->m_processor(parser, s, parser->m_parseEndPtr = s + len, &end);
+ = callProcessor(parser, s, parser->m_parseEndPtr = s + len, &end);
if (parser->m_errorCode != XML_ERROR_NONE) {
parser->m_eventEndPtr = parser->m_eventPtr;
@@ -2004,8 +2024,8 @@ XML_ParseBuffer(XML_Parser parser, int len, int isFinal) {
parser->m_parseEndByteIndex += len;
parser->m_parsingStatus.finalBuffer = (XML_Bool)isFinal;
- parser->m_errorCode = parser->m_processor(
- parser, start, parser->m_parseEndPtr, &parser->m_bufferPtr);
+ parser->m_errorCode = callProcessor(parser, start, parser->m_parseEndPtr,
+ &parser->m_bufferPtr);
if (parser->m_errorCode != XML_ERROR_NONE) {
parser->m_eventEndPtr = parser->m_eventPtr;
@@ -2192,7 +2212,7 @@ XML_ResumeParser(XML_Parser parser) {
}
parser->m_parsingStatus.parsing = XML_PARSING;
- parser->m_errorCode = parser->m_processor(
+ parser->m_errorCode = callProcessor(
parser, parser->m_bufferPtr, parser->m_parseEndPtr, &parser->m_bufferPtr);
if (parser->m_errorCode != XML_ERROR_NONE) {
--
2.33.0

View File

@ -0,0 +1,40 @@
From c3a4816e175ede7da1a692a50d6251efdfe41a45 Mon Sep 17 00:00:00 2001
From: Snild Dolkow <snild@sony.com>
Date: Mon, 4 Sep 2023 17:21:14 +0200
Subject: [PATCH] Don't update partial token heuristic on error
Reference: https://github.com/libexpat/libexpat/pull/789/commits/1b9d398517befeb944cbbadadf10992b07e96fa2
Conflict: no
Suggested-by: Sebastian Pipping <sebastian@pipping.org>
---
lib/xmlparse.c | 12 +++++++-----
1 file changed, 7 insertions(+), 5 deletions(-)
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 32df1eb9..a8414dd7 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -963,11 +963,13 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
}
}
const enum XML_Error ret = parser->m_processor(parser, start, end, endPtr);
- // if we consumed nothing, remember what we had on this parse attempt.
- if (*endPtr == start) {
- parser->m_partialTokenBytesBefore = have_now;
- } else {
- parser->m_partialTokenBytesBefore = 0;
+ if (ret == XML_ERROR_NONE) {
+ // if we consumed nothing, remember what we had on this parse attempt.
+ if (*endPtr == start) {
+ parser->m_partialTokenBytesBefore = have_now;
+ } else {
+ parser->m_partialTokenBytesBefore = 0;
+ }
}
return ret;
}
--
2.33.0

View File

@ -0,0 +1,62 @@
From af7d2acf60b2d42506c7fb7e61ed3dbc7989dd01 Mon Sep 17 00:00:00 2001
From: Snild Dolkow <snild@sony.com>
Date: Thu, 31 Aug 2023 12:36:43 +0200
Subject: [PATCH] Always consume BOM bytes when found in prolog
Reference: https://github.com/libexpat/libexpat/commit/b1e955449cea6bb5862cd249e659c2123bd95a9e
Conflict: change xmlparse.c
The byte order mark is not correctly consumed when followed by an
incomplete token in a non-final parse. This results in the BOM staying
in the buffer, causing an invalid token error later.
This was not detected by existing tests because they either parse
everything in one call, or add a single byte at a time.
By moving forward when we find a BOM, we make sure that the BOM
bytes are properly consumed in all cases.
---
lib/xmlparse.c | 18 +++++++++---------
1 file changed, 9 insertions(+), 9 deletions(-)
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index daceacf..184997d 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -4502,15 +4502,15 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
parser->m_processor = entityValueProcessor;
return entityValueProcessor(parser, next, end, nextPtr);
}
- /* If we are at the end of the buffer, this would cause XmlPrologTok to
- return XML_TOK_NONE on the next call, which would then cause the
- function to exit with *nextPtr set to s - that is what we want for other
- tokens, but not for the BOM - we would rather like to skip it;
- then, when this routine is entered the next time, XmlPrologTok will
- return XML_TOK_INVALID, since the BOM is still in the buffer
+ /* XmlPrologTok has now set the encoding based on the BOM it found, and we
+ must move s and nextPtr forward to consume the BOM.
+
+ If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
+ would leave the BOM in the buffer and return. On the next call to this
+ function, our XmlPrologTok call would return XML_TOK_INVALID, since it
+ is not valid to have multiple BOMs.
*/
- else if (tok == XML_TOK_BOM && next == end
- && ! parser->m_parsingStatus.finalBuffer) {
+ else if (tok == XML_TOK_BOM) {
# if defined(XML_DTD) || XML_GE == 1
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
XML_ACCOUNT_DIRECT)) {
@@ -4520,7 +4520,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
# endif
*nextPtr = next;
- return XML_ERROR_NONE;
+ s = next;
}
/* If we get this token, we have the start of what might be a
normal tag, but not a declaration (i.e. it doesn't begin with
--
2.33.0

View File

@ -0,0 +1,41 @@
From a7b9a07cd50a4422194f64eb50181fcaec3ef0cf Mon Sep 17 00:00:00 2001
From: Snild Dolkow <snild@sony.com>
Date: Thu, 24 Aug 2023 09:31:31 +0200
Subject: [PATCH] tests: Move triplet_start_checker flag check after isFinal=1
call
Reference: https://github.com/libexpat/libexpat/pull/745/commits/d52b4141496bd26bd716d88c67af8f2250bd0da6
Conflict: remove ns_tests.c
change runtests.c
There is no guarantee that the callback will happen before the parse
call with isFinal=XML_TRUE. Let's move the assertion to a location
where we know it must have happened.
---
tests/runtests.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/runtests.c b/tests/runtests.c
index 45ba5d59..8f1d11f0 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -6527,13 +6527,13 @@ START_TEST(test_return_ns_triplet) {
if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
- if (! triplet_start_flag)
- fail("triplet_start_checker not invoked");
/* Check that unsetting "return triplets" fails while still parsing */
XML_SetReturnNSTriplet(g_parser, XML_FALSE);
if (_XML_Parse_SINGLE_BYTES(g_parser, epilog, (int)strlen(epilog), XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
+ if (! triplet_start_flag)
+ fail("triplet_start_checker not invoked");
if (! triplet_end_flag)
fail("triplet_end_checker not invoked");
if (dummy_handler_flags
--
2.33.0

View File

@ -0,0 +1,32 @@
From 1b728cf8376a166d21eae818dfa66c55b6209bc4 Mon Sep 17 00:00:00 2001
From: Snild Dolkow <snild@sony.com>
Date: Thu, 24 Aug 2023 14:10:58 +0200
Subject: [PATCH] tests: Set isFinal in test_column_number_after_parse
Reference: https://github.com/libexpat/libexpat/pull/745/commits/2cee1061e2fec10633c3f02a961dabf95e85910a
Conflict: remove basic_tests.c
change runtests.c
Without this, parsing of the end tag may be deferred, yielding an
unexpected column number.
---
tests/runtests.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/tests/runtests.c b/tests/runtests.c
index 8f1d11f0..9931d85e 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -1071,7 +1071,7 @@ START_TEST(test_column_number_after_parse) {
const char *text = "<tag></tag>";
XML_Size colno;
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
== XML_STATUS_ERROR)
xml_failure(g_parser);
colno = XML_GetCurrentColumnNumber(g_parser);
--
2.33.0

View File

@ -0,0 +1,39 @@
From 25749ff3dad2216dfd7596498b592747a3d9305e Mon Sep 17 00:00:00 2001
From: Snild Dolkow <snild@sony.com>
Date: Thu, 31 Aug 2023 16:14:38 +0200
Subject: [PATCH] tests: Set isFinal=1 in line/column-number-after-error tests
Reference: https://github.com/libexpat/libexpat/pull/745/commits/d4105a9080271a8d4996d2454f89be9992cb268a
Conflict: remove basic_tests.c
change runtests.c
---
tests/runtests.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/tests/runtests.c b/tests/runtests.c
index 45ba5d59..d367271f 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -1139,7 +1139,7 @@ START_TEST(test_line_number_after_error) {
" <b>\n"
" </a>"; /* missing </b> */
XML_Size lineno;
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
!= XML_STATUS_ERROR)
fail("Expected a parse error");
@@ -1158,7 +1158,7 @@ START_TEST(test_column_number_after_error) {
" <b>\n"
" </a>"; /* missing </b> */
XML_Size colno;
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_FALSE)
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
!= XML_STATUS_ERROR)
fail("Expected a parse error");
--
2.33.0

View File

@ -0,0 +1,368 @@
From 1d784ef14933ee775fc20ba4435b8def6b70eae3 Mon Sep 17 00:00:00 2001
From: caixiaomeng 00662745 <caixiaomeng2@huawei.com>
Date: Mon, 4 Mar 2024 11:00:25 +0800
Subject: [PATCH] tests: Adapt test default current cases for 2.4.1
Reference: https://github.com/libexpat/libexpat/commit/7474fe3d3f686a4d76f1df48c5db0eced295059b
Conflict: yes
---
tests/runtests.c | 303 +++++++++++++++++++++++++++++++----------
1 file changed, 234 insertions(+), 69 deletions(-)
diff --git a/tests/runtests.c b/tests/runtests.c
index c0aa1773..e97a7c51 100644
--- a/tests/runtests.c
+++ b/tests/runtests.c
@@ -2536,34 +2536,75 @@ START_TEST(test_memory_allocation) {
}
END_TEST
+/* Storage for handlers that record their function name and int arg */
+
+struct handler_record_entry {
+ const char *name;
+ int arg;
+};
+struct handler_record_list {
+ int count;
+ struct handler_record_entry entries[50]; // arbitrary big-enough max count
+};
+
+# define handler_record_get(storage, index) \
+ _handler_record_get((storage), (index), __FILE__, __LINE__)
+
+# define assert_record_handler_called(storage, index, expected_name, \
+ expected_arg) \
+ do { \
+ const struct handler_record_entry *e \
+ = handler_record_get(storage, index); \
+ assert(strcmp(e->name, expected_name) == 0); \
+ assert(e->arg == (expected_arg)); \
+ } while (0)
+
+/* Handlers that record their function name and int arg. */
+
+static void
+record_call(struct handler_record_list *const rec, const char *funcname,
+ const int arg) {
+ const int max_entries = sizeof(rec->entries) / sizeof(rec->entries[0]);
+ assert(rec->count < max_entries);
+ struct handler_record_entry *const e = &rec->entries[rec->count++];
+ e->name = funcname;
+ e->arg = arg;
+}
+
static void XMLCALL
record_default_handler(void *userData, const XML_Char *s, int len) {
UNUSED_P(s);
- UNUSED_P(len);
- CharData_AppendXMLChars((CharData *)userData, XCS("D"), 1);
+ record_call((struct handler_record_list *)userData, __func__, len);
}
static void XMLCALL
record_cdata_handler(void *userData, const XML_Char *s, int len) {
UNUSED_P(s);
- UNUSED_P(len);
- CharData_AppendXMLChars((CharData *)userData, XCS("C"), 1);
+ record_call((struct handler_record_list *)userData, __func__, len);
XML_DefaultCurrent(g_parser);
}
static void XMLCALL
record_cdata_nodefault_handler(void *userData, const XML_Char *s, int len) {
UNUSED_P(s);
- UNUSED_P(len);
- CharData_AppendXMLChars((CharData *)userData, XCS("c"), 1);
+ record_call((struct handler_record_list *)userData, __func__, len);
}
static void XMLCALL
record_skip_handler(void *userData, const XML_Char *entityName,
int is_parameter_entity) {
UNUSED_P(entityName);
- CharData_AppendXMLChars((CharData *)userData,
- is_parameter_entity ? XCS("E") : XCS("e"), 1);
+ record_call((struct handler_record_list *)userData, __func__,
+ is_parameter_entity);
+}
+
+static const struct handler_record_entry *
+_handler_record_get(const struct handler_record_list *storage, int index,
+ const char *file, int line) {
+ if (storage->count <= index) {
+ fail("too few handler calls");
+ }
+ return &storage->entries[index];
}
/* Test XML_DefaultCurrent() passes handling on correctly */
@@ -2573,78 +2614,202 @@ START_TEST(test_default_current) {
"<!ENTITY entity '&#37;'>\n"
"]>\n"
"<doc>&entity;</doc>";
- CharData storage;
- XML_SetDefaultHandler(g_parser, record_default_handler);
- XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
- CharData_Init(&storage);
- XML_SetUserData(g_parser, &storage);
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
- == XML_STATUS_ERROR)
- xml_failure(g_parser);
- CharData_CheckXMLChars(&storage, XCS("DCDCDCDCDCDD"));
+ {
+ struct handler_record_list storage;
+ storage.count = 0;
+ XML_SetDefaultHandler(g_parser, record_default_handler);
+ XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
+ XML_SetUserData(g_parser, &storage);
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(g_parser);
+ int i = 0;
+ assert_record_handler_called(&storage, i++, "record_default_handler", 5);
+ // we should have gotten one or more cdata callbacks, totaling 5 chars
+ int cdata_len_remaining = 5;
+ while (cdata_len_remaining > 0) {
+ const struct handler_record_entry *c_entry
+ = handler_record_get(&storage, i++);
+ assert(strcmp(c_entry->name, "record_cdata_handler") == 0);
+ assert(c_entry->arg > 0);
+ assert(c_entry->arg <= cdata_len_remaining);
+ cdata_len_remaining -= c_entry->arg;
+ // default handler must follow, with the exact same len argument.
+ assert_record_handler_called(&storage, i++, "record_default_handler",
+ c_entry->arg);
+ }
+ assert_record_handler_called(&storage, i++, "record_default_handler", 6);
+ assert(storage.count == i);
+ }
/* Again, without the defaulting */
- XML_ParserReset(g_parser, NULL);
- XML_SetDefaultHandler(g_parser, record_default_handler);
- XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
- CharData_Init(&storage);
- XML_SetUserData(g_parser, &storage);
- if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
- == XML_STATUS_ERROR)
- xml_failure(g_parser);
- CharData_CheckXMLChars(&storage, XCS("DcccccD"));
+ {
+ struct handler_record_list storage;
+ storage.count = 0;
+ XML_ParserReset(g_parser, NULL);
+ XML_SetDefaultHandler(g_parser, record_default_handler);
+ XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
+ XML_SetUserData(g_parser, &storage);
+ if (_XML_Parse_SINGLE_BYTES(g_parser, text, (int)strlen(text), XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(g_parser);
+ int i = 0;
+ assert_record_handler_called(&storage, i++, "record_default_handler", 5);
+ // we should have gotten one or more cdata callbacks, totaling 5 chars
+ int cdata_len_remaining = 5;
+ while (cdata_len_remaining > 0) {
+ const struct handler_record_entry *c_entry
+ = handler_record_get(&storage, i++);
+ assert(strcmp(c_entry->name, "record_cdata_nodefault_handler") == 0);
+ assert(c_entry->arg > 0);
+ assert(c_entry->arg <= cdata_len_remaining);
+ cdata_len_remaining -= c_entry->arg;
+ }
+ assert_record_handler_called(&storage, i++, "record_default_handler", 6);
+ assert(storage.count == i);
+ }
/* Now with an internal entity to complicate matters */
- XML_ParserReset(g_parser, NULL);
- XML_SetDefaultHandler(g_parser, record_default_handler);
- XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
- CharData_Init(&storage);
- XML_SetUserData(g_parser, &storage);
- if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
- XML_TRUE)
- == XML_STATUS_ERROR)
- xml_failure(g_parser);
- /* The default handler suppresses the entity */
- CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDDD"));
+ {
+ struct handler_record_list storage;
+ storage.count = 0;
+ XML_ParserReset(g_parser, NULL);
+ XML_SetDefaultHandler(g_parser, record_default_handler);
+ XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
+ XML_SetUserData(g_parser, &storage);
+ if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
+ XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(g_parser);
+ /* The default handler suppresses the entity */
+ assert_record_handler_called(&storage, 0, "record_default_handler", 9);
+ assert_record_handler_called(&storage, 1, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 2, "record_default_handler", 3);
+ assert_record_handler_called(&storage, 3, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 4, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 5, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 6, "record_default_handler", 8);
+ assert_record_handler_called(&storage, 7, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 8, "record_default_handler", 6);
+ assert_record_handler_called(&storage, 9, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 10, "record_default_handler", 7);
+ assert_record_handler_called(&storage, 11, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 12, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 13, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 14, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 15, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 16, "record_default_handler", 5);
+ assert_record_handler_called(&storage, 17, "record_default_handler", 8);
+ assert_record_handler_called(&storage, 18, "record_default_handler", 6);
+ assert(storage.count == 19);
+ }
/* Again, with a skip handler */
- XML_ParserReset(g_parser, NULL);
- XML_SetDefaultHandler(g_parser, record_default_handler);
- XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
- XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
- CharData_Init(&storage);
- XML_SetUserData(g_parser, &storage);
- if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
- XML_TRUE)
- == XML_STATUS_ERROR)
- xml_failure(g_parser);
- /* The default handler suppresses the entity */
- CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDeD"));
+ {
+ struct handler_record_list storage;
+ storage.count = 0;
+ XML_ParserReset(g_parser, NULL);
+ XML_SetDefaultHandler(g_parser, record_default_handler);
+ XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
+ XML_SetSkippedEntityHandler(g_parser, record_skip_handler);
+ XML_SetUserData(g_parser, &storage);
+ if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
+ XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(g_parser);
+ /* The default handler suppresses the entity */
+ assert_record_handler_called(&storage, 0, "record_default_handler", 9);
+ assert_record_handler_called(&storage, 1, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 2, "record_default_handler", 3);
+ assert_record_handler_called(&storage, 3, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 4, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 5, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 6, "record_default_handler", 8);
+ assert_record_handler_called(&storage, 7, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 8, "record_default_handler", 6);
+ assert_record_handler_called(&storage, 9, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 10, "record_default_handler", 7);
+ assert_record_handler_called(&storage, 11, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 12, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 13, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 14, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 15, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 16, "record_default_handler", 5);
+ assert_record_handler_called(&storage, 17, "record_skip_handler", 0);
+ assert_record_handler_called(&storage, 18, "record_default_handler", 6);
+ assert(storage.count == 19);
+ }
/* This time, allow the entity through */
- XML_ParserReset(g_parser, NULL);
- XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
- XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
- CharData_Init(&storage);
- XML_SetUserData(g_parser, &storage);
- if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
- XML_TRUE)
- == XML_STATUS_ERROR)
- xml_failure(g_parser);
- CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDCDD"));
+ {
+ struct handler_record_list storage;
+ storage.count = 0;
+ XML_ParserReset(g_parser, NULL);
+ XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
+ XML_SetCharacterDataHandler(g_parser, record_cdata_handler);
+ XML_SetUserData(g_parser, &storage);
+ if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
+ XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(g_parser);
+ assert_record_handler_called(&storage, 0, "record_default_handler", 9);
+ assert_record_handler_called(&storage, 1, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 2, "record_default_handler", 3);
+ assert_record_handler_called(&storage, 3, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 4, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 5, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 6, "record_default_handler", 8);
+ assert_record_handler_called(&storage, 7, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 8, "record_default_handler", 6);
+ assert_record_handler_called(&storage, 9, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 10, "record_default_handler", 7);
+ assert_record_handler_called(&storage, 11, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 12, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 13, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 14, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 15, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 16, "record_default_handler", 5);
+ assert_record_handler_called(&storage, 17, "record_cdata_handler", 1);
+ assert_record_handler_called(&storage, 18, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 19, "record_default_handler", 6);
+ assert(storage.count == 20);
+ }
/* Finally, without passing the cdata to the default handler */
- XML_ParserReset(g_parser, NULL);
- XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
- XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
- CharData_Init(&storage);
- XML_SetUserData(g_parser, &storage);
- if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
- XML_TRUE)
- == XML_STATUS_ERROR)
- xml_failure(g_parser);
- CharData_CheckXMLChars(&storage, XCS("DDDDDDDDDDDDDDDDDcD"));
+ {
+ struct handler_record_list storage;
+ storage.count = 0;
+ XML_ParserReset(g_parser, NULL);
+ XML_SetDefaultHandlerExpand(g_parser, record_default_handler);
+ XML_SetCharacterDataHandler(g_parser, record_cdata_nodefault_handler);
+ XML_SetUserData(g_parser, &storage);
+ if (_XML_Parse_SINGLE_BYTES(g_parser, entity_text, (int)strlen(entity_text),
+ XML_TRUE)
+ == XML_STATUS_ERROR)
+ xml_failure(g_parser);
+ assert_record_handler_called(&storage, 0, "record_default_handler", 9);
+ assert_record_handler_called(&storage, 1, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 2, "record_default_handler", 3);
+ assert_record_handler_called(&storage, 3, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 4, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 5, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 6, "record_default_handler", 8);
+ assert_record_handler_called(&storage, 7, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 8, "record_default_handler", 6);
+ assert_record_handler_called(&storage, 9, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 10, "record_default_handler", 7);
+ assert_record_handler_called(&storage, 11, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 12, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 13, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 14, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 15, "record_default_handler", 1);
+ assert_record_handler_called(&storage, 16, "record_default_handler", 5);
+ assert_record_handler_called(&storage, 17, "record_cdata_nodefault_handler",
+ 1);
+ assert_record_handler_called(&storage, 18, "record_default_handler", 6);
+ assert(storage.count == 19);
+ }
}
END_TEST
--
2.33.0

View File

@ -0,0 +1,54 @@
From 7f54667c59c5a884beba5dce17003715d7cbaffa Mon Sep 17 00:00:00 2001
From: Snild Dolkow <snild@sony.com>
Date: Mon, 18 Sep 2023 20:32:55 +0200
Subject: [PATCH] tests: Run both with and without partial token heuristic
If we always run with the heuristic enabled, it may hide some bugs by
grouping up input into bigger parse attempts.
CI-fighting-assistance-by: Sebastian Pipping <sebastian@pipping.org>
---
lib/internal.h | 2 ++
lib/xmlparse.c | 5 ++++-
2 files changed, 6 insertions(+), 1 deletion(-)
diff --git a/lib/internal.h b/lib/internal.h
index 444eba0f..dda42d88 100644
--- a/lib/internal.h
+++ b/lib/internal.h
@@ -158,6 +158,8 @@ unsigned long long testingAccountingGetCountBytesIndirect(XML_Parser parser);
const char *unsignedCharToPrintable(unsigned char c);
#endif
+extern XML_Bool g_reparseDeferralEnabledDefault; // written ONLY in runtests.c
+
#ifdef __cplusplus
}
#endif
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index 32df1eb9..e30e76aa 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -599,6 +599,8 @@ static unsigned long getDebugLevel(const char *variableName,
? 0 \
: ((*((pool)->ptr)++ = c), 1))
+XML_Bool g_reparseDeferralEnabledDefault = XML_TRUE; // write ONLY in runtests.c
+
struct XML_ParserStruct {
/* The first member must be m_userData so that the XML_GetUserData
macro works. */
@@ -951,7 +953,8 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
const char **endPtr) {
const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
- if (! parser->m_parsingStatus.finalBuffer) {
+ if (g_reparseDeferralEnabledDefault
+ && ! parser->m_parsingStatus.finalBuffer) {
// Heuristic: don't try to parse a partial token again until the amount of
// available data has increased significantly.
const size_t had_before = parser->m_partialTokenBytesBefore;
--
2.33.0

View File

@ -0,0 +1,122 @@
From 3194d762dc6a80bca5d374fe5084888386fbadcd Mon Sep 17 00:00:00 2001
From: Snild Dolkow <snild@sony.com>
Date: Mon, 11 Sep 2023 15:31:24 +0200
Subject: [PATCH] Add app setting for enabling/disabling reparse heuristic
Suggested-by: Sebastian Pipping <sebastian@pipping.org>
CI-fighting-assistance-by: Sebastian Pipping <sebastian@pipping.org>
---
doc/reference.html | 24 +++++++++++++++++++++++-
lib/expat.h | 4 ++++
lib/xmlparse.c | 13 ++++++++++++-
3 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/doc/reference.html b/doc/reference.html
index 309cb241..1ded3bbe 100644
--- a/doc/reference.html
+++ b/doc/reference.html
@@ -149,10 +149,11 @@ interface.</p>
</ul>
</li>
<li>
- <a href="#billion-laughs">Billion Laughs Attack Protection</a>
+ <a href="#attack-protection">Attack Protection</a>
<ul>
<li><a href="#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></li>
<li><a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></li>
+ <li><a href="#XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</a></li>
</ul>
</li>
<li><a href="#miscellaneous">Miscellaneous Functions</a>
@@ -2172,6 +2173,27 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p,
</p>
</div>
+<h4 id="XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</h4>
+<pre class="fcndec">
+/* Added in Expat 2.6.0. */
+XML_Bool XMLCALL
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
+</pre>
+<div class="fcndef">
+ <p>
+ A large token may require many parse calls before enough data is available for Expat to parse it in full.
+ If Expat retried parsing the token on every parse call, parsing could take quadratic time.
+ To avoid this, Expat retries only after a significant amount of new data has become available.
+ This function allows disabling this behavior.
+ </p>
+ <p>
+ The <code>enabled</code> argument should be <code>XML_TRUE</code> or <code>XML_FALSE</code>.
+ </p>
+ <p>
+ Returns <code>XML_TRUE</code> on success, and <code>XML_FALSE</code> on error.
+ </p>
+</div>
+
<h3><a name="miscellaneous">Miscellaneous functions</a></h3>
<p>The functions in this section either obtain state information from
diff --git a/lib/expat.h b/lib/expat.h
index b7d6d354..a4033742 100644
--- a/lib/expat.h
+++ b/lib/expat.h
@@ -1036,6 +1036,10 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
XML_Parser parser, unsigned long long activationThresholdBytes);
#endif
+/* Added in Expat 2.6.0. */
+XMLPARSEAPI(XML_Bool)
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
+
/* Expat follows the semantic versioning convention.
See http://semver.org.
*/
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
index e30e76aa..d95b054b 100644
--- a/lib/xmlparse.c
+++ b/lib/xmlparse.c
@@ -617,6 +617,7 @@ struct XML_ParserStruct {
XML_Index m_parseEndByteIndex;
const char *m_parseEndPtr;
size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
+ XML_Bool m_reparseDeferralEnabled;
XML_Char *m_dataBuf;
XML_Char *m_dataBufEnd;
XML_StartElementHandler m_startElementHandler;
@@ -953,7 +954,7 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
const char **endPtr) {
const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
- if (g_reparseDeferralEnabledDefault
+ if (parser->m_reparseDeferralEnabled
&& ! parser->m_parsingStatus.finalBuffer) {
// Heuristic: don't try to parse a partial token again until the amount of
// available data has increased significantly.
@@ -1149,6 +1150,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
parser->m_parseEndByteIndex = 0;
parser->m_parseEndPtr = NULL;
parser->m_partialTokenBytesBefore = 0;
+ parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
parser->m_declElementType = NULL;
parser->m_declAttributeId = NULL;
parser->m_declEntity = NULL;
@@ -2568,6 +2570,15 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
}
#endif /* defined(XML_DTD) || XML_GE == 1 */
+XML_Bool XMLCALL
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
+ if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
+ parser->m_reparseDeferralEnabled = enabled;
+ return XML_TRUE;
+ }
+ return XML_FALSE;
+}
+
/* Initially tag->rawName always points into the parse buffer;
for those TAG instances opened while the current parse buffer was
processed, and not yet closed, we need to store tag->rawName in a more
--
2.33.0

View File

@ -1,7 +1,7 @@
%define Rversion %(echo %{version} | sed -e 's/\\./_/g' -e 's/^/R_/')
Name: expat
Version: 2.5.0
Release: 2
Release: 3
Summary: An XML parser library
License: MIT
URL: https://libexpat.github.io/
@ -15,6 +15,15 @@ Patch05: backport-001-CVE-2023-52426.patch
Patch06: backport-002-CVE-2023-52426.patch
Patch07: backport-003-CVE-2023-52426.patch
Patch08: backport-004-CVE-2023-52426.patch
Patch09: backport-001-CVE-2023-52425.patch
Patch10: backport-002-CVE-2023-52425.patch
Patch11: backport-003-CVE-2023-52425.patch
Patch12: backport-004-CVE-2023-52425.patch
Patch13: backport-005-CVE-2023-52425.patch
Patch14: backport-006-CVE-2023-52425.patch
Patch15: backport-007-CVE-2023-52425.patch
Patch16: backport-008-CVE-2023-52425.patch
Patch17: backport-009-CVE-2023-52425.patch
BuildRequires: sed,autoconf,automake,gcc-c++,libtool,xmlto
@ -68,6 +77,9 @@ make check
%{_mandir}/man1/*
%changelog
* Wed Jun 12 2024 wangjiang <wangjiang37@h-partners.com> - 2.5.0-3
- fix CVE-2023-52425
* Mon Apr 8 2024 caixiaomeng <caixiaomeng2@huawei.com> - 2.5.0-2
- fix CVE-2024-28757 and CVE-2023-52426