63 lines
2.7 KiB
Diff
From af7d2acf60b2d42506c7fb7e61ed3dbc7989dd01 Mon Sep 17 00:00:00 2001
|
|
From: Snild Dolkow <snild@sony.com>
|
|
Date: Thu, 31 Aug 2023 12:36:43 +0200
|
|
Subject: [PATCH] Always consume BOM bytes when found in prolog
|
|
|
|
Reference: https://github.com/libexpat/libexpat/commit/b1e955449cea6bb5862cd249e659c2123bd95a9e
|
|
Conflict: change xmlparse.c
|
|
|
|
The byte order mark is not correctly consumed when followed by an
|
|
incomplete token in a non-final parse. This results in the BOM staying
|
|
in the buffer, causing an invalid token error later.
|
|
|
|
This was not detected by existing tests because they either parse
|
|
everything in one call, or add a single byte at a time.
|
|
|
|
By moving forward when we find a BOM, we make sure that the BOM
|
|
bytes are properly consumed in all cases.
|
|
---
|
|
lib/xmlparse.c | 18 +++++++++---------
|
|
1 file changed, 9 insertions(+), 9 deletions(-)
|
|
|
|
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
|
|
index daceacf..184997d 100644
|
|
--- a/lib/xmlparse.c
|
|
+++ b/lib/xmlparse.c
|
|
@@ -4502,15 +4502,15 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
|
|
parser->m_processor = entityValueProcessor;
|
|
return entityValueProcessor(parser, next, end, nextPtr);
|
|
}
|
|
- /* If we are at the end of the buffer, this would cause XmlPrologTok to
|
|
- return XML_TOK_NONE on the next call, which would then cause the
|
|
- function to exit with *nextPtr set to s - that is what we want for other
|
|
- tokens, but not for the BOM - we would rather like to skip it;
|
|
- then, when this routine is entered the next time, XmlPrologTok will
|
|
- return XML_TOK_INVALID, since the BOM is still in the buffer
|
|
+ /* XmlPrologTok has now set the encoding based on the BOM it found, and we
|
|
+ must move s and nextPtr forward to consume the BOM.
|
|
+
|
|
+ If we didn't, and got XML_TOK_NONE from the next XmlPrologTok call, we
|
|
+ would leave the BOM in the buffer and return. On the next call to this
|
|
+ function, our XmlPrologTok call would return XML_TOK_INVALID, since it
|
|
+ is not valid to have multiple BOMs.
|
|
*/
|
|
- else if (tok == XML_TOK_BOM && next == end
|
|
- && ! parser->m_parsingStatus.finalBuffer) {
|
|
+ else if (tok == XML_TOK_BOM) {
|
|
# if defined(XML_DTD) || XML_GE == 1
|
|
if (! accountingDiffTolerated(parser, tok, s, next, __LINE__,
|
|
XML_ACCOUNT_DIRECT)) {
|
|
@@ -4520,7 +4520,7 @@ entityValueInitProcessor(XML_Parser parser, const char *s, const char *end,
|
|
# endif
|
|
|
|
*nextPtr = next;
|
|
- return XML_ERROR_NONE;
|
|
+ s = next;
|
|
}
|
|
/* If we get this token, we have the start of what might be a
|
|
normal tag, but not a declaration (i.e. it doesn't begin with
|
|
--
|
|
2.33.0
|
|
|
|
|