123 lines
4.7 KiB
Diff
123 lines
4.7 KiB
Diff
From 3194d762dc6a80bca5d374fe5084888386fbadcd Mon Sep 17 00:00:00 2001
|
|
From: Snild Dolkow <snild@sony.com>
|
|
Date: Mon, 11 Sep 2023 15:31:24 +0200
|
|
Subject: [PATCH] Add app setting for enabling/disabling reparse heuristic
|
|
|
|
Suggested-by: Sebastian Pipping <sebastian@pipping.org>
|
|
CI-fighting-assistance-by: Sebastian Pipping <sebastian@pipping.org>
|
|
---
|
|
doc/reference.html | 24 +++++++++++++++++++++++-
|
|
lib/expat.h | 4 ++++
|
|
lib/xmlparse.c | 13 ++++++++++++-
|
|
3 files changed, 39 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/doc/reference.html b/doc/reference.html
|
|
index 309cb241..1ded3bbe 100644
|
|
--- a/doc/reference.html
|
|
+++ b/doc/reference.html
|
|
@@ -149,10 +149,11 @@ interface.</p>
|
|
</ul>
|
|
</li>
|
|
<li>
|
|
- <a href="#billion-laughs">Billion Laughs Attack Protection</a>
|
|
+ <a href="#attack-protection">Attack Protection</a>
|
|
<ul>
|
|
<li><a href="#XML_SetBillionLaughsAttackProtectionMaximumAmplification">XML_SetBillionLaughsAttackProtectionMaximumAmplification</a></li>
|
|
<li><a href="#XML_SetBillionLaughsAttackProtectionActivationThreshold">XML_SetBillionLaughsAttackProtectionActivationThreshold</a></li>
|
|
+ <li><a href="#XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</a></li>
|
|
</ul>
|
|
</li>
|
|
<li><a href="#miscellaneous">Miscellaneous Functions</a>
|
|
@@ -2172,6 +2173,27 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(XML_Parser p,
|
|
</p>
|
|
</div>
|
|
|
|
+<h4 id="XML_SetReparseDeferralEnabled">XML_SetReparseDeferralEnabled</h4>
|
|
+<pre class="fcndec">
|
|
+/* Added in Expat 2.6.0. */
|
|
+XML_Bool XMLCALL
|
|
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
|
|
+</pre>
|
|
+<div class="fcndef">
|
|
+ <p>
|
|
+ Large tokens may require many parse calls before enough data is available for Expat to parse them in full.
|
|
+ If Expat retried parsing the token on every parse call, parsing could take quadratic time.
|
|
+ To avoid this, Expat only retries once a significant amount of new data is available.
|
|
+ This function allows enabling or disabling this behavior for the given parser.
|
|
+ </p>
|
|
+ <p>
|
|
+ The <code>enabled</code> argument should be <code>XML_TRUE</code> or <code>XML_FALSE</code>.
|
|
+ </p>
|
|
+ <p>
|
|
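+ Returns <code>XML_TRUE</code> on success, and <code>XML_FALSE</code> on error (that is, if <code>parser</code> is <code>NULL</code> or <code>enabled</code> is neither <code>XML_TRUE</code> nor <code>XML_FALSE</code>).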
|
|
+ </p>
|
|
+</div>
|
|
+
|
|
<h3><a name="miscellaneous">Miscellaneous functions</a></h3>
|
|
|
|
<p>The functions in this section either obtain state information from
|
|
diff --git a/lib/expat.h b/lib/expat.h
|
|
index b7d6d354..a4033742 100644
|
|
--- a/lib/expat.h
|
|
+++ b/lib/expat.h
|
|
@@ -1036,6 +1036,10 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
|
|
XML_Parser parser, unsigned long long activationThresholdBytes);
|
|
#endif
|
|
|
|
+/* Added in Expat 2.6.0. */
|
|
+XMLPARSEAPI(XML_Bool)
|
|
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled);
|
|
+
|
|
/* Expat follows the semantic versioning convention.
|
|
See http://semver.org.
|
|
*/
|
|
diff --git a/lib/xmlparse.c b/lib/xmlparse.c
|
|
index e30e76aa..d95b054b 100644
|
|
--- a/lib/xmlparse.c
|
|
+++ b/lib/xmlparse.c
|
|
@@ -617,6 +617,7 @@ struct XML_ParserStruct {
|
|
XML_Index m_parseEndByteIndex;
|
|
const char *m_parseEndPtr;
|
|
size_t m_partialTokenBytesBefore; /* used in heuristic to avoid O(n^2) */
|
|
+ XML_Bool m_reparseDeferralEnabled;
|
|
XML_Char *m_dataBuf;
|
|
XML_Char *m_dataBufEnd;
|
|
XML_StartElementHandler m_startElementHandler;
|
|
@@ -953,7 +954,7 @@ callProcessor(XML_Parser parser, const char *start, const char *end,
|
|
const char **endPtr) {
|
|
const size_t have_now = EXPAT_SAFE_PTR_DIFF(end, start);
|
|
|
|
- if (g_reparseDeferralEnabledDefault
|
|
+ if (parser->m_reparseDeferralEnabled
|
|
&& ! parser->m_parsingStatus.finalBuffer) {
|
|
// Heuristic: don't try to parse a partial token again until the amount of
|
|
// available data has increased significantly.
|
|
@@ -1149,6 +1150,7 @@ parserInit(XML_Parser parser, const XML_Char *encodingName) {
|
|
parser->m_parseEndByteIndex = 0;
|
|
parser->m_parseEndPtr = NULL;
|
|
parser->m_partialTokenBytesBefore = 0;
|
|
+ parser->m_reparseDeferralEnabled = g_reparseDeferralEnabledDefault;
|
|
parser->m_declElementType = NULL;
|
|
parser->m_declAttributeId = NULL;
|
|
parser->m_declEntity = NULL;
|
|
@@ -2568,6 +2570,15 @@ XML_SetBillionLaughsAttackProtectionActivationThreshold(
|
|
}
|
|
#endif /* defined(XML_DTD) || XML_GE == 1 */
|
|
|
|
+XML_Bool XMLCALL
|
|
+XML_SetReparseDeferralEnabled(XML_Parser parser, XML_Bool enabled) {
|
|
+ if (parser != NULL && (enabled == XML_TRUE || enabled == XML_FALSE)) {
|
|
+ parser->m_reparseDeferralEnabled = enabled;
|
|
+ return XML_TRUE;
|
|
+ }
|
|
+ return XML_FALSE;
|
|
+}
|
|
+
|
|
/* Initially tag->rawName always points into the parse buffer;
|
|
for those TAG instances opened while the current parse buffer was
|
|
processed, and not yet closed, we need to store tag->rawName in a more
|
|
--
|
|
2.33.0
|
|
|
|
|