82 lines
2.6 KiB
Diff
82 lines
2.6 KiB
Diff
From 68eadabd0055cba39c4ea1acfa8931d0d10a44e5 Mon Sep 17 00:00:00 2001
|
|
From: Nick Wellnhofer <wellnhofer@aevum.de>
|
|
Date: Sat, 11 Jul 2020 21:32:10 +0200
|
|
Subject: [PATCH] Fix exponential runtime in xmlFARecurseDeterminism
|
|
|
|
In order to prevent visiting a state twice, states must be marked as
|
|
visited for the whole duration of graph traversal because states might
|
|
be reached by different paths. Otherwise state graphs like the
|
|
following can lead to exponential runtime:
|
|
|
|
->O-->O-->O-->O-->O->
|
|
\ / \ / \ / \ /
|
|
O O O O
|
|
|
|
Reset the "visited" flag only after the graph was traversed.
|
|
|
|
xmlFAComputesDeterminism still has massive performance problems when
|
|
handling fuzzed input. By design, it has quadratic time complexity in
|
|
the number of reachable states. Some issues might also stem from
|
|
redundant epsilon transitions. With this fix, fuzzing regexes with a
|
|
maximum length of 100 becomes feasible at least.
|
|
|
|
Found with libFuzzer.
|
|
---
|
|
xmlregexp.c | 26 +++++++++++++++++++++++++-
|
|
1 file changed, 25 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/xmlregexp.c b/xmlregexp.c
|
|
index dbf3bf2c..f971f0c8 100644
|
|
--- a/xmlregexp.c
|
|
+++ b/xmlregexp.c
|
|
@@ -2658,7 +2658,6 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
|
|
state->markd = XML_REGEXP_MARK_VISITED;
|
|
res = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
|
|
to, atom);
|
|
- state->markd = 0;
|
|
if (res == 0) {
|
|
ret = 0;
|
|
/* t1->nd = 1; */
|
|
@@ -2676,6 +2675,30 @@ xmlFARecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state,
|
|
return(ret);
|
|
}
|
|
|
|
+/**
|
|
+ * xmlFAFinishRecurseDeterminism:
|
|
+ * @ctxt: a regexp parser context
|
|
+ *
|
|
+ * Reset flags after checking determinism.
|
|
+ */
|
|
+static void
|
|
+xmlFAFinishRecurseDeterminism(xmlRegParserCtxtPtr ctxt, xmlRegStatePtr state) {
|
|
+ int transnr, nbTrans;
|
|
+
|
|
+ if (state == NULL)
|
|
+ return;
|
|
+ if (state->markd != XML_REGEXP_MARK_VISITED)
|
|
+ return;
|
|
+ state->markd = 0;
|
|
+
|
|
+ nbTrans = state->nbTrans;
|
|
+ for (transnr = 0; transnr < nbTrans; transnr++) {
|
|
+ xmlRegTransPtr t1 = &state->trans[transnr];
|
|
+ if ((t1->atom == NULL) && (t1->to >= 0))
|
|
+ xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
|
|
+ }
|
|
+}
|
|
+
|
|
/**
|
|
* xmlFAComputesDeterminism:
|
|
* @ctxt: a regexp parser context
|
|
@@ -2789,6 +2812,7 @@ xmlFAComputesDeterminism(xmlRegParserCtxtPtr ctxt) {
|
|
*/
|
|
ret = xmlFARecurseDeterminism(ctxt, ctxt->states[t1->to],
|
|
t2->to, t2->atom);
|
|
+ xmlFAFinishRecurseDeterminism(ctxt, ctxt->states[t1->to]);
|
|
/* don't shortcut the computation so all non deterministic
|
|
transition get marked down
|
|
if (ret == 0)
|
|
--
|
|
2.23.0
|
|
|