61 lines
2.2 KiB
Diff
61 lines
2.2 KiB
Diff
From ec6e3efb06d7b15cf5a2328fabd3845acea4c815 Mon Sep 17 00:00:00 2001
|
|
From: Arne Becker <arne_becker@genua.de>
|
|
Date: Tue, 6 Jul 2021 21:56:04 +0200
|
|
Subject: [PATCH] Patch to forbid epsilon-reduction of final states
|
|
|
|
When building the internal representation of a regexp, it is possible
|
|
that a lot of empty transitions are created. Therefore there is a step
|
|
to reduce them in the function xmlFAEliminateSimpleEpsilonTransitions.
|
|
|
|
That function handles the following case incorrectly:
|
|
|
|
* State 1 has a transition with an atom (in this case "a") to state 2.
|
|
* State 2 is final and has an epsilon transition to state 1.
|
|
|
|
After the reduction, the automaton looked like this:
|
|
* State 1 has a transition with an atom (in this case "a") to itself
|
|
and is final.
|
|
|
|
In other words, the empty string is accepted when it shouldn't be.
|
|
|
|
The attached patch skips the reduction step for final states.
|
|
An alternative would be to insert or increment counters when reducing a
|
|
final state, but this seemed error-prone and unnecessary, since there
|
|
aren't that many final states.
|
|
|
|
Fixes #282
|
|
---
|
|
xmlregexp.c | 9 ++++++++-
|
|
1 file changed, 8 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/xmlregexp.c b/xmlregexp.c
|
|
index 40dabb2..8d01c2b 100644
|
|
--- a/xmlregexp.c
|
|
+++ b/xmlregexp.c
|
|
@@ -1892,6 +1892,12 @@ xmlFAReduceEpsilonTransitions(xmlRegParserCtxtPtr ctxt, int fromnr,
|
|
* then X and Y are semantically equivalent and X can be eliminated
|
|
* If X is the start state then make Y the start state, else replace the
|
|
* target of all transitions to X by transitions to Y.
|
|
+ *
|
|
+ * If X is a final state, skip it.
|
|
+ * Otherwise it would be necessary to manipulate counters for this case when
|
|
+ * eliminating state 2:
|
|
+ * State 1 has a transition with an atom to state 2.
|
|
+ * State 2 is final and has an epsilon transition to state 1.
|
|
*/
|
|
static void
|
|
xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
|
|
@@ -1904,7 +1910,8 @@ xmlFAEliminateSimpleEpsilonTransitions(xmlRegParserCtxtPtr ctxt) {
|
|
continue;
|
|
if (state->nbTrans != 1)
|
|
continue;
|
|
- if (state->type == XML_REGEXP_UNREACH_STATE)
|
|
+ if (state->type == XML_REGEXP_UNREACH_STATE ||
|
|
+ state->type == XML_REGEXP_FINAL_STATE)
|
|
continue;
|
|
/* is the only transition out a basic transition */
|
|
if ((state->trans[0].atom == NULL) &&
|
|
--
|
|
1.8.3.1
|
|
|