458 lines
14 KiB
Diff
From ad73148dfb6d06280a4d87f322991762aff90a55 Mon Sep 17 00:00:00 2001
|
|
From: Philip Hazel <Philip.Hazel@gmail.com>
|
|
Date: Mon, 4 Dec 2023 16:11:41 +0000
|
|
Subject: [PATCH] Fix incorrect matching of 0xffffffff to any character with
|
|
more than one other case in 32-bit UCP (but not UTF) mode.
|
|
|
|
---
|
|
src/pcre2_dfa_match.c | 28 ++++++++++++++++++++++++++
|
|
src/pcre2_match.c | 43 ++++++++++++++++++++++++++++++++++------
|
|
testdata/testinput12 | 26 ++++++++++++++++++++++++
|
|
testdata/testinput14 | 27 +++++++++++++++++++++++++
|
|
testdata/testoutput12-16 | 37 ++++++++++++++++++++++++++++++++++
|
|
testdata/testoutput12-32 | 33 ++++++++++++++++++++++++++++++
|
|
testdata/testoutput14-16 | 38 +++++++++++++++++++++++++++++++++++
|
|
testdata/testoutput14-32 | 34 +++++++++++++++++++++++++++++++
|
|
testdata/testoutput14-8 | 38 +++++++++++++++++++++++++++++++++++
|
|
9 files changed, 298 insertions(+), 6 deletions(-)
|
|
|
|
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
|
|
index 1c48ad6..caae652 100644
|
|
--- a/src/pcre2_dfa_match.c
|
|
+++ b/src/pcre2_dfa_match.c
|
|
@@ -1241,6 +1241,13 @@ for (;;)
|
|
break;
|
|
|
|
case PT_CLIST:
|
|
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
|
+ if (c > MAX_UTF_CODE_POINT)
|
|
+ {
|
|
+ OK = FALSE;
|
|
+ break;
|
|
+ }
|
|
+#endif
|
|
cp = PRIV(ucd_caseless_sets) + code[2];
|
|
for (;;)
|
|
{
|
|
@@ -1516,6 +1523,13 @@ for (;;)
|
|
break;
|
|
|
|
case PT_CLIST:
|
|
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
|
+ if (c > MAX_UTF_CODE_POINT)
|
|
+ {
|
|
+ OK = FALSE;
|
|
+ break;
|
|
+ }
|
|
+#endif
|
|
cp = PRIV(ucd_caseless_sets) + code[3];
|
|
for (;;)
|
|
{
|
|
@@ -1774,6 +1788,13 @@ for (;;)
|
|
break;
|
|
|
|
case PT_CLIST:
|
|
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
|
+ if (c > MAX_UTF_CODE_POINT)
|
|
+ {
|
|
+ OK = FALSE;
|
|
+ break;
|
|
+ }
|
|
+#endif
|
|
cp = PRIV(ucd_caseless_sets) + code[3];
|
|
for (;;)
|
|
{
|
|
@@ -2058,6 +2079,13 @@ for (;;)
|
|
break;
|
|
|
|
case PT_CLIST:
|
|
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
|
+ if (c > MAX_UTF_CODE_POINT)
|
|
+ {
|
|
+ OK = FALSE;
|
|
+ break;
|
|
+ }
|
|
+#endif
|
|
cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
|
|
for (;;)
|
|
{
|
|
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
|
|
index d162e70..b2e1f23 100644
|
|
--- a/src/pcre2_match.c
|
|
+++ b/src/pcre2_match.c
|
|
@@ -2565,6 +2565,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
|
|
break;
|
|
|
|
case PT_CLIST:
|
|
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
|
+ if (fc > MAX_UTF_CODE_POINT)
|
|
+ {
|
|
+ if (notmatch) break;;
|
|
+ RRETURN(MATCH_NOMATCH);
|
|
+ }
|
|
+#endif
|
|
cp = PRIV(ucd_caseless_sets) + Fecode[2];
|
|
for (;;)
|
|
{
|
|
@@ -2885,6 +2892,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
|
|
RRETURN(MATCH_NOMATCH);
|
|
}
|
|
GETCHARINCTEST(fc, Feptr);
|
|
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
|
+ if (fc > MAX_UTF_CODE_POINT)
|
|
+ {
|
|
+ if (notmatch) continue;
|
|
+ RRETURN(MATCH_NOMATCH);
|
|
+ }
|
|
+#endif
|
|
cp = PRIV(ucd_caseless_sets) + Lpropvalue;
|
|
for (;;)
|
|
{
|
|
@@ -3698,6 +3712,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
|
|
RRETURN(MATCH_NOMATCH);
|
|
}
|
|
GETCHARINCTEST(fc, Feptr);
|
|
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
|
+ if (fc > MAX_UTF_CODE_POINT)
|
|
+ {
|
|
+ if (Lctype == OP_NOTPROP) continue;
|
|
+ RRETURN(MATCH_NOMATCH);
|
|
+ }
|
|
+#endif
|
|
cp = PRIV(ucd_caseless_sets) + Lpropvalue;
|
|
for (;;)
|
|
{
|
|
@@ -4278,14 +4299,24 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
|
|
break;
|
|
}
|
|
GETCHARLENTEST(fc, Feptr, len);
|
|
- cp = PRIV(ucd_caseless_sets) + Lpropvalue;
|
|
- for (;;)
|
|
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
|
+ if (fc > MAX_UTF_CODE_POINT)
|
|
{
|
|
- if (fc < *cp)
|
|
- { if (notmatch) break; else goto GOT_MAX; }
|
|
- if (fc == *cp++)
|
|
- { if (notmatch) goto GOT_MAX; else break; }
|
|
+ if (!notmatch) goto GOT_MAX;
|
|
}
|
|
+ else
|
|
+#endif
|
|
+ {
|
|
+ cp = PRIV(ucd_caseless_sets) + Lpropvalue;
|
|
+ for (;;)
|
|
+ {
|
|
+ if (fc < *cp)
|
|
+ { if (notmatch) break; else goto GOT_MAX; }
|
|
+ if (fc == *cp++)
|
|
+ { if (notmatch) goto GOT_MAX; else break; }
|
|
+ }
|
|
+ }
|
|
+
|
|
Feptr += len;
|
|
}
|
|
GOT_MAX:
|
|
diff --git a/testdata/testinput12 b/testdata/testinput12
|
|
index de3d406..85550c3 100644
|
|
--- a/testdata/testinput12
|
|
+++ b/testdata/testinput12
|
|
@@ -576,5 +576,31 @@
|
|
# This used to loop in 32-bit mode; it will fail in 16-bit mode.
|
|
/[\x{ffffffff}]/caseless,ucp
|
|
\x{ffffffff}xyz
|
|
+
|
|
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
|
|
+# will give errors in 16-bit mode.
|
|
+
|
|
+/k*\x{ffffffff}/caseless,ucp
|
|
+ \x{ffffffff}
|
|
+
|
|
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+ K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+ K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
|
+\= Expect no match
|
|
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
|
+
|
|
+# ---------------------------------------------------------
|
|
|
|
# End of testinput12
|
|
diff --git a/testdata/testinput14 b/testdata/testinput14
|
|
index 8a17ae7..8880b5c 100644
|
|
--- a/testdata/testinput14
|
|
+++ b/testdata/testinput14
|
|
@@ -78,4 +78,31 @@
|
|
|
|
# ----------------------------------------------------
|
|
|
|
+# ----------------------------------------------------
|
|
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
|
|
+# mode; for the other widths they will fail.
|
|
+
|
|
+/k*\x{ffffffff}/caseless,ucp
|
|
+ \x{ffffffff}
|
|
+
|
|
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+ K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+ K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
|
+\= Expect no match
|
|
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
|
+
|
|
+# ----------------------------------------------------
|
|
+
|
|
# End of testinput14
|
|
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
|
|
index 9fa93fa..616d693 100644
|
|
--- a/testdata/testoutput12-16
|
|
+++ b/testdata/testoutput12-16
|
|
@@ -1827,5 +1827,42 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
|
|
/[\x{ffffffff}]/caseless,ucp
|
|
Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
|
\x{ffffffff}xyz
|
|
+
|
|
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
|
|
+# will give errors in 16-bit mode.
|
|
+
|
|
+/k*\x{ffffffff}/caseless,ucp
|
|
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
|
|
+ \x{ffffffff}
|
|
+
|
|
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
|
|
+ K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
|
+ K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
|
+\= Expect no match
|
|
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
|
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
|
+** Truncation will probably give the wrong result.
|
|
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
|
+** Truncation will probably give the wrong result.
|
|
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
|
+** Truncation will probably give the wrong result.
|
|
+No match
|
|
+
|
|
+# ---------------------------------------------------------
|
|
|
|
# End of testinput12
|
|
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
|
|
index 721d8bc..3c9586e 100644
|
|
--- a/testdata/testoutput12-32
|
|
+++ b/testdata/testoutput12-32
|
|
@@ -1821,5 +1821,38 @@ No match
|
|
/[\x{ffffffff}]/caseless,ucp
|
|
\x{ffffffff}xyz
|
|
0: \x{ffffffff}
|
|
+
|
|
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
|
|
+# will give errors in 16-bit mode.
|
|
+
|
|
+/k*\x{ffffffff}/caseless,ucp
|
|
+ \x{ffffffff}
|
|
+ 0: \x{ffffffff}
|
|
+
|
|
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+ K\x{ffffffff}
|
|
+ 0: K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}
|
|
+No match
|
|
+
|
|
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+No match
|
|
+
|
|
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+ K\x{ffffffff}
|
|
+ 0: K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+No match
|
|
+
|
|
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
|
+\= Expect no match
|
|
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
|
+No match
|
|
+
|
|
+# ---------------------------------------------------------
|
|
|
|
# End of testinput12
|
|
diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16
|
|
index 61541f6..dd1a977 100644
|
|
--- a/testdata/testoutput14-16
|
|
+++ b/testdata/testoutput14-16
|
|
@@ -122,4 +122,42 @@ No match
|
|
|
|
# ----------------------------------------------------
|
|
|
|
+# ----------------------------------------------------
|
|
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
|
|
+# mode; for the other widths they will fail.
|
|
+
|
|
+/k*\x{ffffffff}/caseless,ucp
|
|
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
|
|
+ \x{ffffffff}
|
|
+
|
|
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
|
|
+ K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
|
+ K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
|
+\= Expect no match
|
|
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
|
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
|
+** Truncation will probably give the wrong result.
|
|
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
|
+** Truncation will probably give the wrong result.
|
|
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
|
+** Truncation will probably give the wrong result.
|
|
+No match
|
|
+
|
|
+# ----------------------------------------------------
|
|
+
|
|
# End of testinput14
|
|
diff --git a/testdata/testoutput14-32 b/testdata/testoutput14-32
|
|
index f1f65b7..dc21569 100644
|
|
--- a/testdata/testoutput14-32
|
|
+++ b/testdata/testoutput14-32
|
|
@@ -122,4 +122,38 @@ No match
|
|
|
|
# ----------------------------------------------------
|
|
|
|
+# ----------------------------------------------------
|
|
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
|
|
+# mode; for the other widths they will fail.
|
|
+
|
|
+/k*\x{ffffffff}/caseless,ucp
|
|
+ \x{ffffffff}
|
|
+ 0: \x{ffffffff}
|
|
+
|
|
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+ K\x{ffffffff}
|
|
+ 0: K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}
|
|
+No match
|
|
+
|
|
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+No match
|
|
+
|
|
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+ K\x{ffffffff}
|
|
+ 0: K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+No match
|
|
+
|
|
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
|
+\= Expect no match
|
|
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
|
+No match
|
|
+
|
|
+# ----------------------------------------------------
|
|
+
|
|
# End of testinput14
|
|
diff --git a/testdata/testoutput14-8 b/testdata/testoutput14-8
|
|
index aa62414..69285db 100644
|
|
--- a/testdata/testoutput14-8
|
|
+++ b/testdata/testoutput14-8
|
|
@@ -122,4 +122,42 @@ Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too
|
|
|
|
# ----------------------------------------------------
|
|
|
|
+# ----------------------------------------------------
|
|
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
|
|
+# mode; for the other widths they will fail.
|
|
+
|
|
+/k*\x{ffffffff}/caseless,ucp
|
|
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
|
|
+ \x{ffffffff}
|
|
+
|
|
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
|
|
+ K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
|
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
|
+ K\x{ffffffff}
|
|
+\= Expect no match
|
|
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
|
+
|
|
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
|
+\= Expect no match
|
|
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
|
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
|
|
+** Truncation will probably give the wrong result.
|
|
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
|
|
+** Truncation will probably give the wrong result.
|
|
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
|
|
+** Truncation will probably give the wrong result.
|
|
+No match
|
|
+
|
|
+# ----------------------------------------------------
|
|
+
|
|
# End of testinput14
|
|
--
|
|
2.33.0
|
|
|