78 lines
2.4 KiB
Diff
From afce00e484cff118a824dac498e8044680dac401 Mon Sep 17 00:00:00 2001
|
|
From: Philip Hazel <Philip.Hazel@gmail.com>
|
|
Date: Fri, 1 Dec 2023 16:49:59 +0000
|
|
Subject: [PATCH] Fix compile loop in 32-bit mode for characters above the
|
|
Unicode limit when caseless and ucp are set.
|
|
|
|
---
|
|
src/pcre2_compile.c | 6 +++++-
|
|
testdata/testinput12 | 4 ++++
|
|
testdata/testoutput12-16 | 5 +++++
|
|
testdata/testoutput12-32 | 5 +++++
|
|
4 files changed, 19 insertions(+), 1 deletion(-)
|
|
|
|
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
|
index 4a4fab1..3e4014b 100644
|
|
--- a/src/pcre2_compile.c
|
|
+++ b/src/pcre2_compile.c
|
|
@@ -4954,10 +4954,14 @@ uint32_t c, othercase, next;
|
|
unsigned int co;
|
|
|
|
/* Find the first character that has an other case. If it has multiple other
|
|
-cases, return its case offset value. */
|
|
+cases, return its case offset value. In 32-bit mode, a value
|
|
+greater than the Unicode maximum ends the range. */
|
|
|
|
for (c = *cptr; c <= d; c++)
|
|
{
|
|
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
|
+ if (c > MAX_UTF_CODE_POINT) return -1;
|
|
+#endif
|
|
if ((co = UCD_CASESET(c)) != 0)
|
|
{
|
|
*ocptr = c++; /* Character that has the set */
|
|
diff --git a/testdata/testinput12 b/testdata/testinput12
|
|
index a6678bb..de3d406 100644
|
|
--- a/testdata/testinput12
|
|
+++ b/testdata/testinput12
|
|
@@ -573,4 +573,8 @@
|
|
/\X++/
|
|
a\x{110000}\x{ffffffff}
|
|
|
|
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
|
|
+/[\x{ffffffff}]/caseless,ucp
|
|
+ \x{ffffffff}xyz
|
|
+
|
|
# End of testinput12
|
|
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
|
|
index f3b40a3..9fa93fa 100644
|
|
--- a/testdata/testoutput12-16
|
|
+++ b/testdata/testoutput12-16
|
|
@@ -1823,4 +1823,9 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
|
|
** Truncation will probably give the wrong result.
|
|
0: a\x00\x{ffff}
|
|
|
|
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
|
|
+/[\x{ffffffff}]/caseless,ucp
|
|
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
|
+ \x{ffffffff}xyz
|
|
+
|
|
# End of testinput12
|
|
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
|
|
index dd42f86..721d8bc 100644
|
|
--- a/testdata/testoutput12-32
|
|
+++ b/testdata/testoutput12-32
|
|
@@ -1817,4 +1817,9 @@ No match
|
|
a\x{110000}\x{ffffffff}
|
|
0: a\x{110000}\x{ffffffff}
|
|
|
|
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
|
|
+/[\x{ffffffff}]/caseless,ucp
|
|
+ \x{ffffffff}xyz
|
|
+ 0: \x{ffffffff}
|
|
+
|
|
# End of testinput12
|
|
--
|
|
2.33.0
|
|
|