92 lines
2.8 KiB
Diff
From 45dcb3de900b77583f4e9daa663004c55fad4794 Mon Sep 17 00:00:00 2001
|
|
From: Zoltan Herczeg <hzmester@freemail.hu>
|
|
Date: Wed, 22 Nov 2023 10:22:59 +0000
|
|
Subject: [PATCH] Fix \X matching in 32 bit mode without UTF in JIT
|
|
|
|
---
|
|
src/pcre2_jit_compile.c | 6 +++---
|
|
testdata/testinput12 | 4 ++++
|
|
testdata/testoutput12-16 | 9 +++++++++
|
|
testdata/testoutput12-32 | 5 +++++
|
|
4 files changed, 21 insertions(+), 3 deletions(-)
|
|
|
|
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
|
|
index 510c392..8d64e1c 100644
|
|
--- a/src/pcre2_jit_compile.c
|
|
+++ b/src/pcre2_jit_compile.c
|
|
@@ -8718,7 +8718,7 @@ c = *cc++;
|
|
|
|
#if PCRE2_CODE_UNIT_WIDTH == 32
|
|
if (c >= 0x110000)
|
|
- return NULL;
|
|
+ return cc;
|
|
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
|
lgb = UCD_GRAPHBREAK(c);
|
|
|
|
@@ -8958,7 +8958,7 @@ switch(type)
|
|
#else
|
|
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
|
|
common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
|
|
- if (!common->utf || common->invalid_utf)
|
|
+ if (common->invalid_utf)
|
|
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
|
|
#endif
|
|
|
|
@@ -12044,7 +12044,7 @@ switch(opcode)
|
|
}
|
|
|
|
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
|
- if (common->utf)
|
|
+ if (type == OP_EXTUNI || common->utf)
|
|
{
|
|
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
|
|
detect_partial_match(common, &no_match);
|
|
diff --git a/testdata/testinput12 b/testdata/testinput12
|
|
index 5a2d8d2..a6678bb 100644
|
|
--- a/testdata/testinput12
|
|
+++ b/testdata/testinput12
|
|
@@ -569,4 +569,8 @@
|
|
/\x{802a0000}*/
|
|
\x{802a0000}\x{802a0000}
|
|
|
|
+# UTF matching without UTF, check invalid UTF characters
|
|
+/\X++/
|
|
+ a\x{110000}\x{ffffffff}
|
|
+
|
|
# End of testinput12
|
|
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
|
|
index 9ac403e..f3b40a3 100644
|
|
--- a/testdata/testoutput12-16
|
|
+++ b/testdata/testoutput12-16
|
|
@@ -1814,4 +1814,13 @@ No match
|
|
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
|
\x{802a0000}\x{802a0000}
|
|
|
|
+# UTF matching without UTF, check invalid UTF characters
|
|
+/\X++/
|
|
+ a\x{110000}\x{ffffffff}
|
|
+** Character \x{110000} is greater than 0xffff and UTF-16 mode is not enabled.
|
|
+** Truncation will probably give the wrong result.
|
|
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
|
+** Truncation will probably give the wrong result.
|
|
+ 0: a\x00\x{ffff}
|
|
+
|
|
# End of testinput12
|
|
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
|
|
index 9396305..dd42f86 100644
|
|
--- a/testdata/testoutput12-32
|
|
+++ b/testdata/testoutput12-32
|
|
@@ -1812,4 +1812,9 @@ No match
|
|
\x{802a0000}\x{802a0000}
|
|
0: \x{802a0000}\x{802a0000}
|
|
|
|
+# UTF matching without UTF, check invalid UTF characters
|
|
+/\X++/
|
|
+ a\x{110000}\x{ffffffff}
|
|
+ 0: a\x{110000}\x{ffffffff}
|
|
+
|
|
# End of testinput12
|
|
--
|
|
2.33.0
|
|
|