pcre2: sync patches from upstream to fix some bugs
This commit is contained in:
parent
2124205d67
commit
352a8d46ef
@ -0,0 +1,116 @@
|
||||
From c1306126c3f12c16ad62dd2553132f64a28ca607 Mon Sep 17 00:00:00 2001
|
||||
From: Philip Hazel <Philip.Hazel@gmail.com>
|
||||
Date: Sun, 19 Nov 2023 17:18:07 +0000
|
||||
Subject: [PATCH] Fix 32-bit quantifier following a character larger than the
|
||||
maximum UTF character.
|
||||
|
||||
---
|
||||
src/pcre2_compile.c | 11 ++++++++---
|
||||
testdata/testinput12 | 6 ++++++
|
||||
testdata/testoutput12-16 | 7 +++++++
|
||||
testdata/testoutput12-32 | 7 +++++++
|
||||
4 files changed, 28 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||||
index b3e4969..fdaf2ad 100644
|
||||
--- a/src/pcre2_compile.c
|
||||
+++ b/src/pcre2_compile.c
|
||||
@@ -2781,6 +2781,7 @@ uint32_t *verbstartptr = NULL;
|
||||
uint32_t *previous_callout = NULL;
|
||||
uint32_t *parsed_pattern = cb->parsed_pattern;
|
||||
uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
|
||||
+uint32_t *this_parsed_item = NULL;
|
||||
uint32_t meta_quantifier = 0;
|
||||
uint32_t add_after_mark = 0;
|
||||
uint32_t extra_options = cb->cx->extra_options;
|
||||
@@ -2866,10 +2867,11 @@ while (ptr < ptrend)
|
||||
uint32_t set, unset, *optset;
|
||||
uint32_t terminator;
|
||||
uint32_t prev_meta_quantifier;
|
||||
+ uint32_t *prev_parsed_item = this_parsed_item;
|
||||
BOOL prev_okquantifier;
|
||||
PCRE2_SPTR tempptr;
|
||||
PCRE2_SIZE offset;
|
||||
-
|
||||
+
|
||||
if (parsed_pattern >= parsed_pattern_end)
|
||||
{
|
||||
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
|
||||
@@ -2881,6 +2883,10 @@ while (ptr < ptrend)
|
||||
errorcode = ERR19;
|
||||
goto FAILED; /* Parentheses too deeply nested */
|
||||
}
|
||||
+
|
||||
+ /* Remember where this item started */
|
||||
+
|
||||
+ this_parsed_item = parsed_pattern;
|
||||
|
||||
/* Get next input character, save its position for callout handling. */
|
||||
|
||||
@@ -3173,7 +3179,6 @@ while (ptr < ptrend)
|
||||
continue; /* Next character in pattern */
|
||||
}
|
||||
|
||||
-
|
||||
/* Process the next item in the main part of a pattern. */
|
||||
|
||||
switch(c)
|
||||
@@ -3450,7 +3455,7 @@ while (ptr < ptrend)
|
||||
wrapping it in non-capturing brackets, but we have to allow for a preceding
|
||||
(*MARK) for when (*ACCEPT) has an argument. */
|
||||
|
||||
- if (parsed_pattern[-1] == META_ACCEPT)
|
||||
+ if (*prev_parsed_item == META_ACCEPT)
|
||||
{
|
||||
uint32_t *p;
|
||||
for (p = parsed_pattern - 1; p >= verbstartptr; p--) p[1] = p[0];
|
||||
diff --git a/testdata/testinput12 b/testdata/testinput12
|
||||
index 7a85eb5..1e552e6 100644
|
||||
--- a/testdata/testinput12
|
||||
+++ b/testdata/testinput12
|
||||
@@ -560,4 +560,10 @@
|
||||
|
||||
# ----------------------------------------------------
|
||||
|
||||
+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
|
||||
+# fails in 16-bit mode, but is OK for 32-bit.
|
||||
+
|
||||
+/\x{802a0000}*/
|
||||
+ \x{802a0000}\x{802a0000}
|
||||
+
|
||||
# End of testinput12
|
||||
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
|
||||
index 9867632..8cbc13d 100644
|
||||
--- a/testdata/testoutput12-16
|
||||
+++ b/testdata/testoutput12-16
|
||||
@@ -1803,4 +1803,11 @@ No match
|
||||
|
||||
# ----------------------------------------------------
|
||||
|
||||
+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
|
||||
+# fails in 16-bit mode, but is OK for 32-bit.
|
||||
+
|
||||
+/\x{802a0000}*/
|
||||
+Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||
+ \x{802a0000}\x{802a0000}
|
||||
+
|
||||
# End of testinput12
|
||||
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
|
||||
index 3a20dd4..1a98b4b 100644
|
||||
--- a/testdata/testoutput12-32
|
||||
+++ b/testdata/testoutput12-32
|
||||
@@ -1801,4 +1801,11 @@ No match
|
||||
|
||||
# ----------------------------------------------------
|
||||
|
||||
+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
|
||||
+# fails in 16-bit mode, but is OK for 32-bit.
|
||||
+
|
||||
+/\x{802a0000}*/
|
||||
+ \x{802a0000}\x{802a0000}
|
||||
+ 0: \x{802a0000}\x{802a0000}
|
||||
+
|
||||
# End of testinput12
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,91 @@
|
||||
From 45dcb3de900b77583f4e9daa663004c55fad4794 Mon Sep 17 00:00:00 2001
|
||||
From: Zoltan Herczeg <hzmester@freemail.hu>
|
||||
Date: Wed, 22 Nov 2023 10:22:59 +0000
|
||||
Subject: [PATCH] Fix \X matching in 32 bit mode without UTF in JIT
|
||||
|
||||
---
|
||||
src/pcre2_jit_compile.c | 6 +++---
|
||||
testdata/testinput12 | 4 ++++
|
||||
testdata/testoutput12-16 | 9 +++++++++
|
||||
testdata/testoutput12-32 | 5 +++++
|
||||
4 files changed, 21 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
|
||||
index 510c392..8d64e1c 100644
|
||||
--- a/src/pcre2_jit_compile.c
|
||||
+++ b/src/pcre2_jit_compile.c
|
||||
@@ -8718,7 +8718,7 @@ c = *cc++;
|
||||
|
||||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
if (c >= 0x110000)
|
||||
- return NULL;
|
||||
+ return cc;
|
||||
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
|
||||
lgb = UCD_GRAPHBREAK(c);
|
||||
|
||||
@@ -8958,7 +8958,7 @@ switch(type)
|
||||
#else
|
||||
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
|
||||
common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
|
||||
- if (!common->utf || common->invalid_utf)
|
||||
+ if (common->invalid_utf)
|
||||
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
|
||||
#endif
|
||||
|
||||
@@ -12044,7 +12044,7 @@ switch(opcode)
|
||||
}
|
||||
|
||||
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
|
||||
- if (common->utf)
|
||||
+ if (type == OP_EXTUNI || common->utf)
|
||||
{
|
||||
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
|
||||
detect_partial_match(common, &no_match);
|
||||
diff --git a/testdata/testinput12 b/testdata/testinput12
|
||||
index 5a2d8d2..a6678bb 100644
|
||||
--- a/testdata/testinput12
|
||||
+++ b/testdata/testinput12
|
||||
@@ -569,4 +569,8 @@
|
||||
/\x{802a0000}*/
|
||||
\x{802a0000}\x{802a0000}
|
||||
|
||||
+# UTF matching without UTF, check invalid UTF characters
|
||||
+/\X++/
|
||||
+ a\x{110000}\x{ffffffff}
|
||||
+
|
||||
# End of testinput12
|
||||
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
|
||||
index 9ac403e..f3b40a3 100644
|
||||
--- a/testdata/testoutput12-16
|
||||
+++ b/testdata/testoutput12-16
|
||||
@@ -1814,4 +1814,13 @@ No match
|
||||
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
|
||||
\x{802a0000}\x{802a0000}
|
||||
|
||||
+# UTF matching without UTF, check invalid UTF characters
|
||||
+/\X++/
|
||||
+ a\x{110000}\x{ffffffff}
|
||||
+** Character \x{110000} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
+** Truncation will probably give the wrong result.
|
||||
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
+** Truncation will probably give the wrong result.
|
||||
+ 0: a\x00\x{ffff}
|
||||
+
|
||||
# End of testinput12
|
||||
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
|
||||
index 9396305..dd42f86 100644
|
||||
--- a/testdata/testoutput12-32
|
||||
+++ b/testdata/testoutput12-32
|
||||
@@ -1812,4 +1812,9 @@ No match
|
||||
\x{802a0000}\x{802a0000}
|
||||
0: \x{802a0000}\x{802a0000}
|
||||
|
||||
+# UTF matching without UTF, check invalid UTF characters
|
||||
+/\X++/
|
||||
+ a\x{110000}\x{ffffffff}
|
||||
+ 0: a\x{110000}\x{ffffffff}
|
||||
+
|
||||
# End of testinput12
|
||||
--
|
||||
2.33.0
|
||||
|
||||
75
backport-Fix-accept-and-endanchored-interaction-in-JIT.patch
Normal file
75
backport-Fix-accept-and-endanchored-interaction-in-JIT.patch
Normal file
@ -0,0 +1,75 @@
|
||||
From 1c09efe6b0008a3b463299efe7501bc3140806f3 Mon Sep 17 00:00:00 2001
|
||||
From: Zoltan Herczeg <hzmester@freemail.hu>
|
||||
Date: Wed, 6 Dec 2023 10:06:50 +0000
|
||||
Subject: [PATCH] Fix accept and endanchored interaction in JIT
|
||||
|
||||
---
|
||||
src/pcre2_jit_compile.c | 15 ++++++++++++---
|
||||
src/pcre2_jit_test.c | 1 +
|
||||
2 files changed, 13 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
|
||||
index 2e11c3c..849e2c8 100644
|
||||
--- a/src/pcre2_jit_compile.c
|
||||
+++ b/src/pcre2_jit_compile.c
|
||||
@@ -489,6 +489,8 @@ typedef struct compiler_common {
|
||||
jump_list *casefulcmp;
|
||||
jump_list *caselesscmp;
|
||||
jump_list *reset_match;
|
||||
+ /* Same as reset_match, but resets the STR_PTR as well. */
|
||||
+ jump_list *restart_match;
|
||||
BOOL unset_backref;
|
||||
BOOL alt_circumflex;
|
||||
#ifdef SUPPORT_UNICODE
|
||||
@@ -3146,7 +3148,7 @@ return (value & (value - 1)) == 0;
|
||||
|
||||
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
|
||||
{
|
||||
-while (list)
|
||||
+while (list != NULL)
|
||||
{
|
||||
/* sljit_set_label is clever enough to do nothing
|
||||
if either the jump or the label is NULL. */
|
||||
@@ -12187,7 +12189,7 @@ if (*cc == OP_FAIL)
|
||||
}
|
||||
|
||||
if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
|
||||
- add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
+ add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
|
||||
|
||||
if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
|
||||
{
|
||||
@@ -14552,10 +14554,17 @@ if (common->caselesscmp != NULL)
|
||||
set_jumps(common->caselesscmp, LABEL());
|
||||
do_caselesscmp(common);
|
||||
}
|
||||
-if (common->reset_match != NULL)
|
||||
+if (common->reset_match != NULL || common->restart_match != NULL)
|
||||
{
|
||||
+ if (common->restart_match != NULL)
|
||||
+ {
|
||||
+ set_jumps(common->restart_match, LABEL());
|
||||
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
|
||||
+ }
|
||||
+
|
||||
set_jumps(common->reset_match, LABEL());
|
||||
do_reset_match(common, (re->top_bracket + 1) * 2);
|
||||
+ /* The value of restart_match is in TMP1. */
|
||||
CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
|
||||
OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
|
||||
JUMPTO(SLJIT_JUMP, reset_match_label);
|
||||
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
|
||||
index b5d95d5..0974d19 100644
|
||||
--- a/src/pcre2_jit_test.c
|
||||
+++ b/src/pcre2_jit_test.c
|
||||
@@ -655,6 +655,7 @@ static struct regression_test_case regression_test_cases[] = {
|
||||
{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
|
||||
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
|
||||
+ { MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" },
|
||||
|
||||
/* Conditional blocks. */
|
||||
{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
|
||||
--
|
||||
2.33.0
|
||||
|
||||
28
backport-Fix-another-oversight-in-c1306126.patch
Normal file
28
backport-Fix-another-oversight-in-c1306126.patch
Normal file
@ -0,0 +1,28 @@
|
||||
From 04f6668a09c51cf10fa5514019843ab0af9724c8 Mon Sep 17 00:00:00 2001
|
||||
From: Philip Hazel <Philip.Hazel@gmail.com>
|
||||
Date: Tue, 21 Nov 2023 15:10:34 +0000
|
||||
Subject: [PATCH] Fix another oversight in c1306126
|
||||
|
||||
---
|
||||
src/pcre2_compile.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||||
index 9e45580..7b522c5 100644
|
||||
--- a/src/pcre2_compile.c
|
||||
+++ b/src/pcre2_compile.c
|
||||
@@ -3108,8 +3108,11 @@ while (ptr < ptrend)
|
||||
!read_repeat_counts(&tempptr, ptrend, NULL, NULL, &errorcode))))
|
||||
{
|
||||
if (after_manual_callout-- <= 0)
|
||||
+ {
|
||||
parsed_pattern = manage_callouts(thisptr, &previous_callout, auto_callout,
|
||||
parsed_pattern, cb);
|
||||
+ this_parsed_item = parsed_pattern; /* New start for current item */
|
||||
+ }
|
||||
}
|
||||
|
||||
/* If expect_cond_assert is 2, we have just passed (?( and are expecting an
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,43 @@
|
||||
From 936fef2a4480b21f5c43b207181097736fb311e3 Mon Sep 17 00:00:00 2001
|
||||
From: Zoltan Herczeg <hzmester@freemail.hu>
|
||||
Date: Wed, 22 Nov 2023 11:50:38 +0000
|
||||
Subject: [PATCH] Fix backref iterators when PCRE2_MATCH_UNSET_BACKREF is set
|
||||
in JIT
|
||||
|
||||
---
|
||||
src/pcre2_jit_compile.c | 4 +++-
|
||||
src/pcre2_jit_test.c | 1 +
|
||||
2 files changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
|
||||
index 8d64e1c..8110d8c 100644
|
||||
--- a/src/pcre2_jit_compile.c
|
||||
+++ b/src/pcre2_jit_compile.c
|
||||
@@ -9539,9 +9539,11 @@ if (!minimize)
|
||||
if (ref)
|
||||
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
|
||||
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
|
||||
+
|
||||
if (ref)
|
||||
{
|
||||
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
|
||||
+ if (!common->unset_backref)
|
||||
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
|
||||
zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
|
||||
}
|
||||
else
|
||||
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
|
||||
index b27cec7..8bff3dc 100644
|
||||
--- a/src/pcre2_jit_test.c
|
||||
+++ b/src/pcre2_jit_test.c
|
||||
@@ -595,6 +595,7 @@ static struct regression_test_case regression_test_cases[] = {
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
|
||||
+ { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
|
||||
|
||||
/* Assertions. */
|
||||
{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,40 @@
|
||||
From 9de4d53cf850e0fca625ce9d80c12bea5b2a5ab9 Mon Sep 17 00:00:00 2001
|
||||
From: Zoltan Herczeg <hzmester@freemail.hu>
|
||||
Date: Thu, 7 Dec 2023 09:03:24 +0000
|
||||
Subject: [PATCH] Fix backreferences with unset backref and non-greedy
|
||||
iterators in JIT
|
||||
|
||||
---
|
||||
src/pcre2_jit_compile.c | 3 ++-
|
||||
src/pcre2_jit_test.c | 1 +
|
||||
2 files changed, 3 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
|
||||
index 0f445e1..e1daa1e 100644
|
||||
--- a/src/pcre2_jit_compile.c
|
||||
+++ b/src/pcre2_jit_compile.c
|
||||
@@ -9653,7 +9653,8 @@ else
|
||||
{
|
||||
if (ref)
|
||||
{
|
||||
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
|
||||
+ if (!common->unset_backref)
|
||||
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
|
||||
zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
|
||||
}
|
||||
else
|
||||
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
|
||||
index 0974d19..9b63c8e 100644
|
||||
--- a/src/pcre2_jit_test.c
|
||||
+++ b/src/pcre2_jit_test.c
|
||||
@@ -596,6 +596,7 @@ static struct regression_test_case regression_test_cases[] = {
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
|
||||
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
|
||||
{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
|
||||
+ { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" },
|
||||
|
||||
/* Assertions. */
|
||||
{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,87 @@
|
||||
From 57ee073252dc826dbe412846a83421d2bb4483bc Mon Sep 17 00:00:00 2001
|
||||
From: Philip Hazel <Philip.Hazel@gmail.com>
|
||||
Date: Wed, 22 Nov 2023 11:34:27 +0000
|
||||
Subject: [PATCH] Fix bad patch in 05206d66. The interpreter was handling
|
||||
NOTEOL incorrectly in general after trying to fix it in invalid UTF subjects.
|
||||
|
||||
---
|
||||
src/pcre2_intmodedep.h | 3 ++-
|
||||
src/pcre2_match.c | 7 +++----
|
||||
testdata/testinput2 | 4 ++++
|
||||
testdata/testoutput2 | 6 ++++++
|
||||
4 files changed, 15 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h
|
||||
index 5e7e10d..423764d 100644
|
||||
--- a/src/pcre2_intmodedep.h
|
||||
+++ b/src/pcre2_intmodedep.h
|
||||
@@ -880,7 +880,8 @@ typedef struct match_block {
|
||||
PCRE2_SPTR start_code; /* For use when recursing */
|
||||
PCRE2_SPTR start_subject; /* Start of the subject string */
|
||||
PCRE2_SPTR check_subject; /* Where UTF-checked from */
|
||||
- PCRE2_SPTR end_subject; /* End of the subject string */
|
||||
+ PCRE2_SPTR end_subject; /* Usable end of the subject string */
|
||||
+ PCRE2_SPTR true_end_subject; /* Actual end of the subject string */
|
||||
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
|
||||
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
|
||||
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
|
||||
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
|
||||
index ea03976..c5e84ce 100644
|
||||
--- a/src/pcre2_match.c
|
||||
+++ b/src/pcre2_match.c
|
||||
@@ -6076,12 +6076,10 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
|
||||
if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
|
||||
|
||||
/* Fall through */
|
||||
- /* Unconditional end of subject assertion (\z). We must check NOTEOL
|
||||
- because it gets set for invalid UTF fragments. */
|
||||
+ /* Unconditional end of subject assertion (\z). */
|
||||
|
||||
case OP_EOD:
|
||||
- if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0)
|
||||
- RRETURN(MATCH_NOMATCH);
|
||||
+ if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
|
||||
if (mb->partial != 0)
|
||||
{
|
||||
mb->hitend = TRUE;
|
||||
@@ -6891,6 +6889,7 @@ mb->callout_data = mcontext->callout_data;
|
||||
mb->start_subject = subject;
|
||||
mb->start_offset = start_offset;
|
||||
mb->end_subject = end_subject;
|
||||
+mb->true_end_subject = true_end_subject;
|
||||
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
|
||||
mb->allowemptypartial = (re->max_lookbehind > 0) ||
|
||||
(re->flags & PCRE2_MATCH_EMPTY) != 0;
|
||||
diff --git a/testdata/testinput2 b/testdata/testinput2
|
||||
index 0e24e78..b874f20 100644
|
||||
--- a/testdata/testinput2
|
||||
+++ b/testdata/testinput2
|
||||
@@ -6055,4 +6055,8 @@ a)"xI
|
||||
|
||||
/A +/extended
|
||||
|
||||
+/a\z/
|
||||
+ a
|
||||
+ a\=noteol
|
||||
+
|
||||
# End of testinput2
|
||||
diff --git a/testdata/testoutput2 b/testdata/testoutput2
|
||||
index 68800fb..c1bc0e6 100644
|
||||
--- a/testdata/testoutput2
|
||||
+++ b/testdata/testoutput2
|
||||
@@ -17946,6 +17946,12 @@ No match
|
||||
|
||||
/A +/extended
|
||||
|
||||
+/a\z/
|
||||
+ a
|
||||
+ 0: a
|
||||
+ a\=noteol
|
||||
+ 0: a
|
||||
+
|
||||
# End of testinput2
|
||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,77 @@
|
||||
From afce00e484cff118a824dac498e8044680dac401 Mon Sep 17 00:00:00 2001
|
||||
From: Philip Hazel <Philip.Hazel@gmail.com>
|
||||
Date: Fri, 1 Dec 2023 16:49:59 +0000
|
||||
Subject: [PATCH] Fix compile loop in 32-bit mode for characters above the
|
||||
Unicode limit when caseless and ucp are set.
|
||||
|
||||
---
|
||||
src/pcre2_compile.c | 6 +++++-
|
||||
testdata/testinput12 | 4 ++++
|
||||
testdata/testoutput12-16 | 5 +++++
|
||||
testdata/testoutput12-32 | 5 +++++
|
||||
4 files changed, 19 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||||
index 4a4fab1..3e4014b 100644
|
||||
--- a/src/pcre2_compile.c
|
||||
+++ b/src/pcre2_compile.c
|
||||
@@ -4954,10 +4954,14 @@ uint32_t c, othercase, next;
|
||||
unsigned int co;
|
||||
|
||||
/* Find the first character that has an other case. If it has multiple other
|
||||
-cases, return its case offset value. */
|
||||
+cases, return its case offset value. In 32-bit mode, a value
|
||||
+greater than the Unicode maximum ends the range. */
|
||||
|
||||
for (c = *cptr; c <= d; c++)
|
||||
{
|
||||
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
+ if (c > MAX_UTF_CODE_POINT) return -1;
|
||||
+#endif
|
||||
if ((co = UCD_CASESET(c)) != 0)
|
||||
{
|
||||
*ocptr = c++; /* Character that has the set */
|
||||
diff --git a/testdata/testinput12 b/testdata/testinput12
|
||||
index a6678bb..de3d406 100644
|
||||
--- a/testdata/testinput12
|
||||
+++ b/testdata/testinput12
|
||||
@@ -573,4 +573,8 @@
|
||||
/\X++/
|
||||
a\x{110000}\x{ffffffff}
|
||||
|
||||
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
|
||||
+/[\x{ffffffff}]/caseless,ucp
|
||||
+ \x{ffffffff}xyz
|
||||
+
|
||||
# End of testinput12
|
||||
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
|
||||
index f3b40a3..9fa93fa 100644
|
||||
--- a/testdata/testoutput12-16
|
||||
+++ b/testdata/testoutput12-16
|
||||
@@ -1823,4 +1823,9 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
|
||||
** Truncation will probably give the wrong result.
|
||||
0: a\x00\x{ffff}
|
||||
|
||||
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
|
||||
+/[\x{ffffffff}]/caseless,ucp
|
||||
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
||||
+ \x{ffffffff}xyz
|
||||
+
|
||||
# End of testinput12
|
||||
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
|
||||
index dd42f86..721d8bc 100644
|
||||
--- a/testdata/testoutput12-32
|
||||
+++ b/testdata/testoutput12-32
|
||||
@@ -1817,4 +1817,9 @@ No match
|
||||
a\x{110000}\x{ffffffff}
|
||||
0: a\x{110000}\x{ffffffff}
|
||||
|
||||
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
|
||||
+/[\x{ffffffff}]/caseless,ucp
|
||||
+ \x{ffffffff}xyz
|
||||
+ 0: \x{ffffffff}
|
||||
+
|
||||
# End of testinput12
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,457 @@
|
||||
From ad73148dfb6d06280a4d87f322991762aff90a55 Mon Sep 17 00:00:00 2001
|
||||
From: Philip Hazel <Philip.Hazel@gmail.com>
|
||||
Date: Mon, 4 Dec 2023 16:11:41 +0000
|
||||
Subject: [PATCH] Fix incorrect matching of 0xffffffff to any character with
|
||||
more than one other case in 32-bit UCP (but not UTF) mode.
|
||||
|
||||
---
|
||||
src/pcre2_dfa_match.c | 28 ++++++++++++++++++++++++++
|
||||
src/pcre2_match.c | 43 ++++++++++++++++++++++++++++++++++------
|
||||
testdata/testinput12 | 26 ++++++++++++++++++++++++
|
||||
testdata/testinput14 | 27 +++++++++++++++++++++++++
|
||||
testdata/testoutput12-16 | 37 ++++++++++++++++++++++++++++++++++
|
||||
testdata/testoutput12-32 | 33 ++++++++++++++++++++++++++++++
|
||||
testdata/testoutput14-16 | 38 +++++++++++++++++++++++++++++++++++
|
||||
testdata/testoutput14-32 | 34 +++++++++++++++++++++++++++++++
|
||||
testdata/testoutput14-8 | 38 +++++++++++++++++++++++++++++++++++
|
||||
9 files changed, 298 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
|
||||
index 1c48ad6..caae652 100644
|
||||
--- a/src/pcre2_dfa_match.c
|
||||
+++ b/src/pcre2_dfa_match.c
|
||||
@@ -1241,6 +1241,13 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
+ if (c > MAX_UTF_CODE_POINT)
|
||||
+ {
|
||||
+ OK = FALSE;
|
||||
+ break;
|
||||
+ }
|
||||
+#endif
|
||||
cp = PRIV(ucd_caseless_sets) + code[2];
|
||||
for (;;)
|
||||
{
|
||||
@@ -1516,6 +1523,13 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
+ if (c > MAX_UTF_CODE_POINT)
|
||||
+ {
|
||||
+ OK = FALSE;
|
||||
+ break;
|
||||
+ }
|
||||
+#endif
|
||||
cp = PRIV(ucd_caseless_sets) + code[3];
|
||||
for (;;)
|
||||
{
|
||||
@@ -1774,6 +1788,13 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
+ if (c > MAX_UTF_CODE_POINT)
|
||||
+ {
|
||||
+ OK = FALSE;
|
||||
+ break;
|
||||
+ }
|
||||
+#endif
|
||||
cp = PRIV(ucd_caseless_sets) + code[3];
|
||||
for (;;)
|
||||
{
|
||||
@@ -2058,6 +2079,13 @@ for (;;)
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
+ if (c > MAX_UTF_CODE_POINT)
|
||||
+ {
|
||||
+ OK = FALSE;
|
||||
+ break;
|
||||
+ }
|
||||
+#endif
|
||||
cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
|
||||
for (;;)
|
||||
{
|
||||
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
|
||||
index d162e70..b2e1f23 100644
|
||||
--- a/src/pcre2_match.c
|
||||
+++ b/src/pcre2_match.c
|
||||
@@ -2565,6 +2565,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
|
||||
break;
|
||||
|
||||
case PT_CLIST:
|
||||
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
+ if (fc > MAX_UTF_CODE_POINT)
|
||||
+ {
|
||||
+ if (notmatch) break;;
|
||||
+ RRETURN(MATCH_NOMATCH);
|
||||
+ }
|
||||
+#endif
|
||||
cp = PRIV(ucd_caseless_sets) + Fecode[2];
|
||||
for (;;)
|
||||
{
|
||||
@@ -2885,6 +2892,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
+ if (fc > MAX_UTF_CODE_POINT)
|
||||
+ {
|
||||
+ if (notmatch) continue;
|
||||
+ RRETURN(MATCH_NOMATCH);
|
||||
+ }
|
||||
+#endif
|
||||
cp = PRIV(ucd_caseless_sets) + Lpropvalue;
|
||||
for (;;)
|
||||
{
|
||||
@@ -3698,6 +3712,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
|
||||
RRETURN(MATCH_NOMATCH);
|
||||
}
|
||||
GETCHARINCTEST(fc, Feptr);
|
||||
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
+ if (fc > MAX_UTF_CODE_POINT)
|
||||
+ {
|
||||
+ if (Lctype == OP_NOTPROP) continue;
|
||||
+ RRETURN(MATCH_NOMATCH);
|
||||
+ }
|
||||
+#endif
|
||||
cp = PRIV(ucd_caseless_sets) + Lpropvalue;
|
||||
for (;;)
|
||||
{
|
||||
@@ -4278,14 +4299,24 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
|
||||
break;
|
||||
}
|
||||
GETCHARLENTEST(fc, Feptr, len);
|
||||
- cp = PRIV(ucd_caseless_sets) + Lpropvalue;
|
||||
- for (;;)
|
||||
+#if PCRE2_CODE_UNIT_WIDTH == 32
|
||||
+ if (fc > MAX_UTF_CODE_POINT)
|
||||
{
|
||||
- if (fc < *cp)
|
||||
- { if (notmatch) break; else goto GOT_MAX; }
|
||||
- if (fc == *cp++)
|
||||
- { if (notmatch) goto GOT_MAX; else break; }
|
||||
+ if (!notmatch) goto GOT_MAX;
|
||||
}
|
||||
+ else
|
||||
+#endif
|
||||
+ {
|
||||
+ cp = PRIV(ucd_caseless_sets) + Lpropvalue;
|
||||
+ for (;;)
|
||||
+ {
|
||||
+ if (fc < *cp)
|
||||
+ { if (notmatch) break; else goto GOT_MAX; }
|
||||
+ if (fc == *cp++)
|
||||
+ { if (notmatch) goto GOT_MAX; else break; }
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
Feptr += len;
|
||||
}
|
||||
GOT_MAX:
|
||||
diff --git a/testdata/testinput12 b/testdata/testinput12
|
||||
index de3d406..85550c3 100644
|
||||
--- a/testdata/testinput12
|
||||
+++ b/testdata/testinput12
|
||||
@@ -576,5 +576,31 @@
|
||||
# This used to loop in 32-bit mode; it will fail in 16-bit mode.
|
||||
/[\x{ffffffff}]/caseless,ucp
|
||||
\x{ffffffff}xyz
|
||||
+
|
||||
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
|
||||
+# will give errors in 16-bit mode.
|
||||
+
|
||||
+/k*\x{ffffffff}/caseless,ucp
|
||||
+ \x{ffffffff}
|
||||
+
|
||||
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+ K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+ K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
||||
+\= Expect no match
|
||||
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
||||
+
|
||||
+# ---------------------------------------------------------
|
||||
|
||||
# End of testinput12
|
||||
diff --git a/testdata/testinput14 b/testdata/testinput14
|
||||
index 8a17ae7..8880b5c 100644
|
||||
--- a/testdata/testinput14
|
||||
+++ b/testdata/testinput14
|
||||
@@ -78,4 +78,31 @@
|
||||
|
||||
# ----------------------------------------------------
|
||||
|
||||
+# ----------------------------------------------------
|
||||
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
|
||||
+# mode; for the other widths they will fail.
|
||||
+
|
||||
+/k*\x{ffffffff}/caseless,ucp
|
||||
+ \x{ffffffff}
|
||||
+
|
||||
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+ K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+ K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
||||
+\= Expect no match
|
||||
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
||||
+
|
||||
+# ----------------------------------------------------
|
||||
+
|
||||
# End of testinput14
|
||||
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
|
||||
index 9fa93fa..616d693 100644
|
||||
--- a/testdata/testoutput12-16
|
||||
+++ b/testdata/testoutput12-16
|
||||
@@ -1827,5 +1827,42 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
|
||||
/[\x{ffffffff}]/caseless,ucp
|
||||
Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
||||
\x{ffffffff}xyz
|
||||
+
|
||||
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
|
||||
+# will give errors in 16-bit mode.
|
||||
+
|
||||
+/k*\x{ffffffff}/caseless,ucp
|
||||
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
|
||||
+ \x{ffffffff}
|
||||
+
|
||||
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
|
||||
+ K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
||||
+ K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
||||
+\= Expect no match
|
||||
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
||||
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
+** Truncation will probably give the wrong result.
|
||||
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
+** Truncation will probably give the wrong result.
|
||||
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
+** Truncation will probably give the wrong result.
|
||||
+No match
|
||||
+
|
||||
+# ---------------------------------------------------------
|
||||
|
||||
# End of testinput12
|
||||
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
|
||||
index 721d8bc..3c9586e 100644
|
||||
--- a/testdata/testoutput12-32
|
||||
+++ b/testdata/testoutput12-32
|
||||
@@ -1821,5 +1821,38 @@ No match
|
||||
/[\x{ffffffff}]/caseless,ucp
|
||||
\x{ffffffff}xyz
|
||||
0: \x{ffffffff}
|
||||
+
|
||||
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
|
||||
+# will give errors in 16-bit mode.
|
||||
+
|
||||
+/k*\x{ffffffff}/caseless,ucp
|
||||
+ \x{ffffffff}
|
||||
+ 0: \x{ffffffff}
|
||||
+
|
||||
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+ K\x{ffffffff}
|
||||
+ 0: K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}
|
||||
+No match
|
||||
+
|
||||
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+No match
|
||||
+
|
||||
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+ K\x{ffffffff}
|
||||
+ 0: K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+No match
|
||||
+
|
||||
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
||||
+\= Expect no match
|
||||
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
||||
+No match
|
||||
+
|
||||
+# ---------------------------------------------------------
|
||||
|
||||
# End of testinput12
|
||||
diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16
|
||||
index 61541f6..dd1a977 100644
|
||||
--- a/testdata/testoutput14-16
|
||||
+++ b/testdata/testoutput14-16
|
||||
@@ -122,4 +122,42 @@ No match
|
||||
|
||||
# ----------------------------------------------------
|
||||
|
||||
+# ----------------------------------------------------
|
||||
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
|
||||
+# mode; for the other widths they will fail.
|
||||
+
|
||||
+/k*\x{ffffffff}/caseless,ucp
|
||||
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
|
||||
+ \x{ffffffff}
|
||||
+
|
||||
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
|
||||
+ K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
||||
+ K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
||||
+\= Expect no match
|
||||
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
||||
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
+** Truncation will probably give the wrong result.
|
||||
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
+** Truncation will probably give the wrong result.
|
||||
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
|
||||
+** Truncation will probably give the wrong result.
|
||||
+No match
|
||||
+
|
||||
+# ----------------------------------------------------
|
||||
+
|
||||
# End of testinput14
|
||||
diff --git a/testdata/testoutput14-32 b/testdata/testoutput14-32
|
||||
index f1f65b7..dc21569 100644
|
||||
--- a/testdata/testoutput14-32
|
||||
+++ b/testdata/testoutput14-32
|
||||
@@ -122,4 +122,38 @@ No match
|
||||
|
||||
# ----------------------------------------------------
|
||||
|
||||
+# ----------------------------------------------------
|
||||
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
|
||||
+# mode; for the other widths they will fail.
|
||||
+
|
||||
+/k*\x{ffffffff}/caseless,ucp
|
||||
+ \x{ffffffff}
|
||||
+ 0: \x{ffffffff}
|
||||
+
|
||||
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+ K\x{ffffffff}
|
||||
+ 0: K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}
|
||||
+No match
|
||||
+
|
||||
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+No match
|
||||
+
|
||||
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+ K\x{ffffffff}
|
||||
+ 0: K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+No match
|
||||
+
|
||||
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
||||
+\= Expect no match
|
||||
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
||||
+No match
|
||||
+
|
||||
+# ----------------------------------------------------
|
||||
+
|
||||
# End of testinput14
|
||||
diff --git a/testdata/testoutput14-8 b/testdata/testoutput14-8
|
||||
index aa62414..69285db 100644
|
||||
--- a/testdata/testoutput14-8
|
||||
+++ b/testdata/testoutput14-8
|
||||
@@ -122,4 +122,42 @@ Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too
|
||||
|
||||
# ----------------------------------------------------
|
||||
|
||||
+# ----------------------------------------------------
|
||||
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
|
||||
+# mode; for the other widths they will fail.
|
||||
+
|
||||
+/k*\x{ffffffff}/caseless,ucp
|
||||
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
|
||||
+ \x{ffffffff}
|
||||
+
|
||||
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
|
||||
+ K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
|
||||
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
|
||||
+ K\x{ffffffff}
|
||||
+\= Expect no match
|
||||
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
|
||||
+
|
||||
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
|
||||
+\= Expect no match
|
||||
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
|
||||
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
|
||||
+** Truncation will probably give the wrong result.
|
||||
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
|
||||
+** Truncation will probably give the wrong result.
|
||||
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
|
||||
+** Truncation will probably give the wrong result.
|
||||
+No match
|
||||
+
|
||||
+# ----------------------------------------------------
|
||||
+
|
||||
# End of testinput14
|
||||
--
|
||||
2.33.0
|
||||
|
||||
94
backport-Fix-incorrect-patch-in-c1306126.patch
Normal file
94
backport-Fix-incorrect-patch-in-c1306126.patch
Normal file
@ -0,0 +1,94 @@
|
||||
From 7fe586b892c9e0cbf3b21d57cfd8135e2311e45c Mon Sep 17 00:00:00 2001
|
||||
From: Philip Hazel <Philip.Hazel@gmail.com>
|
||||
Date: Mon, 20 Nov 2023 15:41:06 +0000
|
||||
Subject: [PATCH] Fix incorrect patch in c1306126
|
||||
|
||||
---
|
||||
src/pcre2_compile.c | 20 ++++++++++++++------
|
||||
testdata/testinput2 | 2 ++
|
||||
testdata/testoutput2 | 2 ++
|
||||
3 files changed, 18 insertions(+), 6 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||||
index fdaf2ad..9e45580 100644
|
||||
--- a/src/pcre2_compile.c
|
||||
+++ b/src/pcre2_compile.c
|
||||
@@ -2782,6 +2782,7 @@ uint32_t *previous_callout = NULL;
|
||||
uint32_t *parsed_pattern = cb->parsed_pattern;
|
||||
uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
|
||||
uint32_t *this_parsed_item = NULL;
|
||||
+uint32_t *prev_parsed_item = NULL;
|
||||
uint32_t meta_quantifier = 0;
|
||||
uint32_t add_after_mark = 0;
|
||||
uint32_t extra_options = cb->cx->extra_options;
|
||||
@@ -2867,11 +2868,10 @@ while (ptr < ptrend)
|
||||
uint32_t set, unset, *optset;
|
||||
uint32_t terminator;
|
||||
uint32_t prev_meta_quantifier;
|
||||
- uint32_t *prev_parsed_item = this_parsed_item;
|
||||
BOOL prev_okquantifier;
|
||||
PCRE2_SPTR tempptr;
|
||||
PCRE2_SIZE offset;
|
||||
-
|
||||
+
|
||||
if (parsed_pattern >= parsed_pattern_end)
|
||||
{
|
||||
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
|
||||
@@ -2883,10 +2883,17 @@ while (ptr < ptrend)
|
||||
errorcode = ERR19;
|
||||
goto FAILED; /* Parentheses too deeply nested */
|
||||
}
|
||||
-
|
||||
- /* Remember where this item started */
|
||||
|
||||
- this_parsed_item = parsed_pattern;
|
||||
+ /* If the last time round this loop something was added, parsed_pattern will
|
||||
+ no longer be equal to this_parsed_item. Remember where the previous item
|
||||
+ started and reset for the next item. Note that sometimes round the loop,
|
||||
+ nothing gets added (e.g. for ignored white space). */
|
||||
+
|
||||
+ if (this_parsed_item != parsed_pattern)
|
||||
+ {
|
||||
+ prev_parsed_item = this_parsed_item;
|
||||
+ this_parsed_item = parsed_pattern;
|
||||
+ }
|
||||
|
||||
/* Get next input character, save its position for callout handling. */
|
||||
|
||||
@@ -3440,7 +3447,8 @@ while (ptr < ptrend)
|
||||
|
||||
/* ---- Quantifier post-processing ---- */
|
||||
|
||||
- /* Check that a quantifier is allowed after the previous item. */
|
||||
+ /* Check that a quantifier is allowed after the previous item. This
|
||||
+ guarantees that there is a previous item. */
|
||||
|
||||
CHECK_QUANTIFIER:
|
||||
if (!prev_okquantifier)
|
||||
diff --git a/testdata/testinput2 b/testdata/testinput2
|
||||
index ba292d8..da845c1 100644
|
||||
--- a/testdata/testinput2
|
||||
+++ b/testdata/testinput2
|
||||
@@ -6051,4 +6051,6 @@ a)"xI
|
||||
/abcd/
|
||||
abcd\=ovector=65536
|
||||
|
||||
+/A +/extended
|
||||
+
|
||||
# End of testinput2
|
||||
diff --git a/testdata/testoutput2 b/testdata/testoutput2
|
||||
index 888f06a..85de4ae 100644
|
||||
--- a/testdata/testoutput2
|
||||
+++ b/testdata/testoutput2
|
||||
@@ -17932,6 +17932,8 @@ No match
|
||||
abcd\=ovector=65536
|
||||
0: abcd
|
||||
|
||||
+/A +/extended
|
||||
+
|
||||
# End of testinput2
|
||||
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
|
||||
Error -62: bad serialized data
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,28 @@
|
||||
From b88126f42382fa470b6480f82489303d4311ce18 Mon Sep 17 00:00:00 2001
|
||||
From: Philip Hazel <Philip.Hazel@gmail.com>
|
||||
Date: Thu, 16 Nov 2023 13:49:49 +0000
|
||||
Subject: [PATCH] Fix oversight in DFA when changing OP_REVERSE; also add some
|
||||
unrelated tests
|
||||
|
||||
Conflict:don't add unrelated tests
|
||||
|
||||
---
|
||||
src/pcre2_dfa_match.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
|
||||
index e90c984..5768407 100644
|
||||
--- a/src/pcre2_dfa_match.c
|
||||
+++ b/src/pcre2_dfa_match.c
|
||||
@@ -591,7 +591,7 @@ if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
|
||||
end_code = this_start_code;
|
||||
do
|
||||
{
|
||||
- size_t back = (size_t)GET(end_code, 2+LINK_SIZE);
|
||||
+ size_t back = (size_t)GET2(end_code, 2+LINK_SIZE);
|
||||
if (back > max_back) max_back = back;
|
||||
end_code += GET(end_code, 1);
|
||||
}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
105
backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch
Normal file
105
backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch
Normal file
@ -0,0 +1,105 @@
|
||||
From 05206d66340341bef7a673108a855f594c148950 Mon Sep 17 00:00:00 2001
|
||||
From: Philip Hazel <Philip.Hazel@gmail.com>
|
||||
Date: Sun, 19 Nov 2023 18:32:10 +0000
|
||||
Subject: [PATCH] Fix \z behaviour when matching within invalid UTF
|
||||
|
||||
---
|
||||
src/pcre2_match.c | 6 ++++--
|
||||
testdata/testinput10 | 3 +++
|
||||
testdata/testinput12 | 3 +++
|
||||
testdata/testoutput10 | 4 ++++
|
||||
testdata/testoutput12-16 | 4 ++++
|
||||
testdata/testoutput12-32 | 4 ++++
|
||||
6 files changed, 22 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
|
||||
index 2dcf8c4..ea03976 100644
|
||||
--- a/src/pcre2_match.c
|
||||
+++ b/src/pcre2_match.c
|
||||
@@ -6076,10 +6076,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
|
||||
if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
|
||||
|
||||
/* Fall through */
|
||||
- /* Unconditional end of subject assertion (\z) */
|
||||
+ /* Unconditional end of subject assertion (\z). We must check NOTEOL
|
||||
+ because it gets set for invalid UTF fragments. */
|
||||
|
||||
case OP_EOD:
|
||||
- if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
|
||||
+ if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0)
|
||||
+ RRETURN(MATCH_NOMATCH);
|
||||
if (mb->partial != 0)
|
||||
{
|
||||
mb->hitend = TRUE;
|
||||
diff --git a/testdata/testinput10 b/testdata/testinput10
|
||||
index c7618b1..e901d51 100644
|
||||
--- a/testdata/testinput10
|
||||
+++ b/testdata/testinput10
|
||||
@@ -642,4 +642,7 @@
|
||||
qchq\=ph
|
||||
qchq\=ps
|
||||
|
||||
+/A\z/utf,match_invalid_utf
|
||||
+ A\x80\x42\n
|
||||
+
|
||||
# End of testinput10
|
||||
diff --git a/testdata/testinput12 b/testdata/testinput12
|
||||
index 1e552e6..5a2d8d2 100644
|
||||
--- a/testdata/testinput12
|
||||
+++ b/testdata/testinput12
|
||||
@@ -464,6 +464,9 @@
|
||||
|
||||
/aa/utf,ucp,match_invalid_utf,global
|
||||
\x{d800}aa
|
||||
+
|
||||
+/A\z/utf,match_invalid_utf
|
||||
+ A\x{df00}\n
|
||||
|
||||
# ----------------------------------------------------
|
||||
|
||||
diff --git a/testdata/testoutput10 b/testdata/testoutput10
|
||||
index 18dd9d2..8145891 100644
|
||||
--- a/testdata/testoutput10
|
||||
+++ b/testdata/testoutput10
|
||||
@@ -1921,4 +1921,8 @@ Partial match:
|
||||
qchq\=ps
|
||||
Partial match:
|
||||
|
||||
+/A\z/utf,match_invalid_utf
|
||||
+ A\x80\x42\n
|
||||
+No match
|
||||
+
|
||||
# End of testinput10
|
||||
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
|
||||
index 8cbc13d..9ac403e 100644
|
||||
--- a/testdata/testoutput12-16
|
||||
+++ b/testdata/testoutput12-16
|
||||
@@ -1607,6 +1607,10 @@ No match
|
||||
/aa/utf,ucp,match_invalid_utf,global
|
||||
\x{d800}aa
|
||||
0: aa
|
||||
+
|
||||
+/A\z/utf,match_invalid_utf
|
||||
+ A\x{df00}\n
|
||||
+No match
|
||||
|
||||
# ----------------------------------------------------
|
||||
|
||||
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
|
||||
index 1a98b4b..9396305 100644
|
||||
--- a/testdata/testoutput12-32
|
||||
+++ b/testdata/testoutput12-32
|
||||
@@ -1605,6 +1605,10 @@ No match
|
||||
/aa/utf,ucp,match_invalid_utf,global
|
||||
\x{d800}aa
|
||||
0: aa
|
||||
+
|
||||
+/A\z/utf,match_invalid_utf
|
||||
+ A\x{df00}\n
|
||||
+No match
|
||||
|
||||
# ----------------------------------------------------
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
20
pcre2.spec
20
pcre2.spec
@ -1,6 +1,6 @@
|
||||
Name: pcre2
|
||||
Version: 10.42
|
||||
Release: 3
|
||||
Release: 4
|
||||
Summary: Perl Compatible Regular Expressions
|
||||
License: BSD
|
||||
URL: http://www.pcre.org/
|
||||
@ -14,6 +14,19 @@ Patch6003: backport-fix-wrong-test.patch
|
||||
Patch6004: sljit-sv48-sv57.patch
|
||||
Patch6005: backport-fix-a-possible-integer-overflow-in-DFA-matching-305.patch
|
||||
|
||||
Patch6006: backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch
|
||||
Patch6007: backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch
|
||||
Patch6008: backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch
|
||||
Patch6009: backport-Fix-incorrect-patch-in-c1306126.patch
|
||||
Patch6010: backport-Fix-another-oversight-in-c1306126.patch
|
||||
Patch6011: backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch
|
||||
Patch6012: backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch
|
||||
Patch6013: backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch
|
||||
Patch6014: backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch
|
||||
Patch6015: backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch
|
||||
Patch6016: backport-Fix-accept-and-endanchored-interaction-in-JIT.patch
|
||||
Patch6017: backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch
|
||||
|
||||
BuildRequires: autoconf libtool automake coreutils gcc make readline-devel
|
||||
Obsoletes: pcre2-utf16 pcre2-utf32 pcre2-tools
|
||||
Provides: pcre2-utf16 pcre2-utf32 pcre2-tools
|
||||
@ -129,8 +142,11 @@ make check
|
||||
%{_pkgdocdir}/html/
|
||||
|
||||
%changelog
|
||||
* Mon Jan 22 2024 xujing <xujing125@huawei.com> - 10.42-4
|
||||
- DESC:sync patches from upstream to fix some bugs
|
||||
|
||||
* Thu Dec 14 2023 xujing <xujing125@huawei.com> - 10.42-3
|
||||
+- DESC:fix a possible integer overflow in DFA matching (#305)
|
||||
- DESC:fix a possible integer overflow in DFA matching (#305)
|
||||
|
||||
* Mon Dec 04 2023 Jingwiw <wangjingwei@iscas.ac.cn> - 10.42-2
|
||||
- enable riscv jit and fix error for sv48-sv57
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user