pcre2: sync patches from upstream to fix some bugs

This commit is contained in:
xujing 2024-01-22 17:04:24 +08:00
parent 2124205d67
commit 352a8d46ef
13 changed files with 1259 additions and 2 deletions

View File

@ -0,0 +1,116 @@
From c1306126c3f12c16ad62dd2553132f64a28ca607 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Sun, 19 Nov 2023 17:18:07 +0000
Subject: [PATCH] Fix 32-bit quantifier following a character larger than the
maximum UTF character.
---
src/pcre2_compile.c | 11 ++++++++---
testdata/testinput12 | 6 ++++++
testdata/testoutput12-16 | 7 +++++++
testdata/testoutput12-32 | 7 +++++++
4 files changed, 28 insertions(+), 3 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index b3e4969..fdaf2ad 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -2781,6 +2781,7 @@ uint32_t *verbstartptr = NULL;
uint32_t *previous_callout = NULL;
uint32_t *parsed_pattern = cb->parsed_pattern;
uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
+uint32_t *this_parsed_item = NULL;
uint32_t meta_quantifier = 0;
uint32_t add_after_mark = 0;
uint32_t extra_options = cb->cx->extra_options;
@@ -2866,10 +2867,11 @@ while (ptr < ptrend)
uint32_t set, unset, *optset;
uint32_t terminator;
uint32_t prev_meta_quantifier;
+ uint32_t *prev_parsed_item = this_parsed_item;
BOOL prev_okquantifier;
PCRE2_SPTR tempptr;
PCRE2_SIZE offset;
-
+
if (parsed_pattern >= parsed_pattern_end)
{
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
@@ -2881,6 +2883,10 @@ while (ptr < ptrend)
errorcode = ERR19;
goto FAILED; /* Parentheses too deeply nested */
}
+
+ /* Remember where this item started */
+
+ this_parsed_item = parsed_pattern;
/* Get next input character, save its position for callout handling. */
@@ -3173,7 +3179,6 @@ while (ptr < ptrend)
continue; /* Next character in pattern */
}
-
/* Process the next item in the main part of a pattern. */
switch(c)
@@ -3450,7 +3455,7 @@ while (ptr < ptrend)
wrapping it in non-capturing brackets, but we have to allow for a preceding
(*MARK) for when (*ACCEPT) has an argument. */
- if (parsed_pattern[-1] == META_ACCEPT)
+ if (*prev_parsed_item == META_ACCEPT)
{
uint32_t *p;
for (p = parsed_pattern - 1; p >= verbstartptr; p--) p[1] = p[0];
diff --git a/testdata/testinput12 b/testdata/testinput12
index 7a85eb5..1e552e6 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -560,4 +560,10 @@
# ----------------------------------------------------
+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
+# fails in 16-bit mode, but is OK for 32-bit.
+
+/\x{802a0000}*/
+ \x{802a0000}\x{802a0000}
+
# End of testinput12
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 9867632..8cbc13d 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1803,4 +1803,11 @@ No match
# ----------------------------------------------------
+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
+# fails in 16-bit mode, but is OK for 32-bit.
+
+/\x{802a0000}*/
+Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
+ \x{802a0000}\x{802a0000}
+
# End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index 3a20dd4..1a98b4b 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1801,4 +1801,11 @@ No match
# ----------------------------------------------------
+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
+# fails in 16-bit mode, but is OK for 32-bit.
+
+/\x{802a0000}*/
+ \x{802a0000}\x{802a0000}
+ 0: \x{802a0000}\x{802a0000}
+
# End of testinput12
--
2.33.0

View File

@ -0,0 +1,91 @@
From 45dcb3de900b77583f4e9daa663004c55fad4794 Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Wed, 22 Nov 2023 10:22:59 +0000
Subject: [PATCH] Fix \X matching in 32 bit mode without UTF in JIT
---
src/pcre2_jit_compile.c | 6 +++---
testdata/testinput12 | 4 ++++
testdata/testoutput12-16 | 9 +++++++++
testdata/testoutput12-32 | 5 +++++
4 files changed, 21 insertions(+), 3 deletions(-)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 510c392..8d64e1c 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -8718,7 +8718,7 @@ c = *cc++;
#if PCRE2_CODE_UNIT_WIDTH == 32
if (c >= 0x110000)
- return NULL;
+ return cc;
#endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
lgb = UCD_GRAPHBREAK(c);
@@ -8958,7 +8958,7 @@ switch(type)
#else
sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
- if (!common->utf || common->invalid_utf)
+ if (common->invalid_utf)
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
#endif
@@ -12044,7 +12044,7 @@ switch(opcode)
}
#if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
- if (common->utf)
+ if (type == OP_EXTUNI || common->utf)
{
OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
detect_partial_match(common, &no_match);
diff --git a/testdata/testinput12 b/testdata/testinput12
index 5a2d8d2..a6678bb 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -569,4 +569,8 @@
/\x{802a0000}*/
\x{802a0000}\x{802a0000}
+# UTF matching without UTF, check invalid UTF characters
+/\X++/
+ a\x{110000}\x{ffffffff}
+
# End of testinput12
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 9ac403e..f3b40a3 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1814,4 +1814,13 @@ No match
Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
\x{802a0000}\x{802a0000}
+# UTF matching without UTF, check invalid UTF characters
+/\X++/
+ a\x{110000}\x{ffffffff}
+** Character \x{110000} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+ 0: a\x00\x{ffff}
+
# End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index 9396305..dd42f86 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1812,4 +1812,9 @@ No match
\x{802a0000}\x{802a0000}
0: \x{802a0000}\x{802a0000}
+# UTF matching without UTF, check invalid UTF characters
+/\X++/
+ a\x{110000}\x{ffffffff}
+ 0: a\x{110000}\x{ffffffff}
+
# End of testinput12
--
2.33.0

View File

@ -0,0 +1,75 @@
From 1c09efe6b0008a3b463299efe7501bc3140806f3 Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Wed, 6 Dec 2023 10:06:50 +0000
Subject: [PATCH] Fix accept and endanchored interaction in JIT
---
src/pcre2_jit_compile.c | 15 ++++++++++++---
src/pcre2_jit_test.c | 1 +
2 files changed, 13 insertions(+), 3 deletions(-)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 2e11c3c..849e2c8 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -489,6 +489,8 @@ typedef struct compiler_common {
jump_list *casefulcmp;
jump_list *caselesscmp;
jump_list *reset_match;
+ /* Same as reset_match, but resets the STR_PTR as well. */
+ jump_list *restart_match;
BOOL unset_backref;
BOOL alt_circumflex;
#ifdef SUPPORT_UNICODE
@@ -3146,7 +3148,7 @@ return (value & (value - 1)) == 0;
static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
{
-while (list)
+while (list != NULL)
{
/* sljit_set_label is clever enough to do nothing
if either the jump or the label is NULL. */
@@ -12187,7 +12189,7 @@ if (*cc == OP_FAIL)
}
if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
- add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
+ add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
{
@@ -14552,10 +14554,17 @@ if (common->caselesscmp != NULL)
set_jumps(common->caselesscmp, LABEL());
do_caselesscmp(common);
}
-if (common->reset_match != NULL)
+if (common->reset_match != NULL || common->restart_match != NULL)
{
+ if (common->restart_match != NULL)
+ {
+ set_jumps(common->restart_match, LABEL());
+ OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
+ }
+
set_jumps(common->reset_match, LABEL());
do_reset_match(common, (re->top_bracket + 1) * 2);
+ /* The value of restart_match is in TMP1. */
CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
JUMPTO(SLJIT_JUMP, reset_match_label);
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index b5d95d5..0974d19 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -655,6 +655,7 @@ static struct regression_test_case regression_test_cases[] = {
{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
+ { MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" },
/* Conditional blocks. */
{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
--
2.33.0

View File

@ -0,0 +1,28 @@
From 04f6668a09c51cf10fa5514019843ab0af9724c8 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Tue, 21 Nov 2023 15:10:34 +0000
Subject: [PATCH] Fix another oversight in c1306126
---
src/pcre2_compile.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 9e45580..7b522c5 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -3108,8 +3108,11 @@ while (ptr < ptrend)
!read_repeat_counts(&tempptr, ptrend, NULL, NULL, &errorcode))))
{
if (after_manual_callout-- <= 0)
+ {
parsed_pattern = manage_callouts(thisptr, &previous_callout, auto_callout,
parsed_pattern, cb);
+ this_parsed_item = parsed_pattern; /* New start for current item */
+ }
}
/* If expect_cond_assert is 2, we have just passed (?( and are expecting an
--
2.33.0

View File

@ -0,0 +1,43 @@
From 936fef2a4480b21f5c43b207181097736fb311e3 Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Wed, 22 Nov 2023 11:50:38 +0000
Subject: [PATCH] Fix backref iterators when PCRE2_MATCH_UNSET_BACKREF is set
in JIT
---
src/pcre2_jit_compile.c | 4 +++-
src/pcre2_jit_test.c | 1 +
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 8d64e1c..8110d8c 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -9539,9 +9539,11 @@ if (!minimize)
if (ref)
OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+
if (ref)
{
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+ if (!common->unset_backref)
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
}
else
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index b27cec7..8bff3dc 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -595,6 +595,7 @@ static struct regression_test_case regression_test_cases[] = {
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
+ { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
/* Assertions. */
{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
--
2.33.0

View File

@ -0,0 +1,40 @@
From 9de4d53cf850e0fca625ce9d80c12bea5b2a5ab9 Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Thu, 7 Dec 2023 09:03:24 +0000
Subject: [PATCH] Fix backreferences with unset backref and non-greedy
iterators in JIT
---
src/pcre2_jit_compile.c | 3 ++-
src/pcre2_jit_test.c | 1 +
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 0f445e1..e1daa1e 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -9653,7 +9653,8 @@ else
{
if (ref)
{
- add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+ if (!common->unset_backref)
+ add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
}
else
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index 0974d19..9b63c8e 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -596,6 +596,7 @@ static struct regression_test_case regression_test_cases[] = {
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
+ { MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" },
/* Assertions. */
{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
--
2.33.0

View File

@ -0,0 +1,87 @@
From 57ee073252dc826dbe412846a83421d2bb4483bc Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Wed, 22 Nov 2023 11:34:27 +0000
Subject: [PATCH] Fix bad patch in 05206d66. The interpreter was handling
NOTEOL incorrectly in general after trying to fix it in invalid UTF subjects.
---
src/pcre2_intmodedep.h | 3 ++-
src/pcre2_match.c | 7 +++----
testdata/testinput2 | 4 ++++
testdata/testoutput2 | 6 ++++++
4 files changed, 15 insertions(+), 5 deletions(-)
diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h
index 5e7e10d..423764d 100644
--- a/src/pcre2_intmodedep.h
+++ b/src/pcre2_intmodedep.h
@@ -880,7 +880,8 @@ typedef struct match_block {
PCRE2_SPTR start_code; /* For use when recursing */
PCRE2_SPTR start_subject; /* Start of the subject string */
PCRE2_SPTR check_subject; /* Where UTF-checked from */
- PCRE2_SPTR end_subject; /* End of the subject string */
+ PCRE2_SPTR end_subject; /* Usable end of the subject string */
+ PCRE2_SPTR true_end_subject; /* Actual end of the subject string */
PCRE2_SPTR end_match_ptr; /* Subject position at end match */
PCRE2_SPTR start_used_ptr; /* Earliest consulted character */
PCRE2_SPTR last_used_ptr; /* Latest consulted character */
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index ea03976..c5e84ce 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6076,12 +6076,10 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
/* Fall through */
- /* Unconditional end of subject assertion (\z). We must check NOTEOL
- because it gets set for invalid UTF fragments. */
+ /* Unconditional end of subject assertion (\z). */
case OP_EOD:
- if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0)
- RRETURN(MATCH_NOMATCH);
+ if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
if (mb->partial != 0)
{
mb->hitend = TRUE;
@@ -6891,6 +6889,7 @@ mb->callout_data = mcontext->callout_data;
mb->start_subject = subject;
mb->start_offset = start_offset;
mb->end_subject = end_subject;
+mb->true_end_subject = true_end_subject;
mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
mb->allowemptypartial = (re->max_lookbehind > 0) ||
(re->flags & PCRE2_MATCH_EMPTY) != 0;
diff --git a/testdata/testinput2 b/testdata/testinput2
index 0e24e78..b874f20 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -6055,4 +6055,8 @@ a)"xI
/A +/extended
+/a\z/
+ a
+ a\=noteol
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 68800fb..c1bc0e6 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -17946,6 +17946,12 @@ No match
/A +/extended
+/a\z/
+ a
+ 0: a
+ a\=noteol
+ 0: a
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.33.0

View File

@ -0,0 +1,77 @@
From afce00e484cff118a824dac498e8044680dac401 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Fri, 1 Dec 2023 16:49:59 +0000
Subject: [PATCH] Fix compile loop in 32-bit mode for characters above the
Unicode limit when caseless and ucp are set.
---
src/pcre2_compile.c | 6 +++++-
testdata/testinput12 | 4 ++++
testdata/testoutput12-16 | 5 +++++
testdata/testoutput12-32 | 5 +++++
4 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 4a4fab1..3e4014b 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -4954,10 +4954,14 @@ uint32_t c, othercase, next;
unsigned int co;
/* Find the first character that has an other case. If it has multiple other
-cases, return its case offset value. */
+cases, return its case offset value. In 32-bit mode, a value
+greater than the Unicode maximum ends the range. */
for (c = *cptr; c <= d; c++)
{
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (c > MAX_UTF_CODE_POINT) return -1;
+#endif
if ((co = UCD_CASESET(c)) != 0)
{
*ocptr = c++; /* Character that has the set */
diff --git a/testdata/testinput12 b/testdata/testinput12
index a6678bb..de3d406 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -573,4 +573,8 @@
/\X++/
a\x{110000}\x{ffffffff}
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+ \x{ffffffff}xyz
+
# End of testinput12
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index f3b40a3..9fa93fa 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1823,4 +1823,9 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
** Truncation will probably give the wrong result.
0: a\x00\x{ffff}
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+ \x{ffffffff}xyz
+
# End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index dd42f86..721d8bc 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1817,4 +1817,9 @@ No match
a\x{110000}\x{ffffffff}
0: a\x{110000}\x{ffffffff}
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+ \x{ffffffff}xyz
+ 0: \x{ffffffff}
+
# End of testinput12
--
2.33.0

View File

@ -0,0 +1,457 @@
From ad73148dfb6d06280a4d87f322991762aff90a55 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Mon, 4 Dec 2023 16:11:41 +0000
Subject: [PATCH] Fix incorrect matching of 0xffffffff to any character with
more than one other case in 32-bit UCP (but not UTF) mode.
---
src/pcre2_dfa_match.c | 28 ++++++++++++++++++++++++++
src/pcre2_match.c | 43 ++++++++++++++++++++++++++++++++++------
testdata/testinput12 | 26 ++++++++++++++++++++++++
testdata/testinput14 | 27 +++++++++++++++++++++++++
testdata/testoutput12-16 | 37 ++++++++++++++++++++++++++++++++++
testdata/testoutput12-32 | 33 ++++++++++++++++++++++++++++++
testdata/testoutput14-16 | 38 +++++++++++++++++++++++++++++++++++
testdata/testoutput14-32 | 34 +++++++++++++++++++++++++++++++
testdata/testoutput14-8 | 38 +++++++++++++++++++++++++++++++++++
9 files changed, 298 insertions(+), 6 deletions(-)
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index 1c48ad6..caae652 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -1241,6 +1241,13 @@ for (;;)
break;
case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (c > MAX_UTF_CODE_POINT)
+ {
+ OK = FALSE;
+ break;
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + code[2];
for (;;)
{
@@ -1516,6 +1523,13 @@ for (;;)
break;
case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (c > MAX_UTF_CODE_POINT)
+ {
+ OK = FALSE;
+ break;
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + code[3];
for (;;)
{
@@ -1774,6 +1788,13 @@ for (;;)
break;
case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (c > MAX_UTF_CODE_POINT)
+ {
+ OK = FALSE;
+ break;
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + code[3];
for (;;)
{
@@ -2058,6 +2079,13 @@ for (;;)
break;
case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (c > MAX_UTF_CODE_POINT)
+ {
+ OK = FALSE;
+ break;
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
for (;;)
{
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index d162e70..b2e1f23 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -2565,6 +2565,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
break;
case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (fc > MAX_UTF_CODE_POINT)
+ {
+ if (notmatch) break;;
+ RRETURN(MATCH_NOMATCH);
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + Fecode[2];
for (;;)
{
@@ -2885,6 +2892,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (fc > MAX_UTF_CODE_POINT)
+ {
+ if (notmatch) continue;
+ RRETURN(MATCH_NOMATCH);
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + Lpropvalue;
for (;;)
{
@@ -3698,6 +3712,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
RRETURN(MATCH_NOMATCH);
}
GETCHARINCTEST(fc, Feptr);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (fc > MAX_UTF_CODE_POINT)
+ {
+ if (Lctype == OP_NOTPROP) continue;
+ RRETURN(MATCH_NOMATCH);
+ }
+#endif
cp = PRIV(ucd_caseless_sets) + Lpropvalue;
for (;;)
{
@@ -4278,14 +4299,24 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
break;
}
GETCHARLENTEST(fc, Feptr, len);
- cp = PRIV(ucd_caseless_sets) + Lpropvalue;
- for (;;)
+#if PCRE2_CODE_UNIT_WIDTH == 32
+ if (fc > MAX_UTF_CODE_POINT)
{
- if (fc < *cp)
- { if (notmatch) break; else goto GOT_MAX; }
- if (fc == *cp++)
- { if (notmatch) goto GOT_MAX; else break; }
+ if (!notmatch) goto GOT_MAX;
}
+ else
+#endif
+ {
+ cp = PRIV(ucd_caseless_sets) + Lpropvalue;
+ for (;;)
+ {
+ if (fc < *cp)
+ { if (notmatch) break; else goto GOT_MAX; }
+ if (fc == *cp++)
+ { if (notmatch) goto GOT_MAX; else break; }
+ }
+ }
+
Feptr += len;
}
GOT_MAX:
diff --git a/testdata/testinput12 b/testdata/testinput12
index de3d406..85550c3 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -576,5 +576,31 @@
# This used to loop in 32-bit mode; it will fail in 16-bit mode.
/[\x{ffffffff}]/caseless,ucp
\x{ffffffff}xyz
+
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
+# will give errors in 16-bit mode.
+
+/k*\x{ffffffff}/caseless,ucp
+ \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+
+# ---------------------------------------------------------
# End of testinput12
diff --git a/testdata/testinput14 b/testdata/testinput14
index 8a17ae7..8880b5c 100644
--- a/testdata/testinput14
+++ b/testdata/testinput14
@@ -78,4 +78,31 @@
# ----------------------------------------------------
+# ----------------------------------------------------
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
+# mode; for the other widths they will fail.
+
+/k*\x{ffffffff}/caseless,ucp
+ \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+
+# ----------------------------------------------------
+
# End of testinput14
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 9fa93fa..616d693 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1827,5 +1827,42 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
/[\x{ffffffff}]/caseless,ucp
Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
\x{ffffffff}xyz
+
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
+# will give errors in 16-bit mode.
+
+/k*\x{ffffffff}/caseless,ucp
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+ \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+No match
+
+# ---------------------------------------------------------
# End of testinput12
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index 721d8bc..3c9586e 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1821,5 +1821,38 @@ No match
/[\x{ffffffff}]/caseless,ucp
\x{ffffffff}xyz
0: \x{ffffffff}
+
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
+# will give errors in 16-bit mode.
+
+/k*\x{ffffffff}/caseless,ucp
+ \x{ffffffff}
+ 0: \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+ 0: K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+No match
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+No match
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+ 0: K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+No match
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+No match
+
+# ---------------------------------------------------------
# End of testinput12
diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16
index 61541f6..dd1a977 100644
--- a/testdata/testoutput14-16
+++ b/testdata/testoutput14-16
@@ -122,4 +122,42 @@ No match
# ----------------------------------------------------
+# ----------------------------------------------------
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
+# mode; for the other widths they will fail.
+
+/k*\x{ffffffff}/caseless,ucp
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+ \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+No match
+
+# ----------------------------------------------------
+
# End of testinput14
diff --git a/testdata/testoutput14-32 b/testdata/testoutput14-32
index f1f65b7..dc21569 100644
--- a/testdata/testoutput14-32
+++ b/testdata/testoutput14-32
@@ -122,4 +122,38 @@ No match
# ----------------------------------------------------
+# ----------------------------------------------------
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
+# mode; for the other widths they will fail.
+
+/k*\x{ffffffff}/caseless,ucp
+ \x{ffffffff}
+ 0: \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+ 0: K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+No match
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+No match
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+ K\x{ffffffff}
+ 0: K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+No match
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+No match
+
+# ----------------------------------------------------
+
# End of testinput14
diff --git a/testdata/testoutput14-8 b/testdata/testoutput14-8
index aa62414..69285db 100644
--- a/testdata/testoutput14-8
+++ b/testdata/testoutput14-8
@@ -122,4 +122,42 @@ Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too
# ----------------------------------------------------
+# ----------------------------------------------------
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
+# mode; for the other widths they will fail.
+
+/k*\x{ffffffff}/caseless,ucp
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+ \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+ K\x{ffffffff}
+\= Expect no match
+ \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+ Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
+** Truncation will probably give the wrong result.
+No match
+
+# ----------------------------------------------------
+
# End of testinput14
--
2.33.0

View File

@ -0,0 +1,94 @@
From 7fe586b892c9e0cbf3b21d57cfd8135e2311e45c Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Mon, 20 Nov 2023 15:41:06 +0000
Subject: [PATCH] Fix incorrect patch in c1306126
---
src/pcre2_compile.c | 20 ++++++++++++++------
testdata/testinput2 | 2 ++
testdata/testoutput2 | 2 ++
3 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index fdaf2ad..9e45580 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -2782,6 +2782,7 @@ uint32_t *previous_callout = NULL;
uint32_t *parsed_pattern = cb->parsed_pattern;
uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
uint32_t *this_parsed_item = NULL;
+uint32_t *prev_parsed_item = NULL;
uint32_t meta_quantifier = 0;
uint32_t add_after_mark = 0;
uint32_t extra_options = cb->cx->extra_options;
@@ -2867,11 +2868,10 @@ while (ptr < ptrend)
uint32_t set, unset, *optset;
uint32_t terminator;
uint32_t prev_meta_quantifier;
- uint32_t *prev_parsed_item = this_parsed_item;
BOOL prev_okquantifier;
PCRE2_SPTR tempptr;
PCRE2_SIZE offset;
-
+
if (parsed_pattern >= parsed_pattern_end)
{
errorcode = ERR63; /* Internal error (parsed pattern overflow) */
@@ -2883,10 +2883,17 @@ while (ptr < ptrend)
errorcode = ERR19;
goto FAILED; /* Parentheses too deeply nested */
}
-
- /* Remember where this item started */
- this_parsed_item = parsed_pattern;
+ /* If the last time round this loop something was added, parsed_pattern will
+ no longer be equal to this_parsed_item. Remember where the previous item
+ started and reset for the next item. Note that sometimes round the loop,
+ nothing gets added (e.g. for ignored white space). */
+
+ if (this_parsed_item != parsed_pattern)
+ {
+ prev_parsed_item = this_parsed_item;
+ this_parsed_item = parsed_pattern;
+ }
/* Get next input character, save its position for callout handling. */
@@ -3440,7 +3447,8 @@ while (ptr < ptrend)
/* ---- Quantifier post-processing ---- */
- /* Check that a quantifier is allowed after the previous item. */
+ /* Check that a quantifier is allowed after the previous item. This
+ guarantees that there is a previous item. */
CHECK_QUANTIFIER:
if (!prev_okquantifier)
diff --git a/testdata/testinput2 b/testdata/testinput2
index ba292d8..da845c1 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -6051,4 +6051,6 @@ a)"xI
/abcd/
abcd\=ovector=65536
+/A +/extended
+
# End of testinput2
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 888f06a..85de4ae 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -17932,6 +17932,8 @@ No match
abcd\=ovector=65536
0: abcd
+/A +/extended
+
# End of testinput2
Error -70: PCRE2_ERROR_BADDATA (unknown error number)
Error -62: bad serialized data
--
2.33.0

View File

@ -0,0 +1,28 @@
From b88126f42382fa470b6480f82489303d4311ce18 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Thu, 16 Nov 2023 13:49:49 +0000
Subject: [PATCH] Fix oversight in DFA when changing OP_REVERSE; also add some
unrelated tests
Conflict:don't add unrelated tests
---
src/pcre2_dfa_match.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
index e90c984..5768407 100644
--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
@@ -591,7 +591,7 @@ if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
end_code = this_start_code;
do
{
- size_t back = (size_t)GET(end_code, 2+LINK_SIZE);
+ size_t back = (size_t)GET2(end_code, 2+LINK_SIZE);
if (back > max_back) max_back = back;
end_code += GET(end_code, 1);
}
--
2.33.0

View File

@ -0,0 +1,105 @@
From 05206d66340341bef7a673108a855f594c148950 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Sun, 19 Nov 2023 18:32:10 +0000
Subject: [PATCH] Fix \z behaviour when matching within invalid UTF
---
src/pcre2_match.c | 6 ++++--
testdata/testinput10 | 3 +++
testdata/testinput12 | 3 +++
testdata/testoutput10 | 4 ++++
testdata/testoutput12-16 | 4 ++++
testdata/testoutput12-32 | 4 ++++
6 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index 2dcf8c4..ea03976 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -6076,10 +6076,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
/* Fall through */
- /* Unconditional end of subject assertion (\z) */
+ /* Unconditional end of subject assertion (\z). We must check NOTEOL
+ because it gets set for invalid UTF fragments. */
case OP_EOD:
- if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
+ if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0)
+ RRETURN(MATCH_NOMATCH);
if (mb->partial != 0)
{
mb->hitend = TRUE;
diff --git a/testdata/testinput10 b/testdata/testinput10
index c7618b1..e901d51 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -642,4 +642,7 @@
qchq\=ph
qchq\=ps
+/A\z/utf,match_invalid_utf
+ A\x80\x42\n
+
# End of testinput10
diff --git a/testdata/testinput12 b/testdata/testinput12
index 1e552e6..5a2d8d2 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -464,6 +464,9 @@
/aa/utf,ucp,match_invalid_utf,global
\x{d800}aa
+
+/A\z/utf,match_invalid_utf
+ A\x{df00}\n
# ----------------------------------------------------
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index 18dd9d2..8145891 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1921,4 +1921,8 @@ Partial match:
qchq\=ps
Partial match:
+/A\z/utf,match_invalid_utf
+ A\x80\x42\n
+No match
+
# End of testinput10
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 8cbc13d..9ac403e 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1607,6 +1607,10 @@ No match
/aa/utf,ucp,match_invalid_utf,global
\x{d800}aa
0: aa
+
+/A\z/utf,match_invalid_utf
+ A\x{df00}\n
+No match
# ----------------------------------------------------
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index 1a98b4b..9396305 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1605,6 +1605,10 @@ No match
/aa/utf,ucp,match_invalid_utf,global
\x{d800}aa
0: aa
+
+/A\z/utf,match_invalid_utf
+ A\x{df00}\n
+No match
# ----------------------------------------------------
--
2.33.0

View File

@ -1,6 +1,6 @@
Name: pcre2
Version: 10.42
Release: 3
Release: 4
Summary: Perl Compatible Regular Expressions
License: BSD
URL: http://www.pcre.org/
@ -14,6 +14,19 @@ Patch6003: backport-fix-wrong-test.patch
Patch6004: sljit-sv48-sv57.patch
Patch6005: backport-fix-a-possible-integer-overflow-in-DFA-matching-305.patch
Patch6006: backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch
Patch6007: backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch
Patch6008: backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch
Patch6009: backport-Fix-incorrect-patch-in-c1306126.patch
Patch6010: backport-Fix-another-oversight-in-c1306126.patch
Patch6011: backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch
Patch6012: backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch
Patch6013: backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch
Patch6014: backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch
Patch6015: backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch
Patch6016: backport-Fix-accept-and-endanchored-interaction-in-JIT.patch
Patch6017: backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch
BuildRequires: autoconf libtool automake coreutils gcc make readline-devel
Obsoletes: pcre2-utf16 pcre2-utf32 pcre2-tools
Provides: pcre2-utf16 pcre2-utf32 pcre2-tools
@ -129,8 +142,11 @@ make check
%{_pkgdocdir}/html/
%changelog
* Mon Jan 22 2024 xujing <xujing125@huawei.com> - 10.42-4
- DESC:sync patches from upstream to fix some bugs
* Thu Dec 14 2023 xujing <xujing125@huawei.com> - 10.42-3
+- DESC:fix a possible integer overflow in DFA matching (#305)
- DESC:fix a possible integer overflow in DFA matching (#305)
* Mon Dec 04 2023 Jingwiw <wangjingwei@iscas.ac.cn> - 10.42-2
- enable riscv jit and fix error for sv48-sv57