pcre2: sync patches from upstream to fix some bugs

2024-01-22 17:04:24 +08:00 · 2024-01-22 17:04:24 +08:00 · 352a8d46ef
commit 352a8d46ef
parent 2124205d67
13 changed files with 1259 additions and 2 deletions
--- a/backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch
+++ b/backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch
@ -0,0 +1,116 @@
+From c1306126c3f12c16ad62dd2553132f64a28ca607 Mon Sep 17 00:00:00 2001
+From: Philip Hazel <Philip.Hazel@gmail.com>
+Date: Sun, 19 Nov 2023 17:18:07 +0000
+Subject: [PATCH] Fix 32-bit quantifier following a character larger than the
+ maximum UTF character.
+
+---
+ src/pcre2_compile.c      | 11 ++++++++---
+ testdata/testinput12     |  6 ++++++
+ testdata/testoutput12-16 |  7 +++++++
+ testdata/testoutput12-32 |  7 +++++++
+ 4 files changed, 28 insertions(+), 3 deletions(-)
+
+diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
+index b3e4969..fdaf2ad 100644
+--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
+@@ -2781,6 +2781,7 @@ uint32_t *verbstartptr = NULL;
+ uint32_t *previous_callout = NULL;
+ uint32_t *parsed_pattern = cb->parsed_pattern;
+ uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
+uint32_t *this_parsed_item = NULL;
+ uint32_t meta_quantifier = 0;
+ uint32_t add_after_mark = 0;
+ uint32_t extra_options = cb->cx->extra_options;
+@@ -2866,10 +2867,11 @@ while (ptr < ptrend)
+   uint32_t set, unset, *optset;
+   uint32_t terminator;
+   uint32_t prev_meta_quantifier;
+  uint32_t *prev_parsed_item = this_parsed_item; 
+   BOOL prev_okquantifier;
+   PCRE2_SPTR tempptr;
+   PCRE2_SIZE offset;
+-
+  
+   if (parsed_pattern >= parsed_pattern_end)
+     {
+     errorcode = ERR63;  /* Internal error (parsed pattern overflow) */
+@@ -2881,6 +2883,10 @@ while (ptr < ptrend)
+     errorcode = ERR19;
+     goto FAILED;        /* Parentheses too deeply nested */
+     }
+    
+  /* Remember where this item started */
+
+  this_parsed_item = parsed_pattern;
+ 
+   /* Get next input character, save its position for callout handling. */
+ 
+@@ -3173,7 +3179,6 @@ while (ptr < ptrend)
+     continue;  /* Next character in pattern */
+     }
+ 
+-
+   /* Process the next item in the main part of a pattern. */
+ 
+   switch(c)
+@@ -3450,7 +3455,7 @@ while (ptr < ptrend)
+     wrapping it in non-capturing brackets, but we have to allow for a preceding
+     (*MARK) for when (*ACCEPT) has an argument. */
+ 
+-    if (parsed_pattern[-1] == META_ACCEPT)
+    if (*prev_parsed_item == META_ACCEPT)
+       {
+       uint32_t *p;
+       for (p = parsed_pattern - 1; p >= verbstartptr; p--) p[1] = p[0];
+diff --git a/testdata/testinput12 b/testdata/testinput12
+index 7a85eb5..1e552e6 100644
+--- a/testdata/testinput12
+++ b/testdata/testinput12
+@@ -560,4 +560,10 @@
+ 
+ # ---------------------------------------------------- 
+ 
+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
+# fails in 16-bit mode, but is OK for 32-bit.
+
+/\x{802a0000}*/
+    \x{802a0000}\x{802a0000}
+
+ # End of testinput12
+diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
+index 9867632..8cbc13d 100644
+--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
+@@ -1803,4 +1803,11 @@ No match
+ 
+ # ---------------------------------------------------- 
+ 
+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
+# fails in 16-bit mode, but is OK for 32-bit.
+
+/\x{802a0000}*/
+Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
+    \x{802a0000}\x{802a0000}
+
+ # End of testinput12
+diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
+index 3a20dd4..1a98b4b 100644
+--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
+@@ -1801,4 +1801,11 @@ No match
+ 
+ # ---------------------------------------------------- 
+ 
+# Quantifier after a literal that has the value of META_ACCEPT (not UTF). This
+# fails in 16-bit mode, but is OK for 32-bit.
+
+/\x{802a0000}*/
+    \x{802a0000}\x{802a0000}
+ 0: \x{802a0000}\x{802a0000}
+
+ # End of testinput12
+-- 
+2.33.0
+
--- a/backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch
+++ b/backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch
@ -0,0 +1,91 @@
+From 45dcb3de900b77583f4e9daa663004c55fad4794 Mon Sep 17 00:00:00 2001
+From: Zoltan Herczeg <hzmester@freemail.hu>
+Date: Wed, 22 Nov 2023 10:22:59 +0000
+Subject: [PATCH] Fix \X matching in 32 bit mode without UTF in JIT
+
+---
+ src/pcre2_jit_compile.c  | 6 +++---
+ testdata/testinput12     | 4 ++++
+ testdata/testoutput12-16 | 9 +++++++++
+ testdata/testoutput12-32 | 5 +++++
+ 4 files changed, 21 insertions(+), 3 deletions(-)
+
+diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
+index 510c392..8d64e1c 100644
+--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
+@@ -8718,7 +8718,7 @@ c = *cc++;
+ 
+ #if PCRE2_CODE_UNIT_WIDTH == 32
+ if (c >= 0x110000)
+-  return NULL;
+  return cc;
+ #endif /* PCRE2_CODE_UNIT_WIDTH == 32 */
+ lgb = UCD_GRAPHBREAK(c);
+ 
+@@ -8958,7 +8958,7 @@ switch(type)
+ #else
+   sljit_emit_icall(compiler, SLJIT_CALL, SLJIT_ARGS2(W, W, W), SLJIT_IMM,
+     common->invalid_utf ? SLJIT_FUNC_ADDR(do_extuni_utf_invalid) : SLJIT_FUNC_ADDR(do_extuni_no_utf));
+-  if (!common->utf || common->invalid_utf)
+  if (common->invalid_utf)
+     add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, SLJIT_RETURN_REG, 0, SLJIT_IMM, 0));
+ #endif
+ 
+@@ -12044,7 +12044,7 @@ switch(opcode)
+     }
+ 
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+-  if (common->utf)
+  if (type == OP_EXTUNI || common->utf)
+     {
+     OP1(SLJIT_MOV, tmp_base, tmp_offset, STR_PTR, 0);
+     detect_partial_match(common, &no_match);
+diff --git a/testdata/testinput12 b/testdata/testinput12
+index 5a2d8d2..a6678bb 100644
+--- a/testdata/testinput12
+++ b/testdata/testinput12
+@@ -569,4 +569,8 @@
+ /\x{802a0000}*/
+     \x{802a0000}\x{802a0000}
+ 
+# UTF matching without UTF, check invalid UTF characters
+/\X++/
+    a\x{110000}\x{ffffffff}
+
+ # End of testinput12
+diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
+index 9ac403e..f3b40a3 100644
+--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
+@@ -1814,4 +1814,13 @@ No match
+ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is too large
+     \x{802a0000}\x{802a0000}
+ 
+# UTF matching without UTF, check invalid UTF characters
+/\X++/
+    a\x{110000}\x{ffffffff}
+** Character \x{110000} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+ 0: a\x00\x{ffff}
+
+ # End of testinput12
+diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
+index 9396305..dd42f86 100644
+--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
+@@ -1812,4 +1812,9 @@ No match
+     \x{802a0000}\x{802a0000}
+  0: \x{802a0000}\x{802a0000}
+ 
+# UTF matching without UTF, check invalid UTF characters
+/\X++/
+    a\x{110000}\x{ffffffff}
+ 0: a\x{110000}\x{ffffffff}
+
+ # End of testinput12
+-- 
+2.33.0
+
--- a/backport-Fix-accept-and-endanchored-interaction-in-JIT.patch
+++ b/backport-Fix-accept-and-endanchored-interaction-in-JIT.patch
@ -0,0 +1,75 @@
+From 1c09efe6b0008a3b463299efe7501bc3140806f3 Mon Sep 17 00:00:00 2001
+From: Zoltan Herczeg <hzmester@freemail.hu>
+Date: Wed, 6 Dec 2023 10:06:50 +0000
+Subject: [PATCH] Fix accept and endanchored interaction in JIT
+
+---
+ src/pcre2_jit_compile.c | 15 ++++++++++++---
+ src/pcre2_jit_test.c    |  1 +
+ 2 files changed, 13 insertions(+), 3 deletions(-)
+
+diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
+index 2e11c3c..849e2c8 100644
+--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
+@@ -489,6 +489,8 @@ typedef struct compiler_common {
+   jump_list *casefulcmp;
+   jump_list *caselesscmp;
+   jump_list *reset_match;
+  /* Same as reset_match, but resets the STR_PTR as well. */
+  jump_list *restart_match;
+   BOOL unset_backref;
+   BOOL alt_circumflex;
+ #ifdef SUPPORT_UNICODE
+@@ -3146,7 +3148,7 @@ return (value & (value - 1)) == 0;
+ 
+ static SLJIT_INLINE void set_jumps(jump_list *list, struct sljit_label *label)
+ {
+-while (list)
+while (list != NULL)
+   {
+   /* sljit_set_label is clever enough to do nothing
+   if either the jump or the label is NULL. */
+@@ -12187,7 +12189,7 @@ if (*cc == OP_FAIL)
+   }
+ 
+ if (*cc == OP_ACCEPT && common->currententry == NULL && (common->re->overall_options & PCRE2_ENDANCHORED) != 0)
+-  add_jump(compiler, &common->reset_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
+  add_jump(compiler, &common->restart_match, CMP(SLJIT_NOT_EQUAL, STR_PTR, 0, STR_END, 0));
+ 
+ if (*cc == OP_ASSERT_ACCEPT || common->currententry != NULL || !common->might_be_empty)
+   {
+@@ -14552,10 +14554,17 @@ if (common->caselesscmp != NULL)
+   set_jumps(common->caselesscmp, LABEL());
+   do_caselesscmp(common);
+   }
+-if (common->reset_match != NULL)
+if (common->reset_match != NULL || common->restart_match != NULL)
+   {
+  if (common->restart_match != NULL)
+    {
+    set_jumps(common->restart_match, LABEL());
+    OP1(SLJIT_MOV, STR_PTR, 0, SLJIT_MEM1(SLJIT_SP), common->start_ptr);
+    }
+
+   set_jumps(common->reset_match, LABEL());
+   do_reset_match(common, (re->top_bracket + 1) * 2);
+  /* The value of restart_match is in TMP1. */
+   CMPTO(SLJIT_GREATER, STR_PTR, 0, TMP1, 0, continue_match_label);
+   OP1(SLJIT_MOV, STR_PTR, 0, TMP1, 0);
+   JUMPTO(SLJIT_JUMP, reset_match_label);
+diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
+index b5d95d5..0974d19 100644
+--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
+@@ -655,6 +655,7 @@ static struct regression_test_case regression_test_cases[] = {
+ 	{ MU, A, 0, 0, "(?=(a(b(*ACCEPT)b)))a", "ab" },
+ 	{ MU, A, PCRE2_NOTEMPTY, 0, "(?=a*(*ACCEPT))c", "c" },
+ 	{ MU, A, PCRE2_NOTEMPTY, 0 | F_NOMATCH, "(?=A)", "AB" },
+	{ MU | PCRE2_ENDANCHORED, A, 0, 0, "aa(*ACCEPT)aa", "aaa" },
+ 
+ 	/* Conditional blocks. */
+ 	{ MU, A, 0, 0, "(?(?=(a))a|b)+k", "ababbalbbadabak" },
+-- 
+2.33.0
+
--- a/backport-Fix-another-oversight-in-c1306126.patch
+++ b/backport-Fix-another-oversight-in-c1306126.patch
@ -0,0 +1,28 @@
+From 04f6668a09c51cf10fa5514019843ab0af9724c8 Mon Sep 17 00:00:00 2001
+From: Philip Hazel <Philip.Hazel@gmail.com>
+Date: Tue, 21 Nov 2023 15:10:34 +0000
+Subject: [PATCH] Fix another oversight in c1306126
+
+---
+ src/pcre2_compile.c | 3 +++
+ 1 file changed, 3 insertions(+)
+
+diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
+index 9e45580..7b522c5 100644
+--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
+@@ -3108,8 +3108,11 @@ while (ptr < ptrend)
+          !read_repeat_counts(&tempptr, ptrend, NULL, NULL, &errorcode))))
+     {
+     if (after_manual_callout-- <= 0)
+      {
+       parsed_pattern = manage_callouts(thisptr, &previous_callout, auto_callout,
+         parsed_pattern, cb);
+      this_parsed_item = parsed_pattern;  /* New start for current item */
+      }
+     }
+ 
+   /* If expect_cond_assert is 2, we have just passed (?( and are expecting an
+-- 
+2.33.0
+
--- a/backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch
+++ b/backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch
@ -0,0 +1,43 @@
+From 936fef2a4480b21f5c43b207181097736fb311e3 Mon Sep 17 00:00:00 2001
+From: Zoltan Herczeg <hzmester@freemail.hu>
+Date: Wed, 22 Nov 2023 11:50:38 +0000
+Subject: [PATCH] Fix backref iterators when PCRE2_MATCH_UNSET_BACKREF is set
+ in JIT
+
+---
+ src/pcre2_jit_compile.c | 4 +++-
+ src/pcre2_jit_test.c    | 1 +
+ 2 files changed, 4 insertions(+), 1 deletion(-)
+
+diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
+index 8d64e1c..8110d8c 100644
+--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
+@@ -9539,9 +9539,11 @@ if (!minimize)
+     if (ref)
+       OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset));
+     OP1(SLJIT_MOV, SLJIT_MEM1(STACK_TOP), STACK(0), SLJIT_IMM, 0);
+
+     if (ref)
+       {
+-      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+      if (!common->unset_backref)
+        add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+       zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
+       }
+     else
+diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
+index b27cec7..8bff3dc 100644
+--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
+@@ -595,6 +595,7 @@ static struct regression_test_case regression_test_cases[] = {
+ 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{1,3}M", "aaaaaaaabbbbaabbbbm" },
+ 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
+ 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
+	{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
+ 
+ 	/* Assertions. */
+ 	{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
+-- 
+2.33.0
+
--- a/backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch
+++ b/backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch
@ -0,0 +1,40 @@
+From 9de4d53cf850e0fca625ce9d80c12bea5b2a5ab9 Mon Sep 17 00:00:00 2001
+From: Zoltan Herczeg <hzmester@freemail.hu>
+Date: Thu, 7 Dec 2023 09:03:24 +0000
+Subject: [PATCH] Fix backreferences with unset backref and non-greedy
+ iterators in JIT
+
+---
+ src/pcre2_jit_compile.c | 3 ++-
+ src/pcre2_jit_test.c    | 1 +
+ 2 files changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
+index 0f445e1..e1daa1e 100644
+--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
+@@ -9653,7 +9653,8 @@ else
+   {
+   if (ref)
+     {
+-    add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+    if (!common->unset_backref)
+      add_jump(compiler, &backtrack->topbacktracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(1)));
+     zerolength = CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_MEM1(SLJIT_SP), OVECTOR(offset + 1));
+     }
+   else
+diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
+index 0974d19..9b63c8e 100644
+--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
+@@ -596,6 +596,7 @@ static struct regression_test_case regression_test_cases[] = {
+ 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{0,3}?M", "aaaaaabbbbbbaabbbbbbbbbbm" },
+ 	{ CMU | PCRE2_DUPNAMES, A, 0, 0, "(?:(?<A>AA)|(?<A>BB))\\k<A>{2,3}?", "aaaabbbbaaaabbbbbbbbbb" },
+ 	{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "(a)|\\1+c", "xxc" },
+	{ MU | PCRE2_MATCH_UNSET_BACKREF, A, 0, 0, "\\1+?()", "" },
+ 
+ 	/* Assertions. */
+ 	{ MU, A, 0, 0, "(?=xx|yy|zz)\\w{4}", "abczzdefg" },
+-- 
+2.33.0
+
--- a/backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch
+++ b/backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch
@ -0,0 +1,87 @@
+From 57ee073252dc826dbe412846a83421d2bb4483bc Mon Sep 17 00:00:00 2001
+From: Philip Hazel <Philip.Hazel@gmail.com>
+Date: Wed, 22 Nov 2023 11:34:27 +0000
+Subject: [PATCH] Fix bad patch in 05206d66. The interpreter was handling
+ NOTEOL incorrectly in general after trying to fix it in invalid UTF subjects.
+
+---
+ src/pcre2_intmodedep.h | 3 ++-
+ src/pcre2_match.c      | 7 +++----
+ testdata/testinput2    | 4 ++++
+ testdata/testoutput2   | 6 ++++++
+ 4 files changed, 15 insertions(+), 5 deletions(-)
+
+diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h
+index 5e7e10d..423764d 100644
+--- a/src/pcre2_intmodedep.h
+++ b/src/pcre2_intmodedep.h
+@@ -880,7 +880,8 @@ typedef struct match_block {
+   PCRE2_SPTR start_code;          /* For use when recursing */
+   PCRE2_SPTR start_subject;       /* Start of the subject string */
+   PCRE2_SPTR check_subject;       /* Where UTF-checked from */
+-  PCRE2_SPTR end_subject;         /* End of the subject string */
+  PCRE2_SPTR end_subject;         /* Usable end of the subject string */
+  PCRE2_SPTR true_end_subject;    /* Actual end of the subject string */
+   PCRE2_SPTR end_match_ptr;       /* Subject position at end match */
+   PCRE2_SPTR start_used_ptr;      /* Earliest consulted character */
+   PCRE2_SPTR last_used_ptr;       /* Latest consulted character */
+diff --git a/src/pcre2_match.c b/src/pcre2_match.c
+index ea03976..c5e84ce 100644
+--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
+@@ -6076,12 +6076,10 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
+     if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
+ 
+     /* Fall through */
+-    /* Unconditional end of subject assertion (\z). We must check NOTEOL
+-    because it gets set for invalid UTF fragments. */
+    /* Unconditional end of subject assertion (\z). */
+ 
+     case OP_EOD:
+-    if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0)
+-      RRETURN(MATCH_NOMATCH);
+    if (Feptr < mb->true_end_subject) RRETURN(MATCH_NOMATCH);
+     if (mb->partial != 0)
+       {
+       mb->hitend = TRUE;
+@@ -6891,6 +6889,7 @@ mb->callout_data = mcontext->callout_data;
+ mb->start_subject = subject;
+ mb->start_offset = start_offset;
+ mb->end_subject = end_subject;
+mb->true_end_subject = true_end_subject;
+ mb->hasthen = (re->flags & PCRE2_HASTHEN) != 0;
+ mb->allowemptypartial = (re->max_lookbehind > 0) ||
+     (re->flags & PCRE2_MATCH_EMPTY) != 0;
+diff --git a/testdata/testinput2 b/testdata/testinput2
+index 0e24e78..b874f20 100644
+--- a/testdata/testinput2
+++ b/testdata/testinput2
+@@ -6055,4 +6055,8 @@ a)"xI
+ 
+ /A +/extended
+ 
+/a\z/
+    a
+    a\=noteol 
+
+ # End of testinput2
+diff --git a/testdata/testoutput2 b/testdata/testoutput2
+index 68800fb..c1bc0e6 100644
+--- a/testdata/testoutput2
+++ b/testdata/testoutput2
+@@ -17946,6 +17946,12 @@ No match
+ 
+ /A +/extended
+ 
+/a\z/
+    a
+ 0: a
+    a\=noteol 
+ 0: a
+
+ # End of testinput2
+ Error -70: PCRE2_ERROR_BADDATA (unknown error number)
+ Error -62: bad serialized data
+-- 
+2.33.0
+
--- a/backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch
+++ b/backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch
@ -0,0 +1,77 @@
+From afce00e484cff118a824dac498e8044680dac401 Mon Sep 17 00:00:00 2001
+From: Philip Hazel <Philip.Hazel@gmail.com>
+Date: Fri, 1 Dec 2023 16:49:59 +0000
+Subject: [PATCH] Fix compile loop in 32-bit mode for characters above the
+ Unicode limit when caseless and ucp are set.
+
+---
+ src/pcre2_compile.c      | 6 +++++-
+ testdata/testinput12     | 4 ++++
+ testdata/testoutput12-16 | 5 +++++
+ testdata/testoutput12-32 | 5 +++++
+ 4 files changed, 19 insertions(+), 1 deletion(-)
+
+diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
+index 4a4fab1..3e4014b 100644
+--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
+@@ -4954,10 +4954,14 @@ uint32_t c, othercase, next;
+ unsigned int co;
+ 
+ /* Find the first character that has an other case. If it has multiple other
+-cases, return its case offset value. */
+cases, return its case offset value. In 32-bit mode, a value
+greater than the Unicode maximum ends the range. */
+ 
+ for (c = *cptr; c <= d; c++)
+   {
+#if PCRE2_CODE_UNIT_WIDTH == 32
+  if (c > MAX_UTF_CODE_POINT) return -1;
+#endif
+   if ((co = UCD_CASESET(c)) != 0)
+     {
+     *ocptr = c++;   /* Character that has the set */
+diff --git a/testdata/testinput12 b/testdata/testinput12
+index a6678bb..de3d406 100644
+--- a/testdata/testinput12
+++ b/testdata/testinput12
+@@ -573,4 +573,8 @@
+ /\X++/
+     a\x{110000}\x{ffffffff}
+ 
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+    \x{ffffffff}xyz
+
+ # End of testinput12
+diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
+index f3b40a3..9fa93fa 100644
+--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
+@@ -1823,4 +1823,9 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
+ ** Truncation will probably give the wrong result.
+  0: a\x00\x{ffff}
+ 
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+    \x{ffffffff}xyz
+
+ # End of testinput12
+diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
+index dd42f86..721d8bc 100644
+--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
+@@ -1817,4 +1817,9 @@ No match
+     a\x{110000}\x{ffffffff}
+  0: a\x{110000}\x{ffffffff}
+ 
+# This used to loop in 32-bit mode; it will fail in 16-bit mode.
+/[\x{ffffffff}]/caseless,ucp
+    \x{ffffffff}xyz
+ 0: \x{ffffffff}
+
+ # End of testinput12
+-- 
+2.33.0
+
--- a/backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch
+++ b/backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch
@ -0,0 +1,457 @@
+From ad73148dfb6d06280a4d87f322991762aff90a55 Mon Sep 17 00:00:00 2001
+From: Philip Hazel <Philip.Hazel@gmail.com>
+Date: Mon, 4 Dec 2023 16:11:41 +0000
+Subject: [PATCH] Fix incorrect matching of 0xffffffff to any character with
+ more than one other case in 32-bit UCP (but not UTF) mode.
+
+---
+ src/pcre2_dfa_match.c    | 28 ++++++++++++++++++++++++++
+ src/pcre2_match.c        | 43 ++++++++++++++++++++++++++++++++++------
+ testdata/testinput12     | 26 ++++++++++++++++++++++++
+ testdata/testinput14     | 27 +++++++++++++++++++++++++
+ testdata/testoutput12-16 | 37 ++++++++++++++++++++++++++++++++++
+ testdata/testoutput12-32 | 33 ++++++++++++++++++++++++++++++
+ testdata/testoutput14-16 | 38 +++++++++++++++++++++++++++++++++++
+ testdata/testoutput14-32 | 34 +++++++++++++++++++++++++++++++
+ testdata/testoutput14-8  | 38 +++++++++++++++++++++++++++++++++++
+ 9 files changed, 298 insertions(+), 6 deletions(-)
+
+diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
+index 1c48ad6..caae652 100644
+--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
+@@ -1241,6 +1241,13 @@ for (;;)
+           break;
+ 
+           case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+          if (c > MAX_UTF_CODE_POINT)
+            {
+            OK = FALSE;
+            break;
+            }
+#endif
+           cp = PRIV(ucd_caseless_sets) + code[2];
+           for (;;)
+             {
+@@ -1516,6 +1523,13 @@ for (;;)
+           break;
+ 
+           case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+          if (c > MAX_UTF_CODE_POINT)
+            {
+            OK = FALSE;
+            break;
+            }
+#endif
+           cp = PRIV(ucd_caseless_sets) + code[3];
+           for (;;)
+             {
+@@ -1774,6 +1788,13 @@ for (;;)
+           break;
+ 
+           case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+          if (c > MAX_UTF_CODE_POINT)
+            {
+            OK = FALSE;
+            break;
+            }
+#endif
+           cp = PRIV(ucd_caseless_sets) + code[3];
+           for (;;)
+             {
+@@ -2058,6 +2079,13 @@ for (;;)
+           break;
+ 
+           case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+          if (c > MAX_UTF_CODE_POINT)
+            {
+            OK = FALSE;
+            break;
+            }
+#endif
+           cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
+           for (;;)
+             {
+diff --git a/src/pcre2_match.c b/src/pcre2_match.c
+index d162e70..b2e1f23 100644
+--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
+@@ -2565,6 +2565,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
+         break;
+ 
+         case PT_CLIST:
+#if PCRE2_CODE_UNIT_WIDTH == 32
+            if (fc > MAX_UTF_CODE_POINT)
+              {
+              if (notmatch) break;;
+              RRETURN(MATCH_NOMATCH);
+              }
+#endif
+         cp = PRIV(ucd_caseless_sets) + Fecode[2];
+         for (;;)
+           {
+@@ -2885,6 +2892,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
+               RRETURN(MATCH_NOMATCH);
+               }
+             GETCHARINCTEST(fc, Feptr);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+            if (fc > MAX_UTF_CODE_POINT)
+              {
+              if (notmatch) continue;
+              RRETURN(MATCH_NOMATCH);
+              }
+#endif
+             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
+             for (;;)
+               {
+@@ -3698,6 +3712,13 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
+               RRETURN(MATCH_NOMATCH);
+               }
+             GETCHARINCTEST(fc, Feptr);
+#if PCRE2_CODE_UNIT_WIDTH == 32
+            if (fc > MAX_UTF_CODE_POINT)
+              {
+              if (Lctype == OP_NOTPROP) continue;
+              RRETURN(MATCH_NOMATCH);
+              }
+#endif
+             cp = PRIV(ucd_caseless_sets) + Lpropvalue;
+             for (;;)
+               {
+@@ -4278,14 +4299,24 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
+               break;
+               }
+             GETCHARLENTEST(fc, Feptr, len);
+-            cp = PRIV(ucd_caseless_sets) + Lpropvalue;
+-            for (;;)
+#if PCRE2_CODE_UNIT_WIDTH == 32
+            if (fc > MAX_UTF_CODE_POINT)
+               {
+-              if (fc < *cp)
+-                { if (notmatch) break; else goto GOT_MAX; }
+-              if (fc == *cp++)
+-                { if (notmatch) goto GOT_MAX; else break; }
+              if (!notmatch) goto GOT_MAX;
+               }
+            else
+#endif
+              {
+              cp = PRIV(ucd_caseless_sets) + Lpropvalue;
+              for (;;)
+                {
+                if (fc < *cp)
+                  { if (notmatch) break; else goto GOT_MAX; }
+                if (fc == *cp++)
+                  { if (notmatch) goto GOT_MAX; else break; }
+                }
+              }
+
+             Feptr += len;
+             }
+           GOT_MAX:
+diff --git a/testdata/testinput12 b/testdata/testinput12
+index de3d406..85550c3 100644
+--- a/testdata/testinput12
+++ b/testdata/testinput12
+@@ -576,5 +576,31 @@
+ # This used to loop in 32-bit mode; it will fail in 16-bit mode.
+ /[\x{ffffffff}]/caseless,ucp
+     \x{ffffffff}xyz
+    
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
+# will give errors in 16-bit mode.
+
+/k*\x{ffffffff}/caseless,ucp
+    \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+    K\x{ffffffff}
+\= Expect no match     
+    \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+    K\x{ffffffff}
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+    Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+
+# --------------------------------------------------------- 
+ 
+ # End of testinput12
+diff --git a/testdata/testinput14 b/testdata/testinput14
+index 8a17ae7..8880b5c 100644
+--- a/testdata/testinput14
+++ b/testdata/testinput14
+@@ -78,4 +78,31 @@
+ 
+ # ---------------------------------------------------- 
+ 
+# ---------------------------------------------------- 
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
+# mode; for the other widths they will fail.
+
+/k*\x{ffffffff}/caseless,ucp
+    \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+    K\x{ffffffff}
+\= Expect no match     
+    \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+    K\x{ffffffff}
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+    Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+
+# ---------------------------------------------------- 
+
+ # End of testinput14
+diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
+index 9fa93fa..616d693 100644
+--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
+@@ -1827,5 +1827,42 @@ Failed: error 134 at offset 11: character code point value in \x{} or \o{} is to
+ /[\x{ffffffff}]/caseless,ucp
+ Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+     \x{ffffffff}xyz
+    
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
+# will give errors in 16-bit mode.
+
+/k*\x{ffffffff}/caseless,ucp
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+    \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+    K\x{ffffffff}
+\= Expect no match     
+    \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+    K\x{ffffffff}
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+    Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+No match
+
+# --------------------------------------------------------- 
+ 
+ # End of testinput12
+diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
+index 721d8bc..3c9586e 100644
+--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
+@@ -1821,5 +1821,38 @@ No match
+ /[\x{ffffffff}]/caseless,ucp
+     \x{ffffffff}xyz
+  0: \x{ffffffff}
+    
+# These are 32-bit tests for handing 0xffffffff when in UCP caselsss mode. They
+# will give errors in 16-bit mode.
+
+/k*\x{ffffffff}/caseless,ucp
+    \x{ffffffff}
+ 0: \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+    K\x{ffffffff}
+ 0: K\x{ffffffff}
+\= Expect no match     
+    \x{ffffffff}\x{ffffffff}
+No match
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+No match
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+    K\x{ffffffff}
+ 0: K\x{ffffffff}
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+No match
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+    Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+No match
+
+# --------------------------------------------------------- 
+ 
+ # End of testinput12
+diff --git a/testdata/testoutput14-16 b/testdata/testoutput14-16
+index 61541f6..dd1a977 100644
+--- a/testdata/testoutput14-16
+++ b/testdata/testoutput14-16
+@@ -122,4 +122,42 @@ No match
+ 
+ # ---------------------------------------------------- 
+ 
+# ---------------------------------------------------- 
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
+# mode; for the other widths they will fail.
+
+/k*\x{ffffffff}/caseless,ucp
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+    \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+    K\x{ffffffff}
+\= Expect no match     
+    \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+    K\x{ffffffff}
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+    Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 0xffff and UTF-16 mode is not enabled.
+** Truncation will probably give the wrong result.
+No match
+
+# ---------------------------------------------------- 
+
+ # End of testinput14
+diff --git a/testdata/testoutput14-32 b/testdata/testoutput14-32
+index f1f65b7..dc21569 100644
+--- a/testdata/testoutput14-32
+++ b/testdata/testoutput14-32
+@@ -122,4 +122,38 @@ No match
+ 
+ # ---------------------------------------------------- 
+ 
+# ---------------------------------------------------- 
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
+# mode; for the other widths they will fail.
+
+/k*\x{ffffffff}/caseless,ucp
+    \x{ffffffff}
+ 0: \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+    K\x{ffffffff}
+ 0: K\x{ffffffff}
+\= Expect no match     
+    \x{ffffffff}\x{ffffffff}
+No match
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+No match
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+    K\x{ffffffff}
+ 0: K\x{ffffffff}
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+No match
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+    Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+No match
+
+# ---------------------------------------------------- 
+
+ # End of testinput14
+diff --git a/testdata/testoutput14-8 b/testdata/testoutput14-8
+index aa62414..69285db 100644
+--- a/testdata/testoutput14-8
+++ b/testdata/testoutput14-8
+@@ -122,4 +122,42 @@ Failed: error 134 at offset 8: character code point value in \x{} or \o{} is too
+ 
+ # ---------------------------------------------------- 
+ 
+# ---------------------------------------------------- 
+# Tests for handling 0xffffffff in caseless UCP mode. They only apply to 32-bit
+# mode; for the other widths they will fail.
+
+/k*\x{ffffffff}/caseless,ucp
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+    \x{ffffffff}
+
+/k+\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 13: character code point value in \x{} or \o{} is too large
+    K\x{ffffffff}
+\= Expect no match     
+    \x{ffffffff}\x{ffffffff}
+
+/k{2}\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 15: character code point value in \x{} or \o{} is too large
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k\x{ffffffff}/caseless,ucp,no_start_optimize
+Failed: error 134 at offset 12: character code point value in \x{} or \o{} is too large
+    K\x{ffffffff}
+\= Expect no match
+    \x{ffffffff}\x{ffffffff}\x{ffffffff}
+
+/k{2,}?Z/caseless,ucp,no_start_optimize,no_auto_possess
+\= Expect no match
+    Kk\x{ffffffff}\x{ffffffff}\x{ffffffff}Z
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
+** Truncation will probably give the wrong result.
+** Character \x{ffffffff} is greater than 255 and UTF-8 mode is not enabled.
+** Truncation will probably give the wrong result.
+No match
+
+# ---------------------------------------------------- 
+
+ # End of testinput14
+-- 
+2.33.0
+
--- a/backport-Fix-incorrect-patch-in-c1306126.patch
+++ b/backport-Fix-incorrect-patch-in-c1306126.patch
@ -0,0 +1,94 @@
+From 7fe586b892c9e0cbf3b21d57cfd8135e2311e45c Mon Sep 17 00:00:00 2001
+From: Philip Hazel <Philip.Hazel@gmail.com>
+Date: Mon, 20 Nov 2023 15:41:06 +0000
+Subject: [PATCH] Fix incorrect patch in c1306126
+
+---
+ src/pcre2_compile.c  | 20 ++++++++++++++------
+ testdata/testinput2  |  2 ++
+ testdata/testoutput2 |  2 ++
+ 3 files changed, 18 insertions(+), 6 deletions(-)
+
+diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
+index fdaf2ad..9e45580 100644
+--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
+@@ -2782,6 +2782,7 @@ uint32_t *previous_callout = NULL;
+ uint32_t *parsed_pattern = cb->parsed_pattern;
+ uint32_t *parsed_pattern_end = cb->parsed_pattern_end;
+ uint32_t *this_parsed_item = NULL;
+uint32_t *prev_parsed_item = NULL;
+ uint32_t meta_quantifier = 0;
+ uint32_t add_after_mark = 0;
+ uint32_t extra_options = cb->cx->extra_options;
+@@ -2867,11 +2868,10 @@ while (ptr < ptrend)
+   uint32_t set, unset, *optset;
+   uint32_t terminator;
+   uint32_t prev_meta_quantifier;
+-  uint32_t *prev_parsed_item = this_parsed_item; 
+   BOOL prev_okquantifier;
+   PCRE2_SPTR tempptr;
+   PCRE2_SIZE offset;
+-  
+
+   if (parsed_pattern >= parsed_pattern_end)
+     {
+     errorcode = ERR63;  /* Internal error (parsed pattern overflow) */
+@@ -2883,10 +2883,17 @@ while (ptr < ptrend)
+     errorcode = ERR19;
+     goto FAILED;        /* Parentheses too deeply nested */
+     }
+-    
+-  /* Remember where this item started */
+ 
+-  this_parsed_item = parsed_pattern;
+  /* If the last time round this loop something was added, parsed_pattern will
+  no longer be equal to this_parsed_item. Remember where the previous item
+  started and reset for the next item. Note that sometimes round the loop,
+  nothing gets added (e.g. for ignored white space). */
+
+  if (this_parsed_item != parsed_pattern)
+    {
+    prev_parsed_item = this_parsed_item;
+    this_parsed_item = parsed_pattern;
+    }
+ 
+   /* Get next input character, save its position for callout handling. */
+ 
+@@ -3440,7 +3447,8 @@ while (ptr < ptrend)
+ 
+     /* ---- Quantifier post-processing ---- */
+ 
+-    /* Check that a quantifier is allowed after the previous item. */
+    /* Check that a quantifier is allowed after the previous item. This
+    guarantees that there is a previous item. */
+ 
+     CHECK_QUANTIFIER:
+     if (!prev_okquantifier)
+diff --git a/testdata/testinput2 b/testdata/testinput2
+index ba292d8..da845c1 100644
+--- a/testdata/testinput2
+++ b/testdata/testinput2
+@@ -6051,4 +6051,6 @@ a)"xI
+ /abcd/
+     abcd\=ovector=65536
+ 
+/A +/extended
+
+ # End of testinput2
+diff --git a/testdata/testoutput2 b/testdata/testoutput2
+index 888f06a..85de4ae 100644
+--- a/testdata/testoutput2
+++ b/testdata/testoutput2
+@@ -17932,6 +17932,8 @@ No match
+     abcd\=ovector=65536
+  0: abcd
+ 
+/A +/extended
+
+ # End of testinput2
+ Error -70: PCRE2_ERROR_BADDATA (unknown error number)
+ Error -62: bad serialized data
+-- 
+2.33.0
+
--- a/backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch
+++ b/backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch
@ -0,0 +1,28 @@
+From b88126f42382fa470b6480f82489303d4311ce18 Mon Sep 17 00:00:00 2001
+From: Philip Hazel <Philip.Hazel@gmail.com>
+Date: Thu, 16 Nov 2023 13:49:49 +0000
+Subject: [PATCH] Fix oversight in DFA when changing OP_REVERSE; also add some
+ unrelated tests
+
+Conflict: the unrelated tests added by the upstream commit are not backported
+
+---
+ src/pcre2_dfa_match.c |  2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/src/pcre2_dfa_match.c b/src/pcre2_dfa_match.c
+index e90c984..5768407 100644
+--- a/src/pcre2_dfa_match.c
+++ b/src/pcre2_dfa_match.c
+@@ -591,7 +591,7 @@ if (*this_start_code == OP_ASSERTBACK || *this_start_code == OP_ASSERTBACK_NOT)
+   end_code = this_start_code;
+   do
+     {
+-    size_t back = (size_t)GET(end_code, 2+LINK_SIZE);
+    size_t back = (size_t)GET2(end_code, 2+LINK_SIZE);
+     if (back > max_back) max_back = back;
+     end_code += GET(end_code, 1);
+     }
+-- 
+2.33.0
+
--- a/backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch
+++ b/backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch
@ -0,0 +1,105 @@
+From 05206d66340341bef7a673108a855f594c148950 Mon Sep 17 00:00:00 2001
+From: Philip Hazel <Philip.Hazel@gmail.com>
+Date: Sun, 19 Nov 2023 18:32:10 +0000
+Subject: [PATCH] Fix \z behaviour when matching within invalid UTF
+
+---
+ src/pcre2_match.c        | 6 ++++--
+ testdata/testinput10     | 3 +++
+ testdata/testinput12     | 3 +++
+ testdata/testoutput10    | 4 ++++
+ testdata/testoutput12-16 | 4 ++++
+ testdata/testoutput12-32 | 4 ++++
+ 6 files changed, 22 insertions(+), 2 deletions(-)
+
+diff --git a/src/pcre2_match.c b/src/pcre2_match.c
+index 2dcf8c4..ea03976 100644
+--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
+@@ -6076,10 +6076,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
+     if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
+ 
+     /* Fall through */
+-    /* Unconditional end of subject assertion (\z) */
+    /* Unconditional end of subject assertion (\z). We must check NOTEOL
+    because it gets set for invalid UTF fragments. */
+ 
+     case OP_EOD:
+-    if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
+    if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0)
+      RRETURN(MATCH_NOMATCH);
+     if (mb->partial != 0)
+       {
+       mb->hitend = TRUE;
+diff --git a/testdata/testinput10 b/testdata/testinput10
+index c7618b1..e901d51 100644
+--- a/testdata/testinput10
+++ b/testdata/testinput10
+@@ -642,4 +642,7 @@
+     qchq\=ph
+     qchq\=ps
+ 
+/A\z/utf,match_invalid_utf
+    A\x80\x42\n
+
+ # End of testinput10
+diff --git a/testdata/testinput12 b/testdata/testinput12
+index 1e552e6..5a2d8d2 100644
+--- a/testdata/testinput12
+++ b/testdata/testinput12
+@@ -464,6 +464,9 @@
+ 
+ /aa/utf,ucp,match_invalid_utf,global
+     \x{d800}aa
+    
+/A\z/utf,match_invalid_utf
+    A\x{df00}\n
+ 
+ # ---------------------------------------------------- 
+ 
+diff --git a/testdata/testoutput10 b/testdata/testoutput10
+index 18dd9d2..8145891 100644
+--- a/testdata/testoutput10
+++ b/testdata/testoutput10
+@@ -1921,4 +1921,8 @@ Partial match:
+     qchq\=ps
+ Partial match: 
+ 
+/A\z/utf,match_invalid_utf
+    A\x80\x42\n
+No match
+
+ # End of testinput10
+diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
+index 8cbc13d..9ac403e 100644
+--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
+@@ -1607,6 +1607,10 @@ No match
+ /aa/utf,ucp,match_invalid_utf,global
+     \x{d800}aa
+  0: aa
+    
+/A\z/utf,match_invalid_utf
+    A\x{df00}\n
+No match
+ 
+ # ---------------------------------------------------- 
+ 
+diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
+index 1a98b4b..9396305 100644
+--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
+@@ -1605,6 +1605,10 @@ No match
+ /aa/utf,ucp,match_invalid_utf,global
+     \x{d800}aa
+  0: aa
+    
+/A\z/utf,match_invalid_utf
+    A\x{df00}\n
+No match
+ 
+ # ---------------------------------------------------- 
+ 
+-- 
+2.33.0
+
--- a/pcre2.spec
+++ b/pcre2.spec
@ -1,6 +1,6 @@
 Name:        pcre2
 Version:     10.42
-Release:     3
+Release:     4
 Summary:     Perl Compatible Regular Expressions
 License:     BSD
 URL:         http://www.pcre.org/
@ -14,6 +14,19 @@ Patch6003:     backport-fix-wrong-test.patch
 Patch6004:     sljit-sv48-sv57.patch
 Patch6005:     backport-fix-a-possible-integer-overflow-in-DFA-matching-305.patch

+Patch6006:     backport-Fix-oversight-in-DFA-when-changing-OP_REVERSE-also-a.patch
+Patch6007:     backport-Fix-32-bit-quantifier-following-a-character-larger-t.patch
+Patch6008:     backport-Fix-z-behaviour-when-matching-within-invalid-UTF.patch
+Patch6009:     backport-Fix-incorrect-patch-in-c1306126.patch
+Patch6010:     backport-Fix-another-oversight-in-c1306126.patch
+Patch6011:     backport-Fix-X-matching-in-32-bit-mode-without-UTF-in-JIT.patch
+Patch6012:     backport-Fix-bad-patch-in-05206d66.-The-interpreter-was-handl.patch
+Patch6013:     backport-Fix-backref-iterators-when-PCRE2_MATCH_UNSET_BACKREF.patch
+Patch6014:     backport-Fix-compile-loop-in-32-bit-mode-for-characters-above.patch
+Patch6015:     backport-Fix-incorrect-matching-of-0xffffffff-to-any-characte.patch
+Patch6016:     backport-Fix-accept-and-endanchored-interaction-in-JIT.patch
+Patch6017:     backport-Fix-backreferences-with-unset-backref-and-non-greedy.patch
+
 BuildRequires:  autoconf libtool automake coreutils gcc make readline-devel
 Obsoletes:      pcre2-utf16 pcre2-utf32 pcre2-tools
 Provides:       pcre2-utf16 pcre2-utf32 pcre2-tools
@ -129,8 +142,11 @@ make check
 %{_pkgdocdir}/html/

 %changelog
+* Mon Jan 22 2024 xujing <xujing125@huawei.com> - 10.42-4
+- DESC:sync patches from upstream to fix some bugs
+
 * Thu Dec 14 2023 xujing <xujing125@huawei.com> - 10.42-3
-+- DESC:fix a possible integer overflow in DFA matching (#305)
+- DESC:fix a possible integer overflow in DFA matching (#305)

 * Mon Dec 04 2023 Jingwiw  <wangjingwei@iscas.ac.cn> - 10.42-2
 - enable riscv jit and fix error for sv48-sv57