sync patches from upstream

2024-12-09 16:51:06 +08:00 · 2024-12-09 16:51:06 +08:00 · abeb907f1d
commit abeb907f1d
parent 3822b2533a
6 changed files with 1590 additions and 1 deletions
--- a/backport-Fix-the-lookahead-after-d-or-posix-to-skip-whitespac.patch
+++ b/backport-Fix-the-lookahead-after-d-or-posix-to-skip-whitespac.patch
@ -0,0 +1,318 @@
+From 16d7edb56757e5294eeeecc9a19135aab89a50ba Mon Sep 17 00:00:00 2001
+From: Nicholas Wilson <niwilson@microsoft.com>
+Date: Fri, 1 Nov 2024 17:13:34 +0000
+Subject: [PATCH] Fix the lookahead after [\d or [[:posix] to skip whitespace
+ (#544)
+
+Conflict:don't modify alt_extended_class because fc38d9e784 is not merged;
+don't modify class_op_state because class_op_state is not merged; adapt context
+Reference:https://github.com/PCRE2Project/pcre2/commit/16d7edb56757e5294eeeecc9a19135aab89a50ba
+
+---
+ src/pcre2_compile.c    |  88 +++++++++++++++++++++++++++---------------
+ src/pcre2_intmodedep.h |   2 +-
+ testdata/testinput1    |  20 +++++++---
+ testdata/testinput2    |   8 ++++
+ testdata/testoutput1   |  30 ++++++++++----
+ testdata/testoutput2   |  12 ++++++
+ 6 files changed, 113 insertions(+), 47 deletions(-)
+
+diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
+index 2493c871..9be26b07 100644
+--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
+@@ -2681,7 +2681,14 @@ the main compiling phase. */
+ /* States used for analyzing ranges in character classes. The two OK values
+ must be last. */
+ 
+-enum { RANGE_NO, RANGE_STARTED, RANGE_OK_ESCAPED, RANGE_OK_LITERAL };
+enum {
+  RANGE_NO, /* State after '[' (initial), or '[a-z'; hyphen is literal */
+  RANGE_STARTED, /* State after '[1-'; last-emitted code is META_RANGE_XYZ */
+  RANGE_FORBID_NO, /* State after '[\d'; '-]' is allowed but not '-1]' */
+  RANGE_FORBID_STARTED, /* State after '[\d-'*/
+  RANGE_OK_ESCAPED, /* State after '[\1'; hyphen may be a range */
+  RANGE_OK_LITERAL /* State after '[1'; hyphen may be a range */
+};
+ 
+ /* Only in 32-bit mode can there be literals > META_END. A macro encapsulates
+ the storing of literal values in the main parsed pattern, where they can always
+@@ -2734,6 +2741,7 @@ PCRE2_SPTR thisptr;
+ PCRE2_SPTR name;
+ PCRE2_SPTR ptrend = cb->end_pattern;
+ PCRE2_SPTR verbnamestart = NULL;    /* Value avoids compiler warning */
+PCRE2_SPTR class_range_forbid_ptr = NULL;
+ named_group *ng;
+ nest_save *top_nest, *end_nests;
+ 
+@@ -3559,6 +3567,21 @@ while (ptr < ptrend)
+           goto FAILED;
+           }
+ 
+        /* Perl treats a hyphen after a POSIX class as a literal, not the
+        start of a range. However, it gives a warning in its warning mode
+        unless the hyphen is the last character in the class. PCRE does not
+        have a warning mode, so we give an error, because this is likely an
+        error on the user's part.
+
+        Roll back to the hyphen for the error position. */
+
+        if (class_range_state == RANGE_FORBID_STARTED)
+          {
+          ptr = class_range_forbid_ptr;
+          errorcode = ERR50;
+          goto FAILED;
+          }
+
+         if (*ptr != CHAR_COLON)
+           {
+           errorcode = ERR13;
+@@ -3579,26 +3602,12 @@ while (ptr < ptrend)
+           }
+         ptr = tempptr + 2;
+ 
+-        /* Perl treats a hyphen after a POSIX class as a literal, not the
+-        start of a range. However, it gives a warning in its warning mode
+-        unless the hyphen is the last character in the class. PCRE does not
+-        have a warning mode, so we give an error, because this is likely an
+-        error on the user's part. */
+-
+-        if (ptr < ptrend - 1 && *ptr == CHAR_MINUS &&
+-            ptr[1] != CHAR_RIGHT_SQUARE_BRACKET)
+-          {
+-          errorcode = ERR50;
+-          goto FAILED;
+-          }
+-
+-        /* Set "a hyphen is not the start of a range" for the -] case, and also
+-        in case the POSIX class is followed by \E or \Q\E (possibly repeated -
+-        fuzzers do that kind of thing) and *then* a hyphen. This causes that
+-        hyphen to be treated as a literal. I don't think it's worth setting up
+-        special apparatus to do otherwise. */
+        /* Set "a hyphen is forbidden to be the start of a range". For the '-]'
+        case, the hyphen is treated as a literal, but for '-1' it is disallowed
+        (because it would be interpreted as range). */
+
+-        class_range_state = RANGE_NO;
+        class_range_state = RANGE_FORBID_NO;
+        class_range_forbid_ptr = ptr;
+ 
+         /* When PCRE2_UCP is set, unless PCRE2_EXTRA_ASCII_POSIX is set, some
+         of the POSIX classes are converted to use Unicode properties \p or \P
+@@ -3648,6 +3657,14 @@ while (ptr < ptrend)
+         class_range_state = RANGE_STARTED;
+         }
+ 
+      /* Handle forbidden start of range */
+
+      else if (c == CHAR_MINUS && class_range_state == RANGE_FORBID_NO)
+        {
+        *parsed_pattern++ = CHAR_MINUS;
+        class_range_state = RANGE_FORBID_STARTED;
+        }
+
+       /* Handle a literal character */
+ 
+       else if (c != CHAR_BACKSLASH)
+@@ -3670,6 +3687,12 @@ while (ptr < ptrend)
+             }
+           class_range_state = RANGE_NO;
+           }
+        else if (class_range_state == RANGE_FORBID_STARTED)
+          {
+          ptr = class_range_forbid_ptr;
+          errorcode = ERR50;
+          goto FAILED;
+          }
+         else  /* Potential start of range */
+           {
+           class_range_state = char_is_literal?
+@@ -3733,13 +3756,23 @@ while (ptr < ptrend)
+         if (class_range_state == RANGE_STARTED)
+           {
+           errorcode = ERR50;
+-          goto FAILED;  /* Not CLASS_ESCAPE_FAILED; always an error */
+          goto FAILED;
+          }
+        /* Perl gives a warning unless the hyphen following a multi-character
+        escape is the last character in the class. PCRE throws an error. */
+        if (class_range_state == RANGE_FORBID_STARTED)
+          {
+          ptr = class_range_forbid_ptr;
+          errorcode = ERR50;
+          goto FAILED;
+           }
+ 
+         /* Of the remaining escapes, only those that define characters are
+         allowed in a class. None may start a range. */
+ 
+-        class_range_state = RANGE_NO;
+        class_range_state = RANGE_FORBID_NO;
+        class_range_forbid_ptr = ptr;
+
+         switch(escape)
+           {
+           case ESC_N:
+@@ -3779,6 +3812,7 @@ while (ptr < ptrend)
+             if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P;
+             *parsed_pattern++ = META_ESCAPE + escape;
+             *parsed_pattern++ = (ptype << 16) | pdata;
+            class_range_forbid_ptr = ptr;
+             }
+ #else
+           errorcode = ERR45;
+@@ -3791,16 +3825,6 @@ while (ptr < ptrend)
+           ptr--;
+           goto FAILED;
+           }
+-
+-        /* Perl gives a warning unless a following hyphen is the last character
+-        in the class. PCRE throws an error. */
+-
+-        if (ptr < ptrend - 1 && *ptr == CHAR_MINUS &&
+-            ptr[1] != CHAR_RIGHT_SQUARE_BRACKET)
+-          {
+-          errorcode = ERR50;
+-          goto FAILED;
+-          }
+         }
+ 
+       /* Proceed to next thing in the class. */
+diff --git a/src/pcre2_intmodedep.h b/src/pcre2_intmodedep.h
+index 598060c9..a11b4faa 100644
+--- a/src/pcre2_intmodedep.h
+++ b/src/pcre2_intmodedep.h
+@@ -435,7 +435,7 @@ UTF-16 mode. */
+   c = *eptr; \
+   if ((c & 0xfc00u) == 0xd800u) GETUTF16LEN(c, eptr, len);
+ 
+-/* Get the next UTF-816character, testing for UTF-16 mode, not advancing the
+/* Get the next UTF-16 character, testing for UTF-16 mode, not advancing the
+ pointer, incrementing length if there is a low surrogate. This is called when
+ we do not know if we are in UTF-16 mode. */
+ 
+diff --git a/testdata/testinput1 b/testdata/testinput1
+index 0794502e..1e50369f 100644
+--- a/testdata/testinput1
+++ b/testdata/testinput1
+@@ -5787,12 +5787,6 @@ ef) x/x,mark
+ 
+ /(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/
+ 
+-/[s[:digit:]\E-H]+/
+-    s09-H
+-
+-/[s[:digit:]\Q\E-H]+/
+-    s09-H
+-
+ /a+(?:|b)a/
+     aaaa
+ 
+@@ -6435,4 +6429,18 @@ ef) x/x,mark
+ /(a\K.(?1)*)/
+     abac
+ 
+/[[:digit:]-   ]/xx
+    1
+    -
+\= Expect no match
+    z
+    \ \
+
+/[\d-   ]/xx
+    1
+    -
+\= Expect no match
+    z
+    \ \
+
+ # End of testinput1 
+diff --git a/testdata/testinput2 b/testdata/testinput2
+index b6464a0b..61b94e69 100644
+--- a/testdata/testinput2
+++ b/testdata/testinput2
+@@ -5981,4 +5981,12 @@ a)"xI
+     a
+     a\=noteol 
+ 
+/[[:digit:]   -Z]/xx
+
+/[\d   -Z]/xx
+
+/[[:digit:]\E-H]/
+
+/[[:digit:]\Q\E-H]+/
+
+ # End of testinput2
+diff --git a/testdata/testoutput1 b/testdata/testoutput1
+index 8daf8362..6f927729 100644
+--- a/testdata/testoutput1
+++ b/testdata/testoutput1
+@@ -9246,14 +9246,6 @@ No match
+ 
+ /(?'c')XX(?'YYYYYYYYYYYYYYYYYYYYYYYCl')/
+ 
+-/[s[:digit:]\E-H]+/
+-    s09-H
+- 0: s09-H
+-
+-/[s[:digit:]\Q\E-H]+/
+-    s09-H
+- 0: s09-H
+-
+ /a+(?:|b)a/
+     aaaa
+  0: aaaa
+@@ -10197,4 +10189,26 @@ No match
+  0: c
+  1: abac
+ 
+/[[:digit:]-   ]/xx
+    1
+ 0: 1
+    -
+ 0: -
+\= Expect no match
+    z
+No match
+    \ \
+No match
+
+/[\d-   ]/xx
+    1
+ 0: 1
+    -
+ 0: -
+\= Expect no match
+    z
+No match
+    \ \
+No match
+
+ # End of testinput1 
+diff --git a/testdata/testoutput2 b/testdata/testoutput2
+index 1075b4d4..86bfe964 100644
+--- a/testdata/testoutput2
+++ b/testdata/testoutput2
+@@ -17815,6 +17815,18 @@ Subject length lower bound = 2
+     a\=noteol 
+  0: a
+ 
+/[[:digit:]   -Z]/xx
+Failed: error 150 at offset 10: invalid range in character class
+
+/[\d   -Z]/xx
+Failed: error 150 at offset 3: invalid range in character class
+
+/[[:digit:]\E-H]/
+Failed: error 150 at offset 10: invalid range in character class
+
+/[[:digit:]\Q\E-H]+/
+Failed: error 150 at offset 10: invalid range in character class
+
+ # End of testinput2
+ Error -70: PCRE2_ERROR_BADDATA (unknown error number)
+ Error -62: bad serialized data
+-- 
+2.33.0
+
--- a/backport-Further-ASCII-tests-and-minor-bugfix-plus-ChangeLog-.patch
+++ b/backport-Further-ASCII-tests-and-minor-bugfix-plus-ChangeLog-.patch
@ -0,0 +1,104 @@
+From fc56fd790c1a3ba8f2890fc2b6afba21250923de Mon Sep 17 00:00:00 2001
+From: Philip Hazel <Philip.Hazel@gmail.com>
+Date: Thu, 2 Feb 2023 17:19:45 +0000
+Subject: [PATCH] Further ASCII tests and minor bugfix plus ChangeLog update
+
+Conflict:don't modify ChangeLog
+Reference:https://github.com/PCRE2Project/pcre2/commit/fc56fd790c1a3ba8f2890fc2b6afba21250923de
+
+---
+ src/pcre2_compile.c  | 5 ++---
+ testdata/testinput5  | 5 +++++
+ testdata/testinput7  | 5 +++++
+ testdata/testoutput5 | 7 +++++++
+ testdata/testoutput7 | 7 +++++++
+ 5 files changed, 26 insertions(+), 3 deletions(-)
+
+diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
+index b8a9e098..64a35bda 100644
+--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
+@@ -2660,10 +2660,9 @@ the main compiling phase. */
+   PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
+   PCRE2_UNGREEDY)
+ 
+-#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT)
+-
+ #define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT| \
+-  PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW)
+  PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW| \
+  PCRE2_EXTRA_ASCII_POSIX)
+ 
+ /* States used for analyzing ranges in character classes. The two OK values
+ must be last. */
+diff --git a/testdata/testinput5 b/testdata/testinput5
+index 6e186cf0..49b46f82 100644
+--- a/testdata/testinput5
+++ b/testdata/testinput5
+@@ -2434,6 +2434,11 @@
+ /(?aP)[[:alnum:]\d]+/i,ucp,utf
+     abc\x{660}xyz
+     
+/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/
+    \x{660}A\x{660}
+\= Expect no match     
+    \x{660}\x{660}\x{660}
+    
+ # VARIOUS
+ 
+ /[\d\s\w]+/a,ucp,utf
+diff --git a/testdata/testinput7 b/testdata/testinput7
+index 64a37ad2..a2b7fb8d 100644
+--- a/testdata/testinput7
+++ b/testdata/testinput7
+@@ -2453,6 +2453,11 @@
+ /(?aP)[[:alnum:]\d]+/i,ucp,utf
+     abc\x{660}xyz
+     
+/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/
+    \x{660}A\x{660}
+\= Expect no match     
+    \x{660}\x{660}\x{660}
+    
+ # VARIOUS
+ 
+ /[\d\s\w]+/a,ucp,utf
+diff --git a/testdata/testoutput5 b/testdata/testoutput5
+index 26972f70..4f845c84 100644
+--- a/testdata/testoutput5
+++ b/testdata/testoutput5
+@@ -5365,6 +5365,13 @@ No match
+     abc\x{660}xyz
+  0: abc\x{660}xyz
+     
+/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/
+    \x{660}A\x{660}
+ 0: \x{660}A\x{660}
+\= Expect no match     
+    \x{660}\x{660}\x{660}
+No match
+    
+ # VARIOUS
+ 
+ /[\d\s\w]+/a,ucp,utf
+diff --git a/testdata/testoutput7 b/testdata/testoutput7
+index c830748c..4065981d 100644
+--- a/testdata/testoutput7
+++ b/testdata/testoutput7
+@@ -4105,6 +4105,13 @@ No match
+     abc\x{660}xyz
+  0: abc\x{660}xyz
+     
+/(*UCP)(*UTF)[[:alnum:]](?aP:[[:alnum:]])[[:alnum:]]/
+    \x{660}A\x{660}
+ 0: \x{660}A\x{660}
+\= Expect no match     
+    \x{660}\x{660}\x{660}
+No match
+    
+ # VARIOUS
+ 
+ /[\d\s\w]+/a,ucp,utf
+-- 
+2.33.0
+
--- a/backport-Improve-error-offsets-for-character-classes-548.patch
+++ b/backport-Improve-error-offsets-for-character-classes-548.patch
@ -0,0 +1,425 @@
+From 6185344ed8617ff84a08764e808e5b3667c34a7a Mon Sep 17 00:00:00 2001
+From: Nicholas Wilson <niwilson@microsoft.com>
+Date: Wed, 6 Nov 2024 08:45:46 +0000
+Subject: [PATCH] Improve error offsets for character classes (#548)
+
+Conflict:don't modify alt_extended_class because fc38d9e784 is not merged;
+don't modify class_op_state because class_op_state is not merged; adapt context
+Reference:https://github.com/PCRE2Project/pcre2/commit/6185344ed8617ff84a08764e808e5b3667c34a7a
+
+* Error offset should be advanced by one character for "[\d-z]"
+  invalid range error
+
+  The code does a 1-char lookahead for a hyphen, but then doesn't
+  advance the pointer to consume the hyphen when returning the error.
+
+  Perl's error message (with "use warnings") does advance to just
+  after the hyphen, so PCRE2 should match.
+
+  Fixes #545.
+
+* Also improve error offsets for [[:bad:]], [[=...=]] and [z-\p{...}]
+  cases
+---
+ src/pcre2_compile.c  | 67 +++++++++++++++++++-------------------
+ testdata/testinput2  |  8 +++++
+ testdata/testinput5  |  8 +++++
+ testdata/testoutput2 | 76 +++++++++++++++++++++++++-------------------
+ testdata/testoutput5 | 14 +++++++-
+ 5 files changed, 106 insertions(+), 67 deletions(-)
+
+diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
+index 32db44db..290e759b 100644
+--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
+@@ -3563,6 +3563,7 @@ while (ptr < ptrend)
+ 
+         if (class_range_state == RANGE_STARTED)
+           {
+          ptr = tempptr + 2;
+           errorcode = ERR50;
+           goto FAILED;
+           }
+@@ -3584,8 +3585,9 @@ while (ptr < ptrend)
+ 
+         if (*ptr != CHAR_COLON)
+           {
+          ptr = tempptr + 2;
+           errorcode = ERR13;
+-          goto FAILED_BACK;
+          goto FAILED;
+           }
+ 
+         if (*(++ptr) == CHAR_CIRCUMFLEX_ACCENT)
+@@ -3595,19 +3597,18 @@ while (ptr < ptrend)
+           }
+ 
+         posix_class = check_posix_name(ptr, (int)(tempptr - ptr));
+        ptr = tempptr + 2;
+         if (posix_class < 0)
+           {
+           errorcode = ERR30;
+           goto FAILED;
+           }
+-        ptr = tempptr + 2;
+ 
+         /* Set "a hyphen is forbidden to be the start of a range". For the '-]'
+         case, the hyphen is treated as a literal, but for '-1' it is disallowed
+         (because it would be interpreted as range). */
+ 
+         class_range_state = RANGE_FORBID_NO;
+-        class_range_forbid_ptr = ptr;
+ 
+         /* When PCRE2_UCP is set, unless PCRE2_EXTRA_ASCII_POSIX is set, some
+         of the POSIX classes are converted to use Unicode properties \p or \P
+@@ -3664,6 +3665,7 @@ while (ptr < ptrend)
+         {
+         *parsed_pattern++ = CHAR_MINUS;
+         class_range_state = RANGE_FORBID_STARTED;
+        class_range_forbid_ptr = ptr;
+         }
+ 
+       /* Handle a literal character */
+@@ -3746,37 +3748,8 @@ while (ptr < ptrend)
+           errorcode = ERR7;
+           ptr--;
+           goto FAILED;
+-          }
+ 
+-        /* The second part of a range can be a single-character escape
+-        sequence (detected above), but not any of the other escapes. Perl
+-        treats a hyphen as a literal in such circumstances. However, in Perl's
+-        warning mode, a warning is given, so PCRE now faults it, as it is
+-        almost certainly a mistake on the user's part. */
+-
+-        if (class_range_state == RANGE_STARTED)
+-          {
+-          errorcode = ERR50;
+-          goto FAILED;
+-          }
+-        /* Perl gives a warning unless the hyphen following a multi-character
+-        escape is the last character in the class. PCRE throws an error. */
+-        if (class_range_state == RANGE_FORBID_STARTED)
+-          {
+-          ptr = class_range_forbid_ptr;
+-          errorcode = ERR50;
+-          goto FAILED;
+-          }
+-
+-        /* Of the remaining escapes, only those that define characters are
+-        allowed in a class. None may start a range. */
+-
+-        class_range_state = RANGE_FORBID_NO;
+-        class_range_forbid_ptr = ptr;
+-
+-        switch(escape)
+-          {
+-          case ESC_N:
+          case ESC_N:     /* Not permitted by Perl either */
+           errorcode = ERR71;
+           goto FAILED;
+ 
+@@ -3813,7 +3786,6 @@ while (ptr < ptrend)
+             if (negated) escape = (escape == ESC_P)? ESC_p : ESC_P;
+             *parsed_pattern++ = META_ESCAPE + escape;
+             *parsed_pattern++ = (ptype << 16) | pdata;
+-            class_range_forbid_ptr = ptr;
+             }
+ #else
+           errorcode = ERR45;
+@@ -3826,6 +3798,33 @@ while (ptr < ptrend)
+           ptr--;
+           goto FAILED;
+           }
+
+        /* All the switch-cases above which end in "break" describe a set
+        of characters. None may start a range. */
+
+        /* The second part of a range can be a single-character escape
+        sequence (detected above), but not any of the other escapes. Perl
+        treats a hyphen as a literal in such circumstances. However, in Perl's
+        warning mode, a warning is given, so PCRE now faults it, as it is
+        almost certainly a mistake on the user's part. */
+
+        if (class_range_state == RANGE_STARTED)
+          {
+          errorcode = ERR50;
+          goto FAILED;
+          }
+
+        /* Perl gives a warning unless the hyphen following a multi-character
+        escape is the last character in the class. PCRE throws an error. */
+
+        if (class_range_state == RANGE_FORBID_STARTED)
+          {
+          ptr = class_range_forbid_ptr;
+          errorcode = ERR50;
+          goto FAILED;
+          }
+
+        class_range_state = RANGE_FORBID_NO;
+         }
+ 
+       /* Proceed to next thing in the class. */
+diff --git a/testdata/testinput2 b/testdata/testinput2
+index 61b94e69..1fbb778e 100644
+--- a/testdata/testinput2
+++ b/testdata/testinput2
+@@ -7008,4 +7008,12 @@ a)"xI
+ 
+ /[[:digit:]\Q\E-H]+/
+ 
+/[z-[:space:]]/
+
+/[z-\d]/
+
+/[[:space:]-z]/
+
+/[\d-z]/
+
+ # End of testinput2
+diff --git a/testdata/testinput5 b/testdata/testinput5
+index 494371b5..f3faeb8f 100644
+--- a/testdata/testinput5
+++ b/testdata/testinput5
+@@ -2458,4 +2458,12 @@
+ /abc/utf,substitute_extended,replace=>\777<
+     abc
+ 
+/[z-\p{Lu}]/
+
+/[z-\pL]/
+
+/[\p{Lu}-z]/
+
+/[\pL-z]/
+
+ # End of testinput5
+diff --git a/testdata/testoutput2 b/testdata/testoutput2
+index 86bfe964..99714596 100644
+--- a/testdata/testoutput2
+++ b/testdata/testoutput2
+@@ -2176,13 +2176,13 @@ Starting code units: % 0 1 A B C D E F G H I J K L M N O P Q R S T U V W
+ Subject length lower bound = 1
+ 
+ /[[.ch.]]/I
+-Failed: error 113 at offset 1: POSIX collating elements are not supported
+Failed: error 113 at offset 7: POSIX collating elements are not supported
+ 
+ /[[=ch=]]/I
+-Failed: error 113 at offset 1: POSIX collating elements are not supported
+Failed: error 113 at offset 7: POSIX collating elements are not supported
+ 
+ /[[:rhubarb:]]/I
+-Failed: error 130 at offset 3: unknown POSIX class name
+Failed: error 130 at offset 12: unknown POSIX class name
+ 
+ /[[:upper:]]/Ii
+ Capture group count = 0
+@@ -8722,31 +8722,31 @@ Failed: error 162 at offset 4: subpattern name expected
+ Failed: error 162 at offset 4: subpattern name expected
+ 
+ /[[:foo:]]/
+-Failed: error 130 at offset 3: unknown POSIX class name
+Failed: error 130 at offset 8: unknown POSIX class name
+ 
+ /[[:1234:]]/
+-Failed: error 130 at offset 3: unknown POSIX class name
+Failed: error 130 at offset 9: unknown POSIX class name
+ 
+ /[[:f\oo:]]/
+-Failed: error 130 at offset 3: unknown POSIX class name
+Failed: error 130 at offset 9: unknown POSIX class name
+ 
+ /[[: :]]/
+-Failed: error 130 at offset 3: unknown POSIX class name
+Failed: error 130 at offset 6: unknown POSIX class name
+ 
+ /[[:...:]]/
+-Failed: error 130 at offset 3: unknown POSIX class name
+Failed: error 130 at offset 8: unknown POSIX class name
+ 
+ /[[:l\ower:]]/
+-Failed: error 130 at offset 3: unknown POSIX class name
+Failed: error 130 at offset 11: unknown POSIX class name
+ 
+ /[[:abc\:]]/
+-Failed: error 130 at offset 3: unknown POSIX class name
+Failed: error 130 at offset 9: unknown POSIX class name
+ 
+ /[abc[:x\]pqr:]]/
+-Failed: error 130 at offset 6: unknown POSIX class name
+Failed: error 130 at offset 14: unknown POSIX class name
+ 
+ /[[:a\dz:]]/
+-Failed: error 130 at offset 3: unknown POSIX class name
+Failed: error 130 at offset 9: unknown POSIX class name
+ 
+ /(^(a|b\g<-1'c))/
+ Failed: error 157 at offset 8: \g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number
+@@ -11409,7 +11409,7 @@ Failed: error 171 at offset 4: \N is not supported in a class
+     aNc
+ 
+ /a[B-\Nc]/
+-Failed: error 150 at offset 6: invalid range in character class
+Failed: error 171 at offset 6: \N is not supported in a class
+ 
+ /a[B\Nc]/
+ Failed: error 171 at offset 5: \N is not supported in a class
+@@ -13232,16 +13232,16 @@ Failed: error 178 at offset 5: digits missing in \x{} or \o{} or \N{U+}
+ ------------------------------------------------------------------
+ 
+ /[a-[:digit:]]+/
+-Failed: error 150 at offset 4: invalid range in character class
+Failed: error 150 at offset 12: invalid range in character class
+ 
+ /[A-[:digit:]]+/
+-Failed: error 150 at offset 4: invalid range in character class
+Failed: error 150 at offset 12: invalid range in character class
+ 
+ /[a-[.xxx.]]+/
+-Failed: error 150 at offset 4: invalid range in character class
+Failed: error 150 at offset 10: invalid range in character class
+ 
+ /[a-[=xxx=]]+/
+-Failed: error 150 at offset 4: invalid range in character class
+Failed: error 150 at offset 10: invalid range in character class
+ 
+ /[a-[!xxx!]]+/
+ Failed: error 108 at offset 3: range out of order in character class
+@@ -13362,7 +13362,7 @@ No match
+ No match
+ 
+ /[a[:<:]] should give error/
+-Failed: error 130 at offset 4: unknown POSIX class name
+Failed: error 130 at offset 7: unknown POSIX class name
+ 
+ /(?=ab\K)/aftertext,allow_lookaround_bsk
+     abcd\=startchar
+@@ -15510,11 +15510,11 @@ Failed: error 125 at offset 13: lookbehind assertion is not fixed length
+ # Perl accepts these, but gives a warning. We can't warn, so give an error.
+ 
+ /[a-[:digit:]]+/
+-Failed: error 150 at offset 4: invalid range in character class
+Failed: error 150 at offset 12: invalid range in character class
+     a-a9-a
+ 
+ /[A-[:digit:]]+/
+-Failed: error 150 at offset 4: invalid range in character class
+Failed: error 150 at offset 12: invalid range in character class
+     A-A9-A
+ 
+ /[a-\d]+/
+@@ -15651,7 +15651,7 @@ Failed: error 128 at offset 63: assertion expected after (?( or (?(?C)
+     .+(?(?C'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'))?!XXXX.=X
+ 
+ /[:[:alnum:]-[[a:lnum:]+/
+-Failed: error 150 at offset 11: invalid range in character class
+Failed: error 150 at offset 12: invalid range in character class
+ 
+ /((?(?C'')\QX\E(?!((?(?C'')(?!X=X));=)r*X=X));=)/
+ Failed: error 128 at offset 11: assertion expected after (?( or (?(?C)
+@@ -16285,10 +16285,10 @@ Subject length lower bound = 3
+ ------------------------------------------------------------------
+ 
+ /[Q-\N]/B,bad_escape_is_literal
+-Failed: error 150 at offset 5: invalid range in character class
+Failed: error 171 at offset 5: \N is not supported in a class
+ 
+ /[\s-_]/bad_escape_is_literal
+-Failed: error 150 at offset 3: invalid range in character class
+Failed: error 150 at offset 4: invalid range in character class
+ 
+ /[_-\s]/bad_escape_is_literal
+ Failed: error 150 at offset 5: invalid range in character class
+@@ -16443,19 +16443,19 @@ No match
+ No match
+ 
+ /[[:digit:]-a]/
+-Failed: error 150 at offset 10: invalid range in character class
+Failed: error 150 at offset 11: invalid range in character class
+ 
+ /[[:digit:]-[:print:]]/
+-Failed: error 150 at offset 10: invalid range in character class
+Failed: error 150 at offset 11: invalid range in character class
+ 
+ /[\d-a]/
+-Failed: error 150 at offset 3: invalid range in character class
+Failed: error 150 at offset 4: invalid range in character class
+ 
+ /[\H-z]/
+-Failed: error 150 at offset 3: invalid range in character class
+Failed: error 150 at offset 4: invalid range in character class
+ 
+ /[\d-[:print:]]/
+-Failed: error 150 at offset 3: invalid range in character class
+Failed: error 150 at offset 4: invalid range in character class
+ 
+ # Perl gets the second of these wrong, giving no match.
+ 
+@@ -17816,16 +17816,28 @@ Subject length lower bound = 2
+  0: a
+ 
+ /[[:digit:]   -Z]/xx
+-Failed: error 150 at offset 10: invalid range in character class
+Failed: error 150 at offset 14: invalid range in character class
+ 
+ /[\d   -Z]/xx
+-Failed: error 150 at offset 3: invalid range in character class
+Failed: error 150 at offset 7: invalid range in character class
+ 
+ /[[:digit:]\E-H]/
+-Failed: error 150 at offset 10: invalid range in character class
+Failed: error 150 at offset 13: invalid range in character class
+ 
+ /[[:digit:]\Q\E-H]+/
+-Failed: error 150 at offset 10: invalid range in character class
+Failed: error 150 at offset 15: invalid range in character class
+
+/[z-[:space:]]/
+Failed: error 150 at offset 12: invalid range in character class
+
+/[z-\d]/
+Failed: error 150 at offset 5: invalid range in character class
+
+/[[:space:]-z]/
+Failed: error 150 at offset 11: invalid range in character class
+
+/[\d-z]/
+Failed: error 150 at offset 4: invalid range in character class
+ 
+ # End of testinput2
+ Error -70: PCRE2_ERROR_BADDATA (unknown error number)
+diff --git a/testdata/testoutput5 b/testdata/testoutput5
+index bf06ee12..0dba11c6 100644
+--- a/testdata/testoutput5
+++ b/testdata/testoutput5
+@@ -795,7 +795,7 @@ No match
+ No match
+ 
+ /[[:a\x{100}b:]]/utf
+-Failed: error 130 at offset 3: unknown POSIX class name
+Failed: error 130 at offset 14: unknown POSIX class name
+ 
+ /a[^]b/utf,allow_empty_class,match_unset_backref
+     a\x{1234}b
+@@ -5403,4 +5403,16 @@ No match
+     abc
+  1: >\x{1ff}<
+ 
+/[z-\p{Lu}]/
+Failed: error 150 at offset 9: invalid range in character class
+
+/[z-\pL]/
+Failed: error 150 at offset 6: invalid range in character class
+
+/[\p{Lu}-z]/
+Failed: error 150 at offset 8: invalid range in character class
+
+/[\pL-z]/
+Failed: error 150 at offset 5: invalid range in character class
+
+ # End of testinput5
+-- 
+2.33.0
+
--- a/backport-Non-recursive-scan-prefix-in-JIT-560.patch
+++ b/backport-Non-recursive-scan-prefix-in-JIT-560.patch
@ -0,0 +1,459 @@
+From 6f2da25f009ff463cd9357ae5ebe452fbec8ab5c Mon Sep 17 00:00:00 2001
+From: Zoltan Herczeg <zherczeg7@gmail.com>
+Date: Fri, 15 Nov 2024 13:21:03 +0100
+Subject: [PATCH] Non-recursive scan prefix in JIT (#560)
+
+Conflict:NA
+Reference:https://github.com/PCRE2Project/pcre2/commit/6f2da25f009ff463cd9357ae5ebe452fbec8ab5c
+
+---
+ src/pcre2_jit_compile.c | 238 ++++++++++++++++++++++++++++------------
+ src/pcre2_jit_test.c    |   1 +
+ 2 files changed, 168 insertions(+), 71 deletions(-)
+
+diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
+index 127c393d..4449d59f 100644
+--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
+@@ -5670,11 +5670,38 @@ if (last)
+   chars->last_count++;
+ }
+ 
+-static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars, int max_chars, sljit_u32 *rec_count)
+/* Value can be increased if needed. Patterns
+such as /(a|){33}b/ can exhaust the stack.
+
+Note: /(a|){29}b/ already stops scan_prefix()
+because it reaches the maximum step_count. */
+#define SCAN_PREFIX_STACK_END 32
+
+/*
+Scan prefix stores the prefix string in the chars array.
+The elements of the chars array are either small character
+sets or "any" (count is set to 255).
+
+Examples (the chars array is represented by a simple regex):
+
+/(abc|xbyd)/ prefix: /[ax]b[cy]/ (length: 3)
+/a[a-z]b+c/ prefix: a.b (length: 3)
+/ab?cd/ prefix: a[bc][cd] (length: 3)
+/(ab|cd)|(ef|gh)/ prefix: [aceg][bdfh] (length: 2)
+
+The length is returned by scan_prefix(). The length is
+less than or equal to the minimum length of the pattern.
+*/
+
+static int scan_prefix(compiler_common *common, PCRE2_SPTR cc, fast_forward_char_data *chars)
+ {
+-/* Recursive function, which scans prefix literals. */
+fast_forward_char_data *chars_start = chars;
+fast_forward_char_data *chars_end = chars + MAX_N_CHARS;
+PCRE2_SPTR cc_stack[SCAN_PREFIX_STACK_END];
+fast_forward_char_data *chars_stack[SCAN_PREFIX_STACK_END];
+sljit_u8 next_alternative_stack[SCAN_PREFIX_STACK_END];
+ BOOL last, any, class, caseless;
+-int len, repeat, len_save, consumed = 0;
+int stack_ptr, step_count, repeat, len, len_save;
+ sljit_u32 chr; /* Any unicode character. */
+ sljit_u8 *bytes, *bytes_end, byte;
+ PCRE2_SPTR alternative, cc_save, oc;
+@@ -5687,11 +5714,44 @@ PCRE2_UCHAR othercase[1];
+ #endif
+ 
+ repeat = 1;
+stack_ptr = 0;
+step_count = 10000;
+ while (TRUE)
+   {
+-  if (*rec_count == 0)
+  if (--step_count == 0)
+     return 0;
+-  (*rec_count)--;
+
+  SLJIT_ASSERT(chars <= chars_start + MAX_N_CHARS);
+
+  if (chars >= chars_end)
+    {
+    if (stack_ptr == 0)
+      return (int)(chars_end - chars_start);
+
+    --stack_ptr;
+    cc = cc_stack[stack_ptr];
+    chars = chars_stack[stack_ptr];
+
+    if (chars >= chars_end)
+      continue;
+
+    if (next_alternative_stack[stack_ptr] != 0)
+      {
+      /* When an alternative is processed, the
+      next alternative is pushed onto the stack. */
+      SLJIT_ASSERT(*cc == OP_ALT);
+      alternative = cc + GET(cc, 1);
+      if (*alternative == OP_ALT)
+        {
+        SLJIT_ASSERT(stack_ptr < SCAN_PREFIX_STACK_END);
+        SLJIT_ASSERT(chars_stack[stack_ptr] == chars);
+        SLJIT_ASSERT(next_alternative_stack[stack_ptr] == 1);
+        cc_stack[stack_ptr] = alternative;
+        stack_ptr++;
+        }
+      cc += 1 + LINK_SIZE;
+      }
+    }
+ 
+   last = TRUE;
+   any = FALSE;
+@@ -5768,9 +5828,17 @@ while (TRUE)
+ #ifdef SUPPORT_UNICODE
+     if (common->utf && HAS_EXTRALEN(*cc)) len += GET_EXTRALEN(*cc);
+ #endif
+-    max_chars = scan_prefix(common, cc + len, chars, max_chars, rec_count);
+-    if (max_chars == 0)
+-      return consumed;
+    if (stack_ptr >= SCAN_PREFIX_STACK_END)
+      {
+      chars_end = chars;
+      continue;
+      }
+
+    cc_stack[stack_ptr] = cc + len;
+    chars_stack[stack_ptr] = chars;
+    next_alternative_stack[stack_ptr] = 0;
+    stack_ptr++;
+
+     last = FALSE;
+     break;
+ 
+@@ -5788,12 +5856,18 @@ while (TRUE)
+     case OP_CBRA:
+     case OP_CBRAPOS:
+     alternative = cc + GET(cc, 1);
+-    while (*alternative == OP_ALT)
+    if (*alternative == OP_ALT)
+       {
+-      max_chars = scan_prefix(common, alternative + 1 + LINK_SIZE, chars, max_chars, rec_count);
+-      if (max_chars == 0)
+-        return consumed;
+-      alternative += GET(alternative, 1);
+      if (stack_ptr >= SCAN_PREFIX_STACK_END)
+        {
+        chars_end = chars;
+        continue;
+        }
+
+      cc_stack[stack_ptr] = alternative;
+      chars_stack[stack_ptr] = chars;
+      next_alternative_stack[stack_ptr] = 1;
+      stack_ptr++;
+       }
+ 
+     if (*cc == OP_CBRA || *cc == OP_CBRAPOS)
+@@ -5804,14 +5878,21 @@ while (TRUE)
+     case OP_CLASS:
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+     if (common->utf && !is_char7_bitset((const sljit_u8 *)(cc + 1), FALSE))
+-      return consumed;
+      {
+      chars_end = chars;
+      continue;
+      }
+ #endif
+     class = TRUE;
+     break;
+ 
+     case OP_NCLASS:
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+-    if (common->utf) return consumed;
+    if (common->utf)
+      {
+      chars_end = chars;
+      continue;
+      }
+ #endif
+     class = TRUE;
+     break;
+@@ -5819,7 +5900,11 @@ while (TRUE)
+ #if defined SUPPORT_UNICODE || PCRE2_CODE_UNIT_WIDTH != 8
+     case OP_XCLASS:
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+-    if (common->utf) return consumed;
+    if (common->utf)
+      {
+      chars_end = chars;
+      continue;
+      }
+ #endif
+     any = TRUE;
+     cc += GET(cc, 1);
+@@ -5829,7 +5914,10 @@ while (TRUE)
+     case OP_DIGIT:
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_digit, FALSE))
+-      return consumed;
+      {
+      chars_end = chars;
+      continue;
+      }
+ #endif
+     any = TRUE;
+     cc++;
+@@ -5838,7 +5926,10 @@ while (TRUE)
+     case OP_WHITESPACE:
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_space, FALSE))
+-      return consumed;
+      {
+      chars_end = chars;
+      continue;
+      }
+ #endif
+     any = TRUE;
+     cc++;
+@@ -5847,7 +5938,10 @@ while (TRUE)
+     case OP_WORDCHAR:
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH == 8
+     if (common->utf && !is_char7_bitset((const sljit_u8 *)common->ctypes - cbit_length + cbit_word, FALSE))
+-      return consumed;
+      {
+      chars_end = chars;
+      continue;
+      }
+ #endif
+     any = TRUE;
+     cc++;
+@@ -5863,7 +5957,11 @@ while (TRUE)
+     case OP_ANY:
+     case OP_ALLANY:
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+-    if (common->utf) return consumed;
+    if (common->utf)
+      {
+      chars_end = chars;
+      continue;
+      }
+ #endif
+     any = TRUE;
+     cc++;
+@@ -5873,7 +5971,11 @@ while (TRUE)
+     case OP_NOTPROP:
+     case OP_PROP:
+ #if PCRE2_CODE_UNIT_WIDTH != 32
+-    if (common->utf) return consumed;
+    if (common->utf)
+      {
+      chars_end = chars;
+      continue;
+      }
+ #endif
+     any = TRUE;
+     cc += 1 + 2;
+@@ -5888,7 +5990,11 @@ while (TRUE)
+     case OP_NOTEXACT:
+     case OP_NOTEXACTI:
+ #if defined SUPPORT_UNICODE && PCRE2_CODE_UNIT_WIDTH != 32
+-    if (common->utf) return consumed;
+    if (common->utf)
+      {
+      chars_end = chars;
+      continue;
+      }
+ #endif
+     any = TRUE;
+     repeat = GET2(cc, 1);
+@@ -5896,21 +6002,20 @@ while (TRUE)
+     break;
+ 
+     default:
+-    return consumed;
+    chars_end = chars;
+    continue;
+     }
+ 
+  SLJIT_ASSERT(chars < chars_end);
+
+   if (any)
+     {
+     do
+       {
+       chars->count = 255;
+-
+-      consumed++;
+-      if (--max_chars == 0)
+-        return consumed;
+       chars++;
+       }
+-    while (--repeat > 0);
+    while (--repeat > 0 && chars < chars_end);
+ 
+     repeat = 1;
+     continue;
+@@ -5921,17 +6026,27 @@ while (TRUE)
+     bytes = (sljit_u8*) (cc + 1);
+     cc += 1 + 32 / sizeof(PCRE2_UCHAR);
+ 
+    SLJIT_ASSERT(last == TRUE && repeat == 1);
+     switch (*cc)
+       {
+-      case OP_CRSTAR:
+-      case OP_CRMINSTAR:
+-      case OP_CRPOSSTAR:
+       case OP_CRQUERY:
+       case OP_CRMINQUERY:
+       case OP_CRPOSQUERY:
+-      max_chars = scan_prefix(common, cc + 1, chars, max_chars, rec_count);
+-      if (max_chars == 0)
+-        return consumed;
+      last = FALSE;
+      /* Fall through */
+      case OP_CRSTAR:
+      case OP_CRMINSTAR:
+      case OP_CRPOSSTAR:
+      if (stack_ptr >= SCAN_PREFIX_STACK_END)
+        {
+        chars_end = chars;
+        continue;
+        }
+
+      cc_stack[stack_ptr] = ++cc;
+      chars_stack[stack_ptr] = chars;
+      next_alternative_stack[stack_ptr] = 0;
+      stack_ptr++;
+       break;
+ 
+       default:
+@@ -5945,7 +6060,13 @@ while (TRUE)
+       case OP_CRPOSRANGE:
+       repeat = GET2(cc, 1);
+       if (repeat <= 0)
+-        return consumed;
+        {
+        chars_end = chars;
+        continue;
+        }
+
+      last = (repeat != (int)GET2(cc, 1 + IMM2_SIZE));
+      cc += 1 + 2 * IMM2_SIZE;
+       break;
+       }
+ 
+@@ -5980,36 +6101,13 @@ while (TRUE)
+         bytes = bytes_end - 32;
+         }
+ 
+-      consumed++;
+-      if (--max_chars == 0)
+-        return consumed;
+       chars++;
+       }
+-    while (--repeat > 0);
+-
+-    switch (*cc)
+-      {
+-      case OP_CRSTAR:
+-      case OP_CRMINSTAR:
+-      case OP_CRPOSSTAR:
+-      return consumed;
+-
+-      case OP_CRQUERY:
+-      case OP_CRMINQUERY:
+-      case OP_CRPOSQUERY:
+-      cc++;
+-      break;
+-
+-      case OP_CRRANGE:
+-      case OP_CRMINRANGE:
+-      case OP_CRPOSRANGE:
+-      if (GET2(cc, 1) != GET2(cc, 1 + IMM2_SIZE))
+-        return consumed;
+-      cc += 1 + 2 * IMM2_SIZE;
+-      break;
+-      }
+    while (--repeat > 0 && chars < chars_end);
+ 
+     repeat = 1;
+    if (last)
+      chars_end = chars;
+     continue;
+     }
+ 
+@@ -6025,7 +6123,10 @@ while (TRUE)
+       {
+       GETCHAR(chr, cc);
+       if ((int)PRIV(ord2utf)(char_othercase(common, chr), othercase) != len)
+-        return consumed;
+        {
+        chars_end = chars;
+        continue;
+        }
+       }
+     else
+ #endif
+@@ -6056,7 +6157,6 @@ while (TRUE)
+     do
+       {
+       len--;
+-      consumed++;
+ 
+       chr = *cc;
+       add_prefix_char(*cc, chars, len == 0);
+@@ -6064,15 +6164,13 @@ while (TRUE)
+       if (caseless)
+         add_prefix_char(*oc, chars, len == 0);
+ 
+-      if (--max_chars == 0)
+-        return consumed;
+       chars++;
+       cc++;
+       oc++;
+       }
+-    while (len > 0);
+    while (len > 0 && chars < chars_end);
+ 
+-    if (--repeat == 0)
+    if (--repeat == 0 || chars >= chars_end)
+       break;
+ 
+     len = len_save;
+@@ -6081,7 +6179,7 @@ while (TRUE)
+ 
+   repeat = 1;
+   if (last)
+-    return consumed;
+    chars_end = chars;
+   }
+ }
+ 
+@@ -6251,7 +6349,6 @@ int i, max, from;
+ int range_right = -1, range_len;
+ sljit_u8 *update_table = NULL;
+ BOOL in_range;
+-sljit_u32 rec_count;
+ 
+ for (i = 0; i < MAX_N_CHARS; i++)
+   {
+@@ -6259,8 +6356,7 @@ for (i = 0; i < MAX_N_CHARS; i++)
+   chars[i].last_count = 0;
+   }
+ 
+-rec_count = 10000;
+-max = scan_prefix(common, common->start, chars, MAX_N_CHARS, &rec_count);
+max = scan_prefix(common, common->start, chars);
+ 
+ if (max < 1)
+   return FALSE;
+diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
+index 28bc7af9..066095fe 100644
+--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
+@@ -286,6 +286,7 @@ static struct regression_test_case regression_test_cases[] = {
+ 	{ CMU, A, 0, 0, "(a|b)?\?d((?:e)?)", "ABABdx" },
+ 	{ MU, A, 0, 0, "(a|b)?\?d((?:e)?)", "abcde" },
+ 	{ MU, A, 0, 0, "((?:ab)?\?g|b(?:g(nn|d)?\?)?)?\?(?:n)?m", "abgnbgnnbgdnmm" },
+	{ M, A, 0, 0, "(?:a?|a)b", "ba" },
+ 
+ 	/* Greedy and non-greedy + operators */
+ 	{ MU, A, 0, 0, "(aa)+aa", "aaaaaaa" },
+-- 
+2.33.0
+
--- a/backport-avoid-inconsistency-between-d-and-digit-when-using-a.patch
+++ b/backport-avoid-inconsistency-between-d-and-digit-when-using-a.patch
@ -0,0 +1,270 @@
+From 64549346f044dec18d18d06c2d08a68a68e26817 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
+Date: Sun, 9 Apr 2023 04:29:46 -0700
+Subject: [PATCH] avoid inconsistency between \d and [:digit:] when using /a
+ (#223)
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Conflict:don't modify Changelog and doc/*; keep pcre2.h.generic consistent
+with pcre2.h.in according to 1de7291
+Reference:https://github.com/PCRE2Project/pcre2/commit/64549346f044dec18d18d06c2d08a68a68e26817
+
+Since a608946 (Additional PCRE2_EXTRA_ASCII_xxx code, 2023-02-01)
+PCRE2_EXTRA_ASCII_BSD could be used to restrict \d to ASCII causing
+the following inconsistent behaviour in UCP mode.
+
+  PCRE2 version 10.43-DEV 2023-01-15
+    re> /\d/utf,ucp,ascii_bsd
+  data> ٣
+  No match
+  data>
+  re> /[[:digit:]]/utf,ucp,ascii_bsd
+  data> ٣
+    0: \x{663}
+
+It has been suggested[1] that the change to match \p{Nd} when Unicode
+is enabled for [:digit:] might have been unintentional and a bug, as
+[:digit:] should be able to be POSIX compatible, so add a new flag
+PCRE2_EXTRA_ASCII_DIGIT to avoid changing its definition in UCP mode.
+
+[1] https://lore.kernel.org/git/CANgJU+U+xXsh9psd0z5Xjr+Se5QgdKkjQ7LUQ-PdUULSN3n4+g@mail.gmail.com/
+---
+ src/pcre2.h.generic  |  6 ++++++
+ src/pcre2.h.in       |  1 +
+ src/pcre2_compile.c  |  6 ++++--
+ src/pcre2test.c      |  4 +++-
+ testdata/testinput5  | 10 +++++++++-
+ testdata/testinput7  | 10 ++++++++--
+ testdata/testoutput5 | 19 ++++++++++++++++++-
+ testdata/testoutput7 | 13 +++++++++++--
+ 8 files changed, 60 insertions(+), 9 deletions(-)
+
+diff --git a/src/pcre2.h.generic b/src/pcre2.h.generic
+index dad774ce..05cf9bc1 100644
+--- a/src/pcre2.h.generic
+++ b/src/pcre2.h.generic
+@@ -153,6 +153,12 @@ D   is inspected during pcre2_dfa_match() execution
+ #define PCRE2_EXTRA_ESCAPED_CR_IS_LF         0x00000010u  /* C */
+ #define PCRE2_EXTRA_ALT_BSUX                 0x00000020u  /* C */
+ #define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     0x00000040u  /* C */
+#define PCRE2_EXTRA_CASELESS_RESTRICT        0x00000080u  /* C */
+#define PCRE2_EXTRA_ASCII_BSD                0x00000100u  /* C */
+#define PCRE2_EXTRA_ASCII_BSS                0x00000200u  /* C */
+#define PCRE2_EXTRA_ASCII_BSW                0x00000400u  /* C */
+#define PCRE2_EXTRA_ASCII_POSIX              0x00000800u  /* C */
+#define PCRE2_EXTRA_ASCII_DIGIT              0x00001000u  /* C */
+ 
+ /* These are for pcre2_jit_compile(). */
+ 
+diff --git a/src/pcre2.h.in b/src/pcre2.h.in
+index 7202c633..cd7fdcf2 100644
+--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
+@@ -158,6 +158,7 @@ D   is inspected during pcre2_dfa_match() execution
+ #define PCRE2_EXTRA_ASCII_BSS                0x00000200u  /* C */
+ #define PCRE2_EXTRA_ASCII_BSW                0x00000400u  /* C */
+ #define PCRE2_EXTRA_ASCII_POSIX              0x00000800u  /* C */
+#define PCRE2_EXTRA_ASCII_DIGIT              0x00001000u  /* C */
+ 
+ /* These are for pcre2_jit_compile(). */
+ 
+diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
+index 95c4a79d..634360b7 100644
+--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
+@@ -786,7 +786,8 @@ are allowed. */
+     PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL| \
+     PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \
+     PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK|PCRE2_EXTRA_ASCII_BSD| \
+-    PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX)
+    PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX| \
+    PCRE2_EXTRA_ASCII_DIGIT)
+ 
+ /* Compile time error code numbers. They are given names so that they can more
+ easily be tracked. When a new number is added, the tables called eint1 and
+@@ -3581,7 +3582,8 @@ while (ptr < ptrend)
+ 
+ #ifdef SUPPORT_UNICODE
+         if ((options & PCRE2_UCP) != 0 &&
+-            (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0)
+            (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0 &&
+            !(posix_class == 7 && (xoptions & PCRE2_EXTRA_ASCII_DIGIT) != 0))
+           {
+           int ptype = posix_substitutes[2*posix_class];
+           int pvalue = posix_substitutes[2*posix_class + 1];
+diff --git a/src/pcre2test.c b/src/pcre2test.c
+index 4da3ef90..21b19370 100644
+--- a/src/pcre2test.c
+++ b/src/pcre2test.c
+@@ -651,6 +651,7 @@ static modstruct modlist[] = {
+   { "ascii_bsd",                   MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_BSD,      CO(extra_options) },
+   { "ascii_bss",                   MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_BSS,      CO(extra_options) },
+   { "ascii_bsw",                   MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_BSW,      CO(extra_options) },
+  { "ascii_digit",                 MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_DIGIT,    CO(extra_options) },
+   { "ascii_posix",                 MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_POSIX,    CO(extra_options) },
+   { "auto_callout",                MOD_PAT,  MOD_OPT, PCRE2_AUTO_CALLOUT,         PO(options) },
+   { "bad_escape_is_literal",       MOD_CTC,  MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
+@@ -4294,13 +4295,14 @@ show_compile_extra_options(uint32_t options, const char *before,
+   const char *after)
+ {
+ if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
+-else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s",
+else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
+   before,
+   ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
+   ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " alt_bsux" : "",
+   ((options & PCRE2_EXTRA_ASCII_BSD) != 0)? " ascii_bsd" : "",
+   ((options & PCRE2_EXTRA_ASCII_BSS) != 0)? " ascii_bss" : "",
+   ((options & PCRE2_EXTRA_ASCII_BSW) != 0)? " ascii_bsw" : "",
+  ((options & PCRE2_EXTRA_ASCII_DIGIT) != 0)? " ascii_digit" : "",
+   ((options & PCRE2_EXTRA_ASCII_POSIX) != 0)? " ascii_posix" : "",
+   ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
+   ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "",
+diff --git a/testdata/testinput5 b/testdata/testinput5
+index 0f105408..0624a0c3 100644
+--- a/testdata/testinput5
+++ b/testdata/testinput5
+@@ -1215,6 +1215,8 @@
+ 
+ /[[:digit:]]/B,ucp
+ 
+/[[:digit:]]/B,ucp,ascii_digit
+
+ /[[:graph:]]/B,ucp
+ 
+ /[[:print:]]/B,ucp
+@@ -1227,7 +1229,7 @@
+ 
+ /[[:xdigit:]]/B,ucp
+ 
+-# Unicode properties for \b abd \B
+# Unicode properties for \b and \B
+ 
+ /\b...\B/utf,ucp
+     abc_
+@@ -2431,6 +2433,12 @@
+ /[[:digit:]]+/utf,ucp
+     123\x{660}456
+ 
+/[[:digit:]]+/utf,ucp,ascii_digit
+    123\x{660}456
+
+/[[:digit:]]+/g,utf,ucp,ascii_digit
+    123\x{660}456
+
+ /[[:digit:]]+/utf,ucp,ascii_posix
+     123\x{660}456
+ 
+diff --git a/testdata/testinput7 b/testdata/testinput7
+index a2b7fb8d..96deaa30 100644
+--- a/testdata/testinput7
+++ b/testdata/testinput7
+@@ -1657,7 +1657,7 @@
+ /^[\p{Xwd}]+/utf
+     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
+ 
+-# Unicode properties for \b abd \B 
+# Unicode properties for \b and \B
+ 
+ /\b...\B/utf,ucp
+     abc_
+@@ -2435,9 +2435,15 @@
+ /[[:digit:]]+/utf,ucp
+     123\x{660}456
+ 
+/[[:digit:]]+/utf,ucp,ascii_digit
+    123\x{660}456
+
+/[[:digit:]]+/g,utf,ucp,ascii_digit
+    123\x{660}456
+
+ /[[:digit:]]+/utf,ucp,ascii_posix
+     123\x{660}456
+-    
+
+ />[[:space:]]+</utf,ucp
+     >\x{a0} \x{a0}<
+     >\x{a0}\x{a0}\x{a0}<
+diff --git a/testdata/testoutput5 b/testdata/testoutput5
+index 3cee990e..febcc954 100644
+--- a/testdata/testoutput5
+++ b/testdata/testoutput5
+@@ -2520,6 +2520,14 @@ No match
+         End
+ ------------------------------------------------------------------
+ 
+/[[:digit:]]/B,ucp,ascii_digit
+------------------------------------------------------------------
+        Bra
+        [0-9]
+        Ket
+        End
+------------------------------------------------------------------
+
+ /[[:graph:]]/B,ucp
+ ------------------------------------------------------------------
+         Bra
+@@ -2568,7 +2576,7 @@ No match
+         End
+ ------------------------------------------------------------------
+ 
+-# Unicode properties for \b abd \B
+# Unicode properties for \b and \B
+ 
+ /\b...\B/utf,ucp
+     abc_
+@@ -5359,6 +5367,15 @@ No match
+     123\x{660}456
+  0: 123\x{660}456
+ 
+/[[:digit:]]+/utf,ucp,ascii_digit
+    123\x{660}456
+ 0: 123
+
+/[[:digit:]]+/g,utf,ucp,ascii_digit
+    123\x{660}456
+ 0: 123
+ 0: 456
+
+ /[[:digit:]]+/utf,ucp,ascii_posix
+     123\x{660}456
+  0: 123
+diff --git a/testdata/testoutput7 b/testdata/testoutput7
+index 4065981d..d98178e6 100644
+--- a/testdata/testoutput7
+++ b/testdata/testoutput7
+@@ -2853,7 +2853,7 @@ No match
+     ABCD1234\x{6ca}\x{a6c}\x{10a7}_
+  0: ABCD1234\x{6ca}\x{a6c}\x{10a7}_
+ 
+-# Unicode properties for \b abd \B 
+# Unicode properties for \b and \B
+ 
+ /\b...\B/utf,ucp
+     abc_
+@@ -4080,10 +4080,19 @@ No match
+     123\x{660}456
+  0: 123\x{660}456
+ 
+/[[:digit:]]+/utf,ucp,ascii_digit
+    123\x{660}456
+ 0: 123
+
+/[[:digit:]]+/g,utf,ucp,ascii_digit
+    123\x{660}456
+ 0: 123
+ 0: 456
+
+ /[[:digit:]]+/utf,ucp,ascii_posix
+     123\x{660}456
+  0: 123
+-    
+
+ />[[:space:]]+</utf,ucp
+     >\x{a0} \x{a0}<
+  0: >\x{a0} \x{a0}<
+-- 
+2.33.0
+
--- a/pcre2.spec
+++ b/pcre2.spec
@ -1,6 +1,6 @@
 Name:        pcre2
 Version:     10.42
-Release:     11
+Release:     12
 Summary:     Perl Compatible Regular Expressions
 License:     BSD
 URL:         http://www.pcre.org/
@ -39,6 +39,11 @@ Patch6027:     backport-Add-Perl-titlecasing-475.patch
 Patch6028:     backport-Fix-incorrect-positive-error-code-from-pcre2_substitute.patch
 Patch6029:     backport-pcre2_compile-avoid-1-byte-buffer-overread-parsing-V.patch
 Patch6030:     backport-Improve-error-message-for-N-name-in-character-classes.patch
+Patch6031:     backport-Further-ASCII-tests-and-minor-bugfix-plus-ChangeLog-.patch
+Patch6032:     backport-avoid-inconsistency-between-d-and-digit-when-using-a.patch
+Patch6033:     backport-Fix-the-lookahead-after-d-or-posix-to-skip-whitespac.patch
+Patch6034:     backport-Improve-error-offsets-for-character-classes-548.patch
+Patch6035:     backport-Non-recursive-scan-prefix-in-JIT-560.patch

 BuildRequires:  autoconf libtool automake coreutils gcc make readline-devel
 Obsoletes:      pcre2-utf16 pcre2-utf32 pcre2-tools
@ -156,6 +161,14 @@ make check
 %{_pkgdocdir}/html/

 %changelog
+* Tue Dec 10 2024 hugel <gengqihu2@h-partners.com> - 10.42-12
+- DESC:sync patches from upstream
+       backport-Further-ASCII-tests-and-minor-bugfix-plus-ChangeLog-.patch
+       backport-avoid-inconsistency-between-d-and-digit-when-using-a.patch
+       backport-Fix-the-lookahead-after-d-or-posix-to-skip-whitespac.patch
+       backport-Improve-error-offsets-for-character-classes-548.patch
+       backport-Non-recursive-scan-prefix-in-JIT-560.patch
+
 * Tue Nov 19 2024 yanglongkang <yanglongkang@h-partners.com> - 10.42-11
 - DESC:sync patches from upstream