sync backport patches from upstream

This commit is contained in:
markeryang 2024-11-19 09:16:25 +00:00
parent 8be0b453f3
commit f01ff6070f
7 changed files with 826 additions and 1 deletions

View File

@ -0,0 +1,69 @@
From f334e76dc765f23670e957413bae18c9d20b1d82 Mon Sep 17 00:00:00 2001
From: Nicholas Wilson <nicholas@nicholaswilson.me.uk>
Date: Mon, 16 Sep 2024 17:38:40 +0100
Subject: [PATCH] Add Perl titlecasing (#475)
---
src/pcre2_substitute.c | 11 +++++++++++
testdata/testinput2 | 3 +++
testdata/testoutput2 | 4 ++++
3 files changed, 18 insertions(+)
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index 1ccef0660..83ddb8364 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -839,6 +839,12 @@ do
forcecase = -1;
forcecasereset = 0;
ptr += 2;
+ if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_U)
+ {
+ /* Perl title-casing feature for \l\U (and \u\L) */
+ forcecasereset = 1;
+ ptr += 2;
+ }
continue;
case CHAR_U:
@@ -850,6 +856,11 @@ do
forcecase = 1;
forcecasereset = 0;
ptr += 2;
+ if (ptr + 2 < repend && ptr[0] == CHAR_BACKSLASH && ptr[1] == CHAR_L)
+ {
+ forcecasereset = -1;
+ ptr += 2;
+ }
continue;
default:
diff --git a/testdata/testinput2 b/testdata/testinput2
index 51e2095c8..7a836c994 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4612,6 +4612,9 @@ B)x/alt_verbnames,mark
/a(bc)(DE)/replace=a\u$1\U$1\E$1\l$2\L$2\Eab\Uab\LYZ\EDone,substitute_extended
abcDE
+/(Hello)|wORLD/g,replace=>${1:+\l\U$0:\u\L$0}<,substitute_extended
+ Hello between wORLD
+
/abcd/replace=xy\kz,substitute_extended
abcd
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index eeb635d6d..7c71866b7 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14854,6 +14854,10 @@ No match
abcDE
1: aBcBCbcdEdeabAByzDone
+/(Hello)|wORLD/g,replace=>${1:+\l\U$0:\u\L$0}<,substitute_extended
+ Hello between wORLD
+ 2: >hELLO< between >World<
+
/abcd/replace=xy\kz,substitute_extended
abcd
Failed: error -57 at offset 4 in replacement: bad escape sequence in replacement string

View File

@ -0,0 +1,65 @@
From 829414f8e549fe7e4b1a6696ca70664e89e5e7f0 Mon Sep 17 00:00:00 2001
From: Nicholas Wilson <niwilson@microsoft.com>
Date: Wed, 18 Sep 2024 16:39:22 +0100
Subject: [PATCH] Fix incorrect positive error code from pcre2_substitute()
(#481)
---
src/pcre2_substitute.c | 4 +++-
testdata/testinput2 | 6 ++++++
testdata/testoutput2 | 10 ++++++++++
3 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index 86c1d1e69..862ea9f73 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -134,7 +134,9 @@ for (; ptr < ptrend; ptr++)
ptr -= 1; /* Back to last code unit of escape */
if (errorcode != 0)
{
- rc = errorcode;
+ /* errorcode from check_escape is positive, so must not be returned by
+ pcre2_substitute(). */
+ rc = PCRE2_ERROR_BADREPESCAPE;
goto EXIT;
}
diff --git a/testdata/testinput2 b/testdata/testinput2
index c2abdb890..8be78ff50 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4201,6 +4201,12 @@
123abc123\=substitute_overflow_length,replace=[1]x$1z
123abc123\=substitute_overflow_length,replace=[0]x$1z
+/a(b)c/substitute_extended
+ ZabcZ\=replace=>${1:+ yes : no }
+ ZabcZ\=replace=>${1:+ \o{100} : \o{100} }
+ ZabcZ\=replace=>${1:+ \o{Z} : no }
+ ZabcZ\=replace=>${1:+ yes : \o{Z} }
+
"((?=(?(?=(?(?=(?(?=()))))))))"
a
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 7a582cd23..ccf209b5c 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -13818,6 +13818,16 @@ Failed: error -48: no more memory: 10 code units are needed
123abc123\=substitute_overflow_length,replace=[0]x$1z
Failed: error -48: no more memory: 10 code units are needed
+/a(b)c/substitute_extended
+ ZabcZ\=replace=>${1:+ yes : no }
+ 1: Z> yes Z
+ ZabcZ\=replace=>${1:+ \o{100} : \o{100} }
+ 1: Z> @ Z
+ ZabcZ\=replace=>${1:+ \o{Z} : no }
+Failed: error -57 at offset 9 in replacement: bad escape sequence in replacement string
+ ZabcZ\=replace=>${1:+ yes : \o{Z} }
+Failed: error -57 at offset 15 in replacement: bad escape sequence in replacement string
+
"((?=(?(?=(?(?=(?(?=()))))))))"
a
0:

View File

@ -0,0 +1,263 @@
From d29e729000a3724e2aebaa64318dfd7530a55370 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Wed, 4 Sep 2024 16:18:35 +0100
Subject: [PATCH] Fix non-recognition of some octal escapes in substitute
replacement strings
---
src/pcre2_compile.c | 15 ++++++++-------
src/pcre2_substitute.c | 4 ++--
testdata/testinput11 | 6 ++++++
testdata/testinput2 | 12 ++++++++++++
testdata/testinput5 | 3 +++
testdata/testinput9 | 8 ++++++++
testdata/testoutput11-16 | 8 ++++++++
testdata/testoutput11-32 | 8 ++++++++
testdata/testoutput2 | 16 ++++++++++++++++
testdata/testoutput5 | 4 ++++
testdata/testoutput9 | 10 ++++++++++
11 files changed, 85 insertions(+), 9 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index ad2baf8..80a1a48 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -1480,8 +1480,8 @@ final code unit of the escape sequence.
This function is also called from pcre2_substitute() to handle escape sequences
in replacement strings. In this case, the cb argument is NULL, and in the case
of escapes that have further processing, only sequences that define a data
-character are recognised. The isclass argument is not relevant; the options
-argument is the final value of the compiled pattern's options.
+character are recognised. The options argument is the final value of the
+compiled pattern's options.
Arguments:
ptrptr points to the input position pointer
@@ -1496,7 +1496,7 @@ Arguments:
errorcodeptr points to the errorcode variable (containing zero)
options the current options bits
xoptions the current extra options bits
- isclass TRUE if inside a character class
+ isclassorsub TRUE if in a character class or called from pcre2_substitute()
cb compile data block or NULL when called from pcre2_substitute()
Returns: zero => a data character
@@ -1507,7 +1507,7 @@ Returns: zero => a data character
int
PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
- int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclass,
+ int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclassorsub,
compile_block *cb)
{
BOOL utf = (options & PCRE2_UTF) != 0;
@@ -1607,7 +1607,8 @@ else
if (cb == NULL)
{
- if (c != CHAR_c && c != CHAR_o && c != CHAR_x)
+ if (c < CHAR_0 ||
+ (c > CHAR_9 && (c != CHAR_c && c != CHAR_o && c != CHAR_x)))
{
*errorcodeptr = ERR3;
return 0;
@@ -1719,7 +1720,7 @@ else
*/
case CHAR_g:
- if (isclass) break;
+ if (isclassorsub) break;
if (ptr >= ptrend)
{
@@ -1791,7 +1792,7 @@ else
case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
- if (!isclass)
+ if (!isclassorsub)
{
oldptr = ptr;
ptr--; /* Back to the digit */
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index d1f17eb05..1ccef0660 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -130,7 +130,7 @@ for (; ptr < ptrend; ptr++)
ptr += 1; /* Must point after \ */
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
- code->overall_options, code->extra_options, FALSE, NULL);
+ code->overall_options, code->extra_options, TRUE, NULL);
ptr -= 1; /* Back to last code unit of escape */
if (errorcode != 0)
{
@@ -858,7 +858,7 @@ do
ptr++; /* Point after \ */
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
- code->overall_options, code->extra_options, FALSE, NULL);
+ code->overall_options, code->extra_options, TRUE, NULL);
if (errorcode != 0) goto BADESCAPE;
switch(rc)
diff --git a/testdata/testinput11 b/testdata/testinput11
index 2bc8a25e3..69aea351b 100644
--- a/testdata/testinput11
+++ b/testdata/testinput11
@@ -371,4 +371,10 @@
/(?i:A{1,}\6666666666)/
A\x{1b6}6666666
+/abc/substitute_extended,replace=>\777<
+ abc
+
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+
# End of testinput11
diff --git a/testdata/testinput2 b/testdata/testinput2
index 7d8dfc149..51e2095c8 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4668,6 +4668,18 @@ B)x/alt_verbnames,mark
/abcd/g
>abcd1234abcd5678<\=replace=wxyz,substitute_matched
+/abc/substitute_extended,replace=>\045<
+ abc
+
+/abc/substitute_extended,replace=>\45<
+ abc
+
+/abc/substitute_extended,replace=>\o{45}<
+ abc
+
+/abc/substitute_extended,replace=>\845<
+ abc
+
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
/((p(?'K/
diff --git a/testdata/testinput5 b/testdata/testinput5
index 9126236..da2830d 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -2442,4 +2442,7 @@
# End PCRE2_EXTRA_ASCII_xxx tests
+/abc/utf,substitute_extended,replace=>\777<
+ abc
+
# End of testinput5
diff --git a/testdata/testinput9 b/testdata/testinput9
index 4eb228afe..f2f50033f 100644
--- a/testdata/testinput9
+++ b/testdata/testinput9
@@ -263,4 +263,12 @@
/(?i:A{1,}\6666666666)/
A\x{1b6}6666666
+# Should cause an error
+/abc/substitute_extended,replace=>\777<
+ abc
+
+# Should cause an error
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+
# End of testinput9
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
index f70d89ee9..806f6b3e0 100644
--- a/testdata/testoutput11-16
+++ b/testdata/testoutput11-16
@@ -665,4 +665,12 @@ Subject length lower bound = 1
A\x{1b6}6666666
0: A\x{1b6}6666666
+/abc/substitute_extended,replace=>\777<
+ abc
+ 1: >\x{1ff}<
+
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+ 1: >\x{14e5}<
+
# End of testinput11
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
index 961c4cd05..c5f5c8a42 100644
--- a/testdata/testoutput11-32
+++ b/testdata/testoutput11-32
@@ -671,4 +671,12 @@ Subject length lower bound = 1
A\x{1b6}6666666
0: A\x{1b6}6666666
+/abc/substitute_extended,replace=>\777<
+ abc
+ 1: >\x{1ff}<
+
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+ 1: >\x{14e5}<
+
# End of testinput11
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 1cffe6a36..eeb635d6d 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14934,6 +14934,22 @@ Failed: error -55 at offset 3 in replacement: requested value is not set
>abcd1234abcd5678<\=replace=wxyz,substitute_matched
2: >wxyz1234wxyz5678<
+/abc/substitute_extended,replace=>\045<
+ abc
+ 1: >%<
+
+/abc/substitute_extended,replace=>\45<
+ abc
+ 1: >%<
+
+/abc/substitute_extended,replace=>\o{45}<
+ abc
+ 1: >%<
+
+/abc/substitute_extended,replace=>\845<
+ abc
+ 1: >845<
+
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
Capture group count = 2
Max back reference = 1
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index b1842df..24d849c 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -5375,4 +5375,8 @@ No match
# End PCRE2_EXTRA_ASCII_xxx tests
+/abc/utf,substitute_extended,replace=>\777<
+ abc
+ 1: >\x{1ff}<
+
# End of testinput5
diff --git a/testdata/testoutput9 b/testdata/testoutput9
index 3613703e0..8556c9e14 100644
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@@ -371,4 +371,14 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode
A\x{1b6}6666666
+# Should cause an error
+/abc/substitute_extended,replace=>\777<
+ abc
+Failed: error -57 at offset 5 in replacement: bad escape sequence in replacement string
+
+# Should cause an error
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+Failed: error -57 at offset 10 in replacement: bad escape sequence in replacement string
+
# End of testinput9

View File

@ -0,0 +1,233 @@
From ef218fbba60bfe5b0a8ac9ea4445eac5fb0847e5 Mon Sep 17 00:00:00 2001
From: Alex Dowad <alexinbeijing@gmail.com>
Date: Sat, 7 Sep 2024 00:16:03 +0900
Subject: [PATCH] Guard against out-of-bounds memory access when parsing
LIMIT_HEAP et al (#463)
Patterns passed to pcre2_compile are not guaranteed to be
null-terminated. Also, it can happen that there is an invalid
pattern like this:
(*LIMIT_HEAP=123
If the next byte of memory after the end of the pattern happens
to be a digit, it will be parsed as part of the limit value. Or,
if the next byte is a right parenthesis character, it will be taken
as the end of the (*LIMIT_HEAP=nnn) construct.
This will result in `skipatstart` being larger than `patlen`, which
will result in underflow and an erroneous call to malloc requesting
a huge number of bytes.
---
src/pcre2_compile.c | 7 ++-
src/pcre2_internal.h | 3 +
src/pcre2_util.h | 132 ++++++++++++++++++++++++++++++++++++++++++
testdata/testoutput15 | 4 +-
4 files changed, 141 insertions(+), 5 deletions(-)
create mode 100644 src/pcre2_util.h
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index e6843bb13..410f220b3 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -10552,12 +10552,12 @@ if ((options & PCRE2_LITERAL) == 0)
ptr += pp;
goto HAD_EARLY_ERROR;
}
- while (IS_DIGIT(ptr[pp]))
+ while (pp < patlen && IS_DIGIT(ptr[pp]))
{
if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */
c = c*10 + (ptr[pp++] - CHAR_0);
}
- if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS)
+ if (pp >= patlen || ptr[pp] != CHAR_RIGHT_PARENTHESIS)
{
errorcode = ERR60;
ptr += pp;
@@ -10566,7 +10566,7 @@ if ((options & PCRE2_LITERAL) == 0)
if (p->type == PSO_LIMH) limit_heap = c;
else if (p->type == PSO_LIMM) limit_match = c;
else limit_depth = c;
- skipatstart += pp - skipatstart;
+ skipatstart = ++pp;
break;
}
break; /* Out of the table scan loop */
@@ -10574,6 +10574,7 @@ if ((options & PCRE2_LITERAL) == 0)
}
if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */
}
+ PCRE2_ASSERT(skipatstart <= patlen);
}
/* End of pattern-start options; advance to start of real regex. */
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
index d8fad1e..edb36ca 100644
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -1999,6 +1999,9 @@ extern void * _pcre2_memmove(void *, const void *, size_t);
#endif
#endif /* PCRE2_CODE_UNIT_WIDTH */
+
+#include "pcre2_util.h"
+
#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */
/* End of pcre2_internal.h */
diff --git a/src/pcre2_util.h b/src/pcre2_util.h
new file mode 100644
index 0000000..ea86355
--- /dev/null
+++ b/src/pcre2_util.h
@@ -0,0 +1,132 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE2 is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Original API code Copyright (c) 1997-2012 University of Cambridge
+ New API code Copyright (c) 2016-2024 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifndef PCRE2_UTIL_H_IDEMPOTENT_GUARD
+#define PCRE2_UTIL_H_IDEMPOTENT_GUARD
+
+/* Assertion macros */
+
+#ifdef PCRE2_DEBUG
+
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
+#include <assert.h>
+#endif
+
+/* PCRE2_ASSERT(x) can be used to inject an assert() for conditions
+that the code below doesn't support. It is a NOP in non-debug builds,
+but in debug builds it will print information about the location of the
+code where it triggered and crash.
+
+It is meant to work like assert(), and therefore the expression used
+should indicate what the expected state is, and shouldn't have any
+side-effects. */
+
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
+#define PCRE2_ASSERT(x) assert(x)
+#else
+#define PCRE2_ASSERT(x) do \
+{ \
+ if (!(x)) \
+ { \
+ fprintf(stderr, "Assertion failed at " __FILE__ ":%d\n", __LINE__); \
+ abort(); \
+ } \
+} while(0)
+#endif
+
+/* PCRE2_UNREACHABLE() can be used to mark locations in the code that
+shouldn't be reached. In non-debug builds it is defined as a hint for
+the compiler to eliminate any code after it, so it is also useful for
+performance reasons, but it should be used with care because if it is
+ever reached it will trigger Undefined Behaviour and, if you are lucky, a
+crash. In debug builds it will report the location where it was triggered
+and crash. One important point to consider when using this macro is
+that it is only implemented for a few compilers, and therefore can't
+be relied on to always be active either, so if it is followed by some
+code it is important to make sure that the whole thing is safe to
+use even if the macro is not there (e.g. make sure there is a `break`
+after it if used at the end of a `case`) and to test your code also
+with a configuration where the macro will be a NOP. */
+
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
+#define PCRE2_UNREACHABLE() \
+assert(((void)"Execution reached unexpected point", 0))
+#else
+#define PCRE2_UNREACHABLE() do \
+{ \
+fprintf(stderr, "Execution reached unexpected point at " __FILE__ \
+ ":%d\n", __LINE__); \
+abort(); \
+} while(0)
+#endif
+
+/* PCRE2_DEBUG_UNREACHABLE() is a debug only version of the previous
+macro. It is meant to be used in places where the code is handling
+an error situation in code that shouldn't be reached, but that has
+some sort of fallback code to normally handle the error. When in
+doubt you should use this instead of the previous macro. Like in
+the previous case, it is a good idea to document as much as possible
+the reason and the actions that should be taken if it ever triggers. */
+
+#define PCRE2_DEBUG_UNREACHABLE() PCRE2_UNREACHABLE()
+
+#endif /* PCRE2_DEBUG */
+
+#ifndef PCRE2_DEBUG_UNREACHABLE
+#define PCRE2_DEBUG_UNREACHABLE() do {} while(0)
+#endif
+
+#ifndef PCRE2_UNREACHABLE
+#ifdef HAVE_BUILTIN_UNREACHABLE
+#define PCRE2_UNREACHABLE() __builtin_unreachable()
+#elif defined(HAVE_BUILTIN_ASSUME)
+#define PCRE2_UNREACHABLE() __assume(0)
+#else
+#define PCRE2_UNREACHABLE() do {} while(0)
+#endif
+#endif /* !PCRE2_UNREACHABLE */
+
+#ifndef PCRE2_ASSERT
+#define PCRE2_ASSERT(x) do {} while(0)
+#endif
+
+#endif /* PCRE2_UTIL_H_IDEMPOTENT_GUARD */
+
+/* End of pcre2_util.h */
diff --git a/testdata/testoutput15 b/testdata/testoutput15
index aa9c5c930..f36faeeaf 100644
--- a/testdata/testoutput15
+++ b/testdata/testoutput15
@@ -111,10 +111,10 @@ Minimum depth limit = 10
3: ee
/(*LIMIT_MATCH=12bc)abc/
-Failed: error 160 at offset 17: (*VERB) not recognized or malformed
+Failed: error 160 at offset 16: (*VERB) not recognized or malformed
/(*LIMIT_MATCH=4294967290)abc/
-Failed: error 160 at offset 24: (*VERB) not recognized or malformed
+Failed: error 160 at offset 23: (*VERB) not recognized or malformed
/(*LIMIT_DEPTH=4294967280)abc/I
Capture group count = 0

View File

@ -0,0 +1,68 @@
From d704ee40c5324e5ff6c08f009a7aaa3b67b71565 Mon Sep 17 00:00:00 2001
From: Nicholas Wilson <niwilson@microsoft.com>
Date: Fri, 27 Sep 2024 16:31:01 +0100
Subject: [PATCH] Improve error message for \N{name} in character classes
(#502)
---
src/pcre2_compile.c | 8 ++++++++
testdata/testinput2 | 6 ++++++
testdata/testoutput2 | 9 +++++++++
3 files changed, 23 insertions(+)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index ec4940e63..fd554f1d2 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -1542,6 +1542,14 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
#endif
}
+ /* Give an error in contexts where quantifiers are not allowed
+ (character classes; substitution strings). */
+
+ else if (isclassorsub || cb == NULL)
+ {
+ *errorcodeptr = ERR37;
+ }
+
/* Give an error if what follows is not a quantifier, but don't override
an error set by the quantifier reader (e.g. number overflow). */
diff --git a/testdata/testinput2 b/testdata/testinput2
index c6ee980..a33d987 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -913,6 +913,12 @@
/\U/I
+/[\N]/
+
+/[\N{4}]/
+
+/[\N{name}]/
+
/a{1,3}b/ungreedy
ab
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 2f2b3d1..4c07b72 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -3245,6 +3245,15 @@ Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U,
/\U/I
Failed: error 137 at offset 2: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
+/[\N]/
+Failed: error 171 at offset 3: \N is not supported in a class
+
+/[\N{4}]/
+Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
+
+/[\N{name}]/
+Failed: error 137 at offset 3: PCRE2 does not support \F, \L, \l, \N{name}, \U, or \u
+
/a{1,3}b/ungreedy
ab
0: ab

View File

@ -0,0 +1,118 @@
From bc367f1880ae5ccc771d5780e35df4c42744a9c4 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
Date: Sun, 22 Sep 2024 01:49:03 -0700
Subject: [PATCH] pcre2_compile: avoid 1 byte buffer overread parsing VERBs
(#487)
As reported recently by ef218fb (Guard against out-of-bounds memory
access when parsing LIMIT_HEAP et al (#463), 2024-09-07), a malformed
pattern could result in reading 1 byte past its end.
Fix a similar issue that affects all VERBs and add test cases to
ensure the original bug and all its siblings are no longer an issue.
While at it fix the wording of the related documentation.
---
doc/pcre2syntax.3 | 4 ++--
src/pcre2_compile.c | 11 +++--------
testdata/testinput2 | 8 ++++++++
testdata/testoutput2 | 12 ++++++++++++
4 files changed, 25 insertions(+), 10 deletions(-)
diff --git a/doc/pcre2syntax.3 b/doc/pcre2syntax.3
index 232125b82..db0bb6586 100644
--- a/doc/pcre2syntax.3
+++ b/doc/pcre2syntax.3
@@ -408,8 +408,8 @@ only one hyphen. Setting (but no unsetting) is allowed after (?^ for example
example (?i:...).
.P
The following are recognized only at the very start of a pattern or after one
-of the newline or \eR options with similar syntax. More than one of them may
-appear. For the first three, d is a decimal number.
+of the newline or \eR sequences or options with similar syntax. More than one
+of them may appear. For the first three, d is a decimal number.
.sp
(*LIMIT_DEPTH=d) set the backtracking limit to d
(*LIMIT_HEAP=d) set the heap size limit to d * 1024 bytes
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index 7e48b26..3d9a500 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -9877,13 +9877,14 @@ if ((options & PCRE2_LITERAL) == 0)
{
for (i = 0; i < sizeof(pso_list)/sizeof(pso); i++)
{
- uint32_t c, pp;
pso *p = pso_list + i;
if (patlen - skipatstart - 2 >= p->length &&
PRIV(strncmp_c8)(ptr + skipatstart + 2, (char *)(p->name),
p->length) == 0)
{
+ uint32_t c, pp;
+
skipatstart += p->length + 2;
switch(p->type)
{
@@ -9910,18 +9911,12 @@ if ((options & PCRE2_LITERAL) == 0)
case PSO_LIMH:
c = 0;
pp = skipatstart;
- if (!IS_DIGIT(ptr[pp]))
- {
- errorcode = ERR60;
- ptr += pp;
- goto HAD_EARLY_ERROR;
- }
while (pp < patlen && IS_DIGIT(ptr[pp]))
{
if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */
c = c*10 + (ptr[pp++] - CHAR_0);
}
- if (pp >= patlen || ptr[pp] != CHAR_RIGHT_PARENTHESIS)
+ if (pp >= patlen || pp == skipatstart || ptr[pp] != CHAR_RIGHT_PARENTHESIS)
{
errorcode = ERR60;
ptr += pp;
diff --git a/testdata/testinput2 b/testdata/testinput2
index a869c5bc2..542d14520 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -5261,6 +5261,14 @@ a)"xI
/(*LIMIT_HEAP=0)xxx/I
+/(*LIMIT_HEAP=123/use_length
+
+/(*LIMIT_MATCH=/use_length
+
+/(*CRLF)(*LIMIT_DEPTH=/use_length
+
+/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length
+
/\d{0,3}(*:abc)(?C1)xxx/callout_info
# ----------------------------------------------------------------------
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index bf7b7620e..b99d64781 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -16220,6 +16220,18 @@ First code unit = 'x'
Last code unit = 'x'
Subject length lower bound = 3
+/(*LIMIT_HEAP=123/use_length
+Failed: error 160 at offset 16: (*VERB) not recognized or malformed
+
+/(*LIMIT_MATCH=/use_length
+Failed: error 160 at offset 14: (*VERB) not recognized or malformed
+
+/(*CRLF)(*LIMIT_DEPTH=/use_length
+Failed: error 160 at offset 21: (*VERB) not recognized or malformed
+
+/(*CRLF)(*LIMIT_RECURSION=1)(*BOGUS/use_length
+Failed: error 160 at offset 34: (*VERB) not recognized or malformed
+
/\d{0,3}(*:abc)(?C1)xxx/callout_info
Callout 1 x

View File

@ -1,6 +1,6 @@
Name: pcre2 Name: pcre2
Version: 10.42 Version: 10.42
Release: 10 Release: 11
Summary: Perl Compatible Regular Expressions Summary: Perl Compatible Regular Expressions
License: BSD License: BSD
URL: http://www.pcre.org/ URL: http://www.pcre.org/
@ -33,6 +33,12 @@ Patch6021: backport-pcre2grep-document-better-possible-multiline-matchin.pat
Patch6022: backport-Remove-incorrect-optimization-in-DFA-matching-when-p.patch Patch6022: backport-Remove-incorrect-optimization-in-DFA-matching-when-p.patch
Patch6023: backport-Implement-PCRE2_EXTRA_CASELESS_RESTRICT-and-related-.patch Patch6023: backport-Implement-PCRE2_EXTRA_CASELESS_RESTRICT-and-related-.patch
Patch6024: backport-Additional-PCRE2_EXTRA_ASCII_xxx-code.patch Patch6024: backport-Additional-PCRE2_EXTRA_ASCII_xxx-code.patch
Patch6025: backport-Fix-non-recognition-of-some-octal-escapes-in-substitute.patch
Patch6026: backport-Guard-against-out-of-bounds-memory-access-when-parsing.patch
Patch6027: backport-Add-Perl-titlecasing-475.patch
Patch6028: backport-Fix-incorrect-positive-error-code-from-pcre2_substitute.patch
Patch6029: backport-pcre2_compile-avoid-1-byte-buffer-overread-parsing-V.patch
Patch6030: backport-Improve-error-message-for-N-name-in-character-classes.patch
BuildRequires: autoconf libtool automake coreutils gcc make readline-devel BuildRequires: autoconf libtool automake coreutils gcc make readline-devel
Obsoletes: pcre2-utf16 pcre2-utf32 pcre2-tools Obsoletes: pcre2-utf16 pcre2-utf32 pcre2-tools
@ -150,6 +156,9 @@ make check
%{_pkgdocdir}/html/ %{_pkgdocdir}/html/
%changelog %changelog
* Tue Nov 19 2024 yanglongkang <yanglongkang@h-partners.com> - 10.42-11
- DESC:sync patches from upstream
* Thu Oct 31 2024 xujing <xujing125@huawei.com> - 10.42-10 * Thu Oct 31 2024 xujing <xujing125@huawei.com> - 10.42-10
- DESC:sync patches to fix grep testcase failed - DESC:sync patches to fix grep testcase failed