pcre2/backport-Fix-non-recognition-of-some-octal-escapes-in-substitute.patch

264 lines
7.7 KiB
Diff
Raw Normal View History

2024-11-19 09:16:25 +00:00
From d29e729000a3724e2aebaa64318dfd7530a55370 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Wed, 4 Sep 2024 16:18:35 +0100
Subject: [PATCH] Fix non-recognition of some octal escapes in substitute
replacement strings
---
src/pcre2_compile.c | 15 ++++++++-------
src/pcre2_substitute.c | 4 ++--
testdata/testinput11 | 6 ++++++
testdata/testinput2 | 12 ++++++++++++
testdata/testinput5 | 3 +++
testdata/testinput9 | 8 ++++++++
testdata/testoutput11-16 | 8 ++++++++
testdata/testoutput11-32 | 8 ++++++++
testdata/testoutput2 | 16 ++++++++++++++++
testdata/testoutput5 | 4 ++++
testdata/testoutput9 | 10 ++++++++++
11 files changed, 85 insertions(+), 9 deletions(-)
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index ad2baf8..80a1a48 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -1480,8 +1480,8 @@ final code unit of the escape sequence.
This function is also called from pcre2_substitute() to handle escape sequences
in replacement strings. In this case, the cb argument is NULL, and in the case
of escapes that have further processing, only sequences that define a data
-character are recognised. The isclass argument is not relevant; the options
-argument is the final value of the compiled pattern's options.
+character are recognised. The options argument is the final value of the
+compiled pattern's options.
Arguments:
ptrptr points to the input position pointer
@@ -1496,7 +1496,7 @@ Arguments:
errorcodeptr points to the errorcode variable (containing zero)
options the current options bits
xoptions the current extra options bits
- isclass TRUE if inside a character class
+ isclassorsub TRUE if in a character class or called from pcre2_substitute()
cb compile data block or NULL when called from pcre2_substitute()
Returns: zero => a data character
@@ -1507,7 +1507,7 @@ Returns: zero => a data character
int
PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
- int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclass,
+ int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclassorsub,
compile_block *cb)
{
BOOL utf = (options & PCRE2_UTF) != 0;
@@ -1607,7 +1607,8 @@ else
if (cb == NULL)
{
- if (c != CHAR_c && c != CHAR_o && c != CHAR_x)
+ if (c < CHAR_0 ||
+ (c > CHAR_9 && (c != CHAR_c && c != CHAR_o && c != CHAR_x)))
{
*errorcodeptr = ERR3;
return 0;
@@ -1719,7 +1720,7 @@ else
*/
case CHAR_g:
- if (isclass) break;
+ if (isclassorsub) break;
if (ptr >= ptrend)
{
@@ -1791,7 +1792,7 @@ else
case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
- if (!isclass)
+ if (!isclassorsub)
{
oldptr = ptr;
ptr--; /* Back to the digit */
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
index d1f17eb05..1ccef0660 100644
--- a/src/pcre2_substitute.c
+++ b/src/pcre2_substitute.c
@@ -130,7 +130,7 @@ for (; ptr < ptrend; ptr++)
ptr += 1; /* Must point after \ */
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
- code->overall_options, code->extra_options, FALSE, NULL);
+ code->overall_options, code->extra_options, TRUE, NULL);
ptr -= 1; /* Back to last code unit of escape */
if (errorcode != 0)
{
@@ -858,7 +858,7 @@ do
ptr++; /* Point after \ */
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
- code->overall_options, code->extra_options, FALSE, NULL);
+ code->overall_options, code->extra_options, TRUE, NULL);
if (errorcode != 0) goto BADESCAPE;
switch(rc)
diff --git a/testdata/testinput11 b/testdata/testinput11
index 2bc8a25e3..69aea351b 100644
--- a/testdata/testinput11
+++ b/testdata/testinput11
@@ -371,4 +371,10 @@
/(?i:A{1,}\6666666666)/
A\x{1b6}6666666
+/abc/substitute_extended,replace=>\777<
+ abc
+
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+
# End of testinput11
diff --git a/testdata/testinput2 b/testdata/testinput2
index 7d8dfc149..51e2095c8 100644
--- a/testdata/testinput2
+++ b/testdata/testinput2
@@ -4668,6 +4668,18 @@ B)x/alt_verbnames,mark
/abcd/g
>abcd1234abcd5678<\=replace=wxyz,substitute_matched
+/abc/substitute_extended,replace=>\045<
+ abc
+
+/abc/substitute_extended,replace=>\45<
+ abc
+
+/abc/substitute_extended,replace=>\o{45}<
+ abc
+
+/abc/substitute_extended,replace=>\845<
+ abc
+
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
/((p(?'K/
diff --git a/testdata/testinput5 b/testdata/testinput5
index 9126236..da2830d 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -2442,4 +2442,7 @@
# End PCRE2_EXTRA_ASCII_xxx tests
+/abc/utf,substitute_extended,replace=>\777<
+ abc
+
# End of testinput5
diff --git a/testdata/testinput9 b/testdata/testinput9
index 4eb228afe..f2f50033f 100644
--- a/testdata/testinput9
+++ b/testdata/testinput9
@@ -263,4 +263,12 @@
/(?i:A{1,}\6666666666)/
A\x{1b6}6666666
+# Should cause an error
+/abc/substitute_extended,replace=>\777<
+ abc
+
+# Should cause an error
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+
# End of testinput9
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
index f70d89ee9..806f6b3e0 100644
--- a/testdata/testoutput11-16
+++ b/testdata/testoutput11-16
@@ -665,4 +665,12 @@ Subject length lower bound = 1
A\x{1b6}6666666
0: A\x{1b6}6666666
+/abc/substitute_extended,replace=>\777<
+ abc
+ 1: >\x{1ff}<
+
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+ 1: >\x{14e5}<
+
# End of testinput11
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
index 961c4cd05..c5f5c8a42 100644
--- a/testdata/testoutput11-32
+++ b/testdata/testoutput11-32
@@ -671,4 +671,12 @@ Subject length lower bound = 1
A\x{1b6}6666666
0: A\x{1b6}6666666
+/abc/substitute_extended,replace=>\777<
+ abc
+ 1: >\x{1ff}<
+
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+ 1: >\x{14e5}<
+
# End of testinput11
diff --git a/testdata/testoutput2 b/testdata/testoutput2
index 1cffe6a36..eeb635d6d 100644
--- a/testdata/testoutput2
+++ b/testdata/testoutput2
@@ -14934,6 +14934,22 @@ Failed: error -55 at offset 3 in replacement: requested value is not set
>abcd1234abcd5678<\=replace=wxyz,substitute_matched
2: >wxyz1234wxyz5678<
+/abc/substitute_extended,replace=>\045<
+ abc
+ 1: >%<
+
+/abc/substitute_extended,replace=>\45<
+ abc
+ 1: >%<
+
+/abc/substitute_extended,replace=>\o{45}<
+ abc
+ 1: >%<
+
+/abc/substitute_extended,replace=>\845<
+ abc
+ 1: >845<
+
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
Capture group count = 2
Max back reference = 1
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index b1842df..24d849c 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -5375,4 +5375,8 @@ No match
# End PCRE2_EXTRA_ASCII_xxx tests
+/abc/utf,substitute_extended,replace=>\777<
+ abc
+ 1: >\x{1ff}<
+
# End of testinput5
diff --git a/testdata/testoutput9 b/testdata/testoutput9
index 3613703e0..8556c9e14 100644
--- a/testdata/testoutput9
+++ b/testdata/testoutput9
@@ -371,4 +371,14 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode
A\x{1b6}6666666
+# Should cause an error
+/abc/substitute_extended,replace=>\777<
+ abc
+Failed: error -57 at offset 5 in replacement: bad escape sequence in replacement string
+
+# Should cause an error
+/abc/substitute_extended,replace=>\o{012345}<
+ abc
+Failed: error -57 at offset 10 in replacement: bad escape sequence in replacement string
+
# End of testinput9