264 lines
7.7 KiB
Diff
|
|
From d29e729000a3724e2aebaa64318dfd7530a55370 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Philip Hazel <Philip.Hazel@gmail.com>
|
||
|
|
Date: Wed, 4 Sep 2024 16:18:35 +0100
|
||
|
|
Subject: [PATCH] Fix non-recognition of some octal escapes in substitute
 replacement strings
|
||
|
|
|
||
|
|
---
|
||
|
|
src/pcre2_compile.c | 15 ++++++++-------
|
||
|
|
src/pcre2_substitute.c | 4 ++--
|
||
|
|
testdata/testinput11 | 6 ++++++
|
||
|
|
testdata/testinput2 | 12 ++++++++++++
|
||
|
|
testdata/testinput5 | 3 +++
|
||
|
|
testdata/testinput9 | 8 ++++++++
|
||
|
|
testdata/testoutput11-16 | 8 ++++++++
|
||
|
|
testdata/testoutput11-32 | 8 ++++++++
|
||
|
|
testdata/testoutput2 | 16 ++++++++++++++++
|
||
|
|
testdata/testoutput5 | 4 ++++
|
||
|
|
testdata/testoutput9 | 10 ++++++++++
|
||
|
|
11 files changed, 85 insertions(+), 9 deletions(-)
|
||
|
|
|
||
|
|
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||
|
|
index ad2baf8..80a1a48 100644
|
||
|
|
--- a/src/pcre2_compile.c
|
||
|
|
+++ b/src/pcre2_compile.c
|
||
|
|
@@ -1480,8 +1480,8 @@ final code unit of the escape sequence.
|
||
|
|
This function is also called from pcre2_substitute() to handle escape sequences
|
||
|
|
in replacement strings. In this case, the cb argument is NULL, and in the case
|
||
|
|
of escapes that have further processing, only sequences that define a data
|
||
|
|
-character are recognised. The isclass argument is not relevant; the options
|
||
|
|
-argument is the final value of the compiled pattern's options.
|
||
|
|
+character are recognised. The options argument is the final value of the
|
||
|
|
+compiled pattern's options.
|
||
|
|
|
||
|
|
Arguments:
|
||
|
|
ptrptr points to the input position pointer
|
||
|
|
@@ -1496,7 +1496,7 @@ Arguments:
|
||
|
|
errorcodeptr points to the errorcode variable (containing zero)
|
||
|
|
options the current options bits
|
||
|
|
xoptions the current extra options bits
|
||
|
|
- isclass TRUE if inside a character class
|
||
|
|
+ isclassorsub TRUE if in a character class or called from pcre2_substitute()
|
||
|
|
cb compile data block or NULL when called from pcre2_substitute()
|
||
|
|
|
||
|
|
Returns: zero => a data character
|
||
|
|
@@ -1507,7 +1507,7 @@ Returns: zero => a data character
|
||
|
|
|
||
|
|
int
|
||
|
|
PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
|
||
|
|
- int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclass,
|
||
|
|
+ int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclassorsub,
|
||
|
|
compile_block *cb)
|
||
|
|
{
|
||
|
|
BOOL utf = (options & PCRE2_UTF) != 0;
|
||
|
|
@@ -1607,7 +1607,8 @@ else
|
||
|
|
|
||
|
|
if (cb == NULL)
|
||
|
|
{
|
||
|
|
- if (c != CHAR_c && c != CHAR_o && c != CHAR_x)
|
||
|
|
+ if (c < CHAR_0 ||
|
||
|
|
+ (c > CHAR_9 && (c != CHAR_c && c != CHAR_o && c != CHAR_x)))
|
||
|
|
{
|
||
|
|
*errorcodeptr = ERR3;
|
||
|
|
return 0;
|
||
|
|
@@ -1719,7 +1720,7 @@ else
|
||
|
|
*/
|
||
|
|
|
||
|
|
case CHAR_g:
|
||
|
|
- if (isclass) break;
|
||
|
|
+ if (isclassorsub) break;
|
||
|
|
|
||
|
|
if (ptr >= ptrend)
|
||
|
|
{
|
||
|
|
@@ -1791,7 +1792,7 @@ else
|
||
|
|
case CHAR_1: case CHAR_2: case CHAR_3: case CHAR_4: case CHAR_5:
|
||
|
|
case CHAR_6: case CHAR_7: case CHAR_8: case CHAR_9:
|
||
|
|
|
||
|
|
- if (!isclass)
|
||
|
|
+ if (!isclassorsub)
|
||
|
|
{
|
||
|
|
oldptr = ptr;
|
||
|
|
ptr--; /* Back to the digit */
|
||
|
|
diff --git a/src/pcre2_substitute.c b/src/pcre2_substitute.c
|
||
|
|
index d1f17eb05..1ccef0660 100644
|
||
|
|
--- a/src/pcre2_substitute.c
|
||
|
|
+++ b/src/pcre2_substitute.c
|
||
|
|
@@ -130,7 +130,7 @@ for (; ptr < ptrend; ptr++)
|
||
|
|
|
||
|
|
ptr += 1; /* Must point after \ */
|
||
|
|
erc = PRIV(check_escape)(&ptr, ptrend, &ch, &errorcode,
|
||
|
|
- code->overall_options, code->extra_options, FALSE, NULL);
|
||
|
|
+ code->overall_options, code->extra_options, TRUE, NULL);
|
||
|
|
ptr -= 1; /* Back to last code unit of escape */
|
||
|
|
if (errorcode != 0)
|
||
|
|
{
|
||
|
|
@@ -858,7 +858,7 @@ do
|
||
|
|
|
||
|
|
ptr++; /* Point after \ */
|
||
|
|
rc = PRIV(check_escape)(&ptr, repend, &ch, &errorcode,
|
||
|
|
- code->overall_options, code->extra_options, FALSE, NULL);
|
||
|
|
+ code->overall_options, code->extra_options, TRUE, NULL);
|
||
|
|
if (errorcode != 0) goto BADESCAPE;
|
||
|
|
|
||
|
|
switch(rc)
|
||
|
|
diff --git a/testdata/testinput11 b/testdata/testinput11
|
||
|
|
index 2bc8a25e3..69aea351b 100644
|
||
|
|
--- a/testdata/testinput11
|
||
|
|
+++ b/testdata/testinput11
|
||
|
|
@@ -371,4 +371,10 @@
|
||
|
|
/(?i:A{1,}\6666666666)/
|
||
|
|
A\x{1b6}6666666
|
||
|
|
|
||
|
|
+/abc/substitute_extended,replace=>\777<
|
||
|
|
+ abc
|
||
|
|
+
|
||
|
|
+/abc/substitute_extended,replace=>\o{012345}<
|
||
|
|
+ abc
|
||
|
|
+
|
||
|
|
# End of testinput11
|
||
|
|
diff --git a/testdata/testinput2 b/testdata/testinput2
|
||
|
|
index 7d8dfc149..51e2095c8 100644
|
||
|
|
--- a/testdata/testinput2
|
||
|
|
+++ b/testdata/testinput2
|
||
|
|
@@ -4668,6 +4668,18 @@ B)x/alt_verbnames,mark
|
||
|
|
/abcd/g
|
||
|
|
>abcd1234abcd5678<\=replace=wxyz,substitute_matched
|
||
|
|
|
||
|
|
+/abc/substitute_extended,replace=>\045<
|
||
|
|
+ abc
|
||
|
|
+
|
||
|
|
+/abc/substitute_extended,replace=>\45<
|
||
|
|
+ abc
|
||
|
|
+
|
||
|
|
+/abc/substitute_extended,replace=>\o{45}<
|
||
|
|
+ abc
|
||
|
|
+
|
||
|
|
+/abc/substitute_extended,replace=>\845<
|
||
|
|
+ abc
|
||
|
|
+
|
||
|
|
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
|
||
|
|
|
||
|
|
/((p(?'K/
|
||
|
|
diff --git a/testdata/testinput5 b/testdata/testinput5
|
||
|
|
index 9126236..da2830d 100644
|
||
|
|
--- a/testdata/testinput5
|
||
|
|
+++ b/testdata/testinput5
|
||
|
|
@@ -2442,4 +2442,7 @@
|
||
|
|
|
||
|
|
# End PCRE2_EXTRA_ASCII_xxx tests
|
||
|
|
|
||
|
|
+/abc/utf,substitute_extended,replace=>\777<
|
||
|
|
+ abc
|
||
|
|
+
|
||
|
|
# End of testinput5
|
||
|
|
diff --git a/testdata/testinput9 b/testdata/testinput9
|
||
|
|
index 4eb228afe..f2f50033f 100644
|
||
|
|
--- a/testdata/testinput9
|
||
|
|
+++ b/testdata/testinput9
|
||
|
|
@@ -263,4 +263,12 @@
|
||
|
|
/(?i:A{1,}\6666666666)/
|
||
|
|
A\x{1b6}6666666
|
||
|
|
|
||
|
|
+# Should cause an error
|
||
|
|
+/abc/substitute_extended,replace=>\777<
|
||
|
|
+ abc
|
||
|
|
+
|
||
|
|
+# Should cause an error
|
||
|
|
+/abc/substitute_extended,replace=>\o{012345}<
|
||
|
|
+ abc
|
||
|
|
+
|
||
|
|
# End of testinput9
|
||
|
|
diff --git a/testdata/testoutput11-16 b/testdata/testoutput11-16
|
||
|
|
index f70d89ee9..806f6b3e0 100644
|
||
|
|
--- a/testdata/testoutput11-16
|
||
|
|
+++ b/testdata/testoutput11-16
|
||
|
|
@@ -665,4 +665,12 @@ Subject length lower bound = 1
|
||
|
|
A\x{1b6}6666666
|
||
|
|
0: A\x{1b6}6666666
|
||
|
|
|
||
|
|
+/abc/substitute_extended,replace=>\777<
|
||
|
|
+ abc
|
||
|
|
+ 1: >\x{1ff}<
|
||
|
|
+
|
||
|
|
+/abc/substitute_extended,replace=>\o{012345}<
|
||
|
|
+ abc
|
||
|
|
+ 1: >\x{14e5}<
|
||
|
|
+
|
||
|
|
# End of testinput11
|
||
|
|
diff --git a/testdata/testoutput11-32 b/testdata/testoutput11-32
|
||
|
|
index 961c4cd05..c5f5c8a42 100644
|
||
|
|
--- a/testdata/testoutput11-32
|
||
|
|
+++ b/testdata/testoutput11-32
|
||
|
|
@@ -671,4 +671,12 @@ Subject length lower bound = 1
|
||
|
|
A\x{1b6}6666666
|
||
|
|
0: A\x{1b6}6666666
|
||
|
|
|
||
|
|
+/abc/substitute_extended,replace=>\777<
|
||
|
|
+ abc
|
||
|
|
+ 1: >\x{1ff}<
|
||
|
|
+
|
||
|
|
+/abc/substitute_extended,replace=>\o{012345}<
|
||
|
|
+ abc
|
||
|
|
+ 1: >\x{14e5}<
|
||
|
|
+
|
||
|
|
# End of testinput11
|
||
|
|
diff --git a/testdata/testoutput2 b/testdata/testoutput2
|
||
|
|
index 1cffe6a36..eeb635d6d 100644
|
||
|
|
--- a/testdata/testoutput2
|
||
|
|
+++ b/testdata/testoutput2
|
||
|
|
@@ -14934,6 +14934,22 @@ Failed: error -55 at offset 3 in replacement: requested value is not set
|
||
|
|
>abcd1234abcd5678<\=replace=wxyz,substitute_matched
|
||
|
|
2: >wxyz1234wxyz5678<
|
||
|
|
|
||
|
|
+/abc/substitute_extended,replace=>\045<
|
||
|
|
+ abc
|
||
|
|
+ 1: >%<
|
||
|
|
+
|
||
|
|
+/abc/substitute_extended,replace=>\45<
|
||
|
|
+ abc
|
||
|
|
+ 1: >%<
|
||
|
|
+
|
||
|
|
+/abc/substitute_extended,replace=>\o{45}<
|
||
|
|
+ abc
|
||
|
|
+ 1: >%<
|
||
|
|
+
|
||
|
|
+/abc/substitute_extended,replace=>\845<
|
||
|
|
+ abc
|
||
|
|
+ 1: >845<
|
||
|
|
+
|
||
|
|
/^(o(\1{72}{\"{\\{00000059079}\d*){74}}){19}/I
|
||
|
|
Capture group count = 2
|
||
|
|
Max back reference = 1
|
||
|
|
diff --git a/testdata/testoutput5 b/testdata/testoutput5
|
||
|
|
index b1842df..24d849c 100644
|
||
|
|
--- a/testdata/testoutput5
|
||
|
|
+++ b/testdata/testoutput5
|
||
|
|
@@ -5375,4 +5375,8 @@ No match
|
||
|
|
|
||
|
|
# End PCRE2_EXTRA_ASCII_xxx tests
|
||
|
|
|
||
|
|
+/abc/utf,substitute_extended,replace=>\777<
|
||
|
|
+ abc
|
||
|
|
+ 1: >\x{1ff}<
|
||
|
|
+
|
||
|
|
# End of testinput5
|
||
|
|
diff --git a/testdata/testoutput9 b/testdata/testoutput9
|
||
|
|
index 3613703e0..8556c9e14 100644
|
||
|
|
--- a/testdata/testoutput9
|
||
|
|
+++ b/testdata/testoutput9
|
||
|
|
@@ -371,4 +371,14 @@ Failed: error 176 at offset 259: name is too long in (*MARK), (*PRUNE), (*SKIP),
|
||
|
|
Failed: error 151 at offset 13: octal value is greater than \377 in 8-bit non-UTF-8 mode
|
||
|
|
A\x{1b6}6666666
|
||
|
|
|
||
|
|
+# Should cause an error
|
||
|
|
+/abc/substitute_extended,replace=>\777<
|
||
|
|
+ abc
|
||
|
|
+Failed: error -57 at offset 5 in replacement: bad escape sequence in replacement string
|
||
|
|
+
|
||
|
|
+# Should cause an error
|
||
|
|
+/abc/substitute_extended,replace=>\o{012345}<
|
||
|
|
+ abc
|
||
|
|
+Failed: error -57 at offset 10 in replacement: bad escape sequence in replacement string
|
||
|
|
+
|
||
|
|
# End of testinput9
|