106 lines
2.8 KiB
Diff
106 lines
2.8 KiB
Diff
From 05206d66340341bef7a673108a855f594c148950 Mon Sep 17 00:00:00 2001
|
|
From: Philip Hazel <Philip.Hazel@gmail.com>
|
|
Date: Sun, 19 Nov 2023 18:32:10 +0000
|
|
Subject: [PATCH] Fix \z behaviour when matching within invalid UTF
|
|
|
|
---
|
|
src/pcre2_match.c | 6 ++++--
|
|
testdata/testinput10 | 3 +++
|
|
testdata/testinput12 | 3 +++
|
|
testdata/testoutput10 | 4 ++++
|
|
testdata/testoutput12-16 | 4 ++++
|
|
testdata/testoutput12-32 | 4 ++++
|
|
6 files changed, 22 insertions(+), 2 deletions(-)
|
|
|
|
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
|
|
index 2dcf8c4..ea03976 100644
|
|
--- a/src/pcre2_match.c
|
|
+++ b/src/pcre2_match.c
|
|
@@ -6076,10 +6076,12 @@ fprintf(stderr, "++ %2ld op=%3d %s\n", Fecode - mb->start_code, *Fecode,
|
|
if ((mb->poptions & PCRE2_DOLLAR_ENDONLY) == 0) goto ASSERT_NL_OR_EOS;
|
|
|
|
/* Fall through */
|
|
- /* Unconditional end of subject assertion (\z) */
|
|
+ /* Unconditional end of subject assertion (\z). We must also check NOTEOL,
|
|
+ because it is set when a valid fragment ends before invalid UTF. */
|
|
|
|
case OP_EOD:
|
|
- if (Feptr < mb->end_subject) RRETURN(MATCH_NOMATCH);
|
|
+ if (Feptr < mb->end_subject || (mb->moptions & PCRE2_NOTEOL) != 0)
|
|
+ RRETURN(MATCH_NOMATCH);
|
|
if (mb->partial != 0)
|
|
{
|
|
mb->hitend = TRUE;
|
|
diff --git a/testdata/testinput10 b/testdata/testinput10
|
|
index c7618b1..e901d51 100644
|
|
--- a/testdata/testinput10
|
|
+++ b/testdata/testinput10
|
|
@@ -642,4 +642,7 @@
|
|
qchq\=ph
|
|
qchq\=ps
|
|
|
|
+/A\z/utf,match_invalid_utf
|
|
+ A\x80\x42\n
|
|
+
|
|
# End of testinput10
|
|
diff --git a/testdata/testinput12 b/testdata/testinput12
|
|
index 1e552e6..5a2d8d2 100644
|
|
--- a/testdata/testinput12
|
|
+++ b/testdata/testinput12
|
|
@@ -464,6 +464,9 @@
|
|
|
|
/aa/utf,ucp,match_invalid_utf,global
|
|
\x{d800}aa
|
|
+
|
|
+/A\z/utf,match_invalid_utf
|
|
+ A\x{df00}\n
|
|
|
|
# ----------------------------------------------------
|
|
|
|
diff --git a/testdata/testoutput10 b/testdata/testoutput10
|
|
index 18dd9d2..8145891 100644
|
|
--- a/testdata/testoutput10
|
|
+++ b/testdata/testoutput10
|
|
@@ -1921,4 +1921,8 @@ Partial match:
|
|
qchq\=ps
|
|
Partial match:
|
|
|
|
+/A\z/utf,match_invalid_utf
|
|
+ A\x80\x42\n
|
|
+No match
|
|
+
|
|
# End of testinput10
|
|
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
|
|
index 8cbc13d..9ac403e 100644
|
|
--- a/testdata/testoutput12-16
|
|
+++ b/testdata/testoutput12-16
|
|
@@ -1607,6 +1607,10 @@ No match
|
|
/aa/utf,ucp,match_invalid_utf,global
|
|
\x{d800}aa
|
|
0: aa
|
|
+
|
|
+/A\z/utf,match_invalid_utf
|
|
+ A\x{df00}\n
|
|
+No match
|
|
|
|
# ----------------------------------------------------
|
|
|
|
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
|
|
index 1a98b4b..9396305 100644
|
|
--- a/testdata/testoutput12-32
|
|
+++ b/testdata/testoutput12-32
|
|
@@ -1605,6 +1605,10 @@ No match
|
|
/aa/utf,ucp,match_invalid_utf,global
|
|
\x{d800}aa
|
|
0: aa
|
|
+
|
|
+/A\z/utf,match_invalid_utf
|
|
+ A\x{df00}\n
|
|
+No match
|
|
|
|
# ----------------------------------------------------
|
|
|
|
--
|
|
2.33.0
|
|
|