206 lines
4.5 KiB
Diff
206 lines
4.5 KiB
Diff
From 1bb1ef67eff000fe4aa8c1c45289938188e11e48 Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
|
|
Date: Fri, 21 Apr 2023 01:12:45 -0700
|
|
Subject: [PATCH] no partial match if trailing data is invalid utf (#238)
|
|
|
|
Avoid returning a partial match if one was found but followed by invalid
|
|
UTF, making the result consistent with JIT, unlike the behaviour shown below:
|
|
|
|
PCRE2 version 10.34 2019-11-21
|
|
re> /.a/match_invalid_utf,allvector,jit
|
|
data> b\xb1\=ph,ovector=1
|
|
No match
|
|
0: <unchanged>
|
|
data> b\xb1\=ph,ovector=1,no_jit
|
|
Partial match: b\x{b1}
|
|
** ovector[1] is not equal to the subject length: 1 != 2
|
|
0: 0 1
|
|
|
|
Conflict:NA
|
|
Reference:https://github.com/PCRE2Project/pcre2/commit/1bb1ef67eff000fe4aa8c1c45289938188e11e48
|
|
---
|
|
src/pcre2_match.c | 1 +
|
|
testdata/testinput10 | 19 +++++++++++++++++++
|
|
testdata/testinput12 | 14 ++++++++++++++
|
|
testdata/testoutput10 | 32 ++++++++++++++++++++++++++++++++
|
|
testdata/testoutput12-16 | 22 ++++++++++++++++++++++
|
|
testdata/testoutput12-32 | 22 ++++++++++++++++++++++
|
|
6 files changed, 110 insertions(+)
|
|
|
|
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
|
|
index ea98af3c..afe3036b 100644
|
|
--- a/src/pcre2_match.c
|
|
+++ b/src/pcre2_match.c
|
|
@@ -7454,6 +7454,7 @@ if (utf && end_subject != true_end_subject &&
|
|
if (start_match >= true_end_subject)
|
|
{
|
|
rc = MATCH_NOMATCH; /* In case it was partial */
|
|
+ match_partial = NULL;
|
|
break;
|
|
}
|
|
|
|
diff --git a/testdata/testinput10 b/testdata/testinput10
|
|
index 53e37cbc..27321e37 100644
|
|
--- a/testdata/testinput10
|
|
+++ b/testdata/testinput10
|
|
@@ -506,6 +506,25 @@
|
|
\= Expect no match
|
|
ab\x80cdef\=ph
|
|
|
|
+/.a/match_invalid_utf
|
|
+ ab\=ph
|
|
+ ab\=ps
|
|
+ b\xf0\x91\x88b\=ph
|
|
+ b\xf0\x91\x88b\=ps
|
|
+ b\xf0\x91\x88\xb4a
|
|
+\= Expect no match
|
|
+ b\x80\=ph
|
|
+ b\x80\=ps
|
|
+ b\xf0\x91\x88\=ph
|
|
+ b\xf0\x91\x88\=ps
|
|
+
|
|
+/.a$/match_invalid_utf
|
|
+ ab\=ph
|
|
+ ab\=ps
|
|
+\= Expect no match
|
|
+ b\xf0\x91\x98\=ph
|
|
+ b\xf0\x91\x98\=ps
|
|
+
|
|
/ab$/match_invalid_utf
|
|
ab\x80cdeab
|
|
\= Expect no match
|
|
diff --git a/testdata/testinput12 b/testdata/testinput12
|
|
index 9b4f8d34..7a85eb57 100644
|
|
--- a/testdata/testinput12
|
|
+++ b/testdata/testinput12
|
|
@@ -413,6 +413,20 @@
|
|
\= Expect no match
|
|
ab\x{df00}cdef\=ph
|
|
|
|
+/.a/match_invalid_utf
|
|
+ ab\=ph
|
|
+ ab\=ps
|
|
+\= Expect no match
|
|
+ b\x{df00}\=ph
|
|
+ b\x{df00}\=ps
|
|
+
|
|
+/.a$/match_invalid_utf
|
|
+ ab\=ph
|
|
+ ab\=ps
|
|
+\= Expect no match
|
|
+ b\x{df00}\=ph
|
|
+ b\x{df00}\=ps
|
|
+
|
|
/ab$/match_invalid_utf
|
|
ab\x{df00}cdeab
|
|
\= Expect no match
|
|
diff --git a/testdata/testoutput10 b/testdata/testoutput10
|
|
index d4085106..1f4c876b 100644
|
|
--- a/testdata/testoutput10
|
|
+++ b/testdata/testoutput10
|
|
@@ -1646,6 +1646,38 @@ Partial match: ab
|
|
ab\x80cdef\=ph
|
|
No match
|
|
|
|
+/.a/match_invalid_utf
|
|
+ ab\=ph
|
|
+Partial match: b
|
|
+ ab\=ps
|
|
+Partial match: b
|
|
+ b\xf0\x91\x88b\=ph
|
|
+Partial match: b
|
|
+ b\xf0\x91\x88b\=ps
|
|
+Partial match: b
|
|
+ b\xf0\x91\x88\xb4a
|
|
+ 0: \x{11234}a
|
|
+\= Expect no match
|
|
+ b\x80\=ph
|
|
+No match
|
|
+ b\x80\=ps
|
|
+No match
|
|
+ b\xf0\x91\x88\=ph
|
|
+No match
|
|
+ b\xf0\x91\x88\=ps
|
|
+No match
|
|
+
|
|
+/.a$/match_invalid_utf
|
|
+ ab\=ph
|
|
+Partial match: b
|
|
+ ab\=ps
|
|
+Partial match: b
|
|
+\= Expect no match
|
|
+ b\xf0\x91\x98\=ph
|
|
+No match
|
|
+ b\xf0\x91\x98\=ps
|
|
+No match
|
|
+
|
|
/ab$/match_invalid_utf
|
|
ab\x80cdeab
|
|
0: ab
|
|
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
|
|
index 84c48581..98676324 100644
|
|
--- a/testdata/testoutput12-16
|
|
+++ b/testdata/testoutput12-16
|
|
@@ -1522,6 +1522,28 @@ Partial match: ab
|
|
ab\x{df00}cdef\=ph
|
|
No match
|
|
|
|
+/.a/match_invalid_utf
|
|
+ ab\=ph
|
|
+Partial match: b
|
|
+ ab\=ps
|
|
+Partial match: b
|
|
+\= Expect no match
|
|
+ b\x{df00}\=ph
|
|
+No match
|
|
+ b\x{df00}\=ps
|
|
+No match
|
|
+
|
|
+/.a$/match_invalid_utf
|
|
+ ab\=ph
|
|
+Partial match: b
|
|
+ ab\=ps
|
|
+Partial match: b
|
|
+\= Expect no match
|
|
+ b\x{df00}\=ph
|
|
+No match
|
|
+ b\x{df00}\=ps
|
|
+No match
|
|
+
|
|
/ab$/match_invalid_utf
|
|
ab\x{df00}cdeab
|
|
0: ab
|
|
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
|
|
index 03b6e394..3a20dd4b 100644
|
|
--- a/testdata/testoutput12-32
|
|
+++ b/testdata/testoutput12-32
|
|
@@ -1520,6 +1520,28 @@ Partial match: ab
|
|
ab\x{df00}cdef\=ph
|
|
No match
|
|
|
|
+/.a/match_invalid_utf
|
|
+ ab\=ph
|
|
+Partial match: b
|
|
+ ab\=ps
|
|
+Partial match: b
|
|
+\= Expect no match
|
|
+ b\x{df00}\=ph
|
|
+No match
|
|
+ b\x{df00}\=ps
|
|
+No match
|
|
+
|
|
+/.a$/match_invalid_utf
|
|
+ ab\=ph
|
|
+Partial match: b
|
|
+ ab\=ps
|
|
+Partial match: b
|
|
+\= Expect no match
|
|
+ b\x{df00}\=ph
|
|
+No match
|
|
+ b\x{df00}\=ps
|
|
+No match
|
|
+
|
|
/ab$/match_invalid_utf
|
|
ab\x{df00}cdeab
|
|
0: ab
|