!46 sync community patches

From: @yangmingtaip 
Reviewed-by: @openeuler-basic 
Signed-off-by: @openeuler-basic
This commit is contained in:
openeuler-ci-bot 2023-06-27 07:54:24 +00:00 committed by Gitee
commit eae3e65980
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
5 changed files with 362 additions and 1 deletions

View File

@ -0,0 +1,41 @@
From 2c08b619dc973beacc474dcb67cda8cd366200ce Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Tue, 11 Apr 2023 12:42:11 +0000
Subject: [PATCH] Fix an invalid match of ascii word classes when invalid utf
is enabled
Fixes #224
Conflict:delete changelog
Reference:https://github.com/PCRE2Project/pcre2/commit/2c08b619dc973beacc474dcb67cda8cd366200ce
---
src/pcre2_jit_compile.c | 1 +
src/pcre2_jit_test.c | 1 +
2 files changed, 2 insertions(+)
diff --git a/src/pcre2_jit_compile.c b/src/pcre2_jit_compile.c
index 75ba610..81b7a93 100644
--- a/src/pcre2_jit_compile.c
+++ b/src/pcre2_jit_compile.c
@@ -4132,6 +4132,7 @@ if (negated)
if (common->invalid_utf)
{
+ OP1(SLJIT_MOV, TMP1, 0, TMP2, 0);
add_jump(compiler, &common->utfreadchar_invalid, JUMP(SLJIT_FAST_CALL));
add_jump(compiler, backtracks, CMP(SLJIT_EQUAL, TMP1, 0, SLJIT_IMM, INVALID_UTF_CHAR));
OP1(SLJIT_MOV, TMP1, 0, SLJIT_IMM, 0);
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index e1f0bbc..1a1f6c5 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -1979,6 +1979,7 @@ static const struct invalid_utf8_regression_test_case invalid_utf8_regression_te
{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
{ PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
+ { PCRE2_UTF, CI, 0, 0, 0, 1, 4, { "[\\D]", NULL }, "@\xe0\xab\xaa@" },
/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },
--
2.27.0

View File

@ -0,0 +1,24 @@
From a8558f252e8d8b3fd52d1dc8ea46ecefd991a187 Mon Sep 17 00:00:00 2001
From: Zoltan Herczeg <hzmester@freemail.hu>
Date: Tue, 11 Apr 2023 12:55:01 +0000
Subject: [PATCH] Fix wrong test
Conflict:NA
Reference:https://github.com/PCRE2Project/pcre2/commit/a8558f252e8d8b3fd52d1dc8ea46ecefd991a187
---
src/pcre2_jit_test.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/pcre2_jit_test.c b/src/pcre2_jit_test.c
index 7a83bc02..331ed6d4 100644
--- a/src/pcre2_jit_test.c
+++ b/src/pcre2_jit_test.c
@@ -1981,7 +1981,7 @@ static const struct invalid_utf8_regression_test_case invalid_utf8_regression_te
{ PCRE2_UTF, CI, 0, 0, 0, 7, 11, { "#\xc7\x85#", NULL }, "\x80\x80#\xc7\x80\x80\x80#\xc7\x85#" },
{ PCRE2_UTF | PCRE2_UCP, CI, 0, 0, 0, -1, -1, { "[\\s]", NULL }, "\xed\xa0\x80" },
- { PCRE2_UTF, CI, 0, 0, 0, 1, 4, { "[\\D]", NULL }, "@\xe0\xab\xaa@" },
+ { PCRE2_UTF, CI, 0, 0, 0, 0, 3, { "[\\D]", NULL }, "\xe0\xab\xaa@" },
/* These two are not invalid UTF tests, but this infrastructure fits better for them. */
{ 0, PCRE2_JIT_COMPLETE, 0, 0, 1, -1, -1, { "\\X{2}", NULL }, "\r\n\n" },

View File

@ -0,0 +1,205 @@
From 1bb1ef67eff000fe4aa8c1c45289938188e11e48 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
Date: Fri, 21 Apr 2023 01:12:45 -0700
Subject: [PATCH] no partial match if trailing data is invalid utf (#238)
Avoid returning a partial match if one was found but followed by invalid
UTF, making the result consistent with JIT, unlike the previous behaviour:
PCRE2 version 10.34 2019-11-21
re> /.a/match_invalid_utf,allvector,jit
data> b\xb1\=ph,ovector=1
No match
0: <unchanged>
data> b\xb1\=ph,ovector=1,no_jit
Partial match: b\x{b1}
** ovector[1] is not equal to the subject length: 1 != 2
0: 0 1
Conflict:NA
Reference:https://github.com/PCRE2Project/pcre2/commit/1bb1ef67eff000fe4aa8c1c45289938188e11e48
---
src/pcre2_match.c | 1 +
testdata/testinput10 | 19 +++++++++++++++++++
testdata/testinput12 | 14 ++++++++++++++
testdata/testoutput10 | 32 ++++++++++++++++++++++++++++++++
testdata/testoutput12-16 | 22 ++++++++++++++++++++++
testdata/testoutput12-32 | 22 ++++++++++++++++++++++
6 files changed, 110 insertions(+)
diff --git a/src/pcre2_match.c b/src/pcre2_match.c
index ea98af3c..afe3036b 100644
--- a/src/pcre2_match.c
+++ b/src/pcre2_match.c
@@ -7454,6 +7454,7 @@ if (utf && end_subject != true_end_subject &&
if (start_match >= true_end_subject)
{
rc = MATCH_NOMATCH; /* In case it was partial */
+ match_partial = NULL;
break;
}
diff --git a/testdata/testinput10 b/testdata/testinput10
index 53e37cbc..27321e37 100644
--- a/testdata/testinput10
+++ b/testdata/testinput10
@@ -506,6 +506,25 @@
\= Expect no match
ab\x80cdef\=ph
+/.a/match_invalid_utf
+ ab\=ph
+ ab\=ps
+ b\xf0\x91\x88b\=ph
+ b\xf0\x91\x88b\=ps
+ b\xf0\x91\x88\xb4a
+\= Expect no match
+ b\x80\=ph
+ b\x80\=ps
+ b\xf0\x91\x88\=ph
+ b\xf0\x91\x88\=ps
+
+/.a$/match_invalid_utf
+ ab\=ph
+ ab\=ps
+\= Expect no match
+ b\xf0\x91\x98\=ph
+ b\xf0\x91\x98\=ps
+
/ab$/match_invalid_utf
ab\x80cdeab
\= Expect no match
diff --git a/testdata/testinput12 b/testdata/testinput12
index 9b4f8d34..7a85eb57 100644
--- a/testdata/testinput12
+++ b/testdata/testinput12
@@ -413,6 +413,20 @@
\= Expect no match
ab\x{df00}cdef\=ph
+/.a/match_invalid_utf
+ ab\=ph
+ ab\=ps
+\= Expect no match
+ b\x{df00}\=ph
+ b\x{df00}\=ps
+
+/.a$/match_invalid_utf
+ ab\=ph
+ ab\=ps
+\= Expect no match
+ b\x{df00}\=ph
+ b\x{df00}\=ps
+
/ab$/match_invalid_utf
ab\x{df00}cdeab
\= Expect no match
diff --git a/testdata/testoutput10 b/testdata/testoutput10
index d4085106..1f4c876b 100644
--- a/testdata/testoutput10
+++ b/testdata/testoutput10
@@ -1646,6 +1646,38 @@ Partial match: ab
ab\x80cdef\=ph
No match
+/.a/match_invalid_utf
+ ab\=ph
+Partial match: b
+ ab\=ps
+Partial match: b
+ b\xf0\x91\x88b\=ph
+Partial match: b
+ b\xf0\x91\x88b\=ps
+Partial match: b
+ b\xf0\x91\x88\xb4a
+ 0: \x{11234}a
+\= Expect no match
+ b\x80\=ph
+No match
+ b\x80\=ps
+No match
+ b\xf0\x91\x88\=ph
+No match
+ b\xf0\x91\x88\=ps
+No match
+
+/.a$/match_invalid_utf
+ ab\=ph
+Partial match: b
+ ab\=ps
+Partial match: b
+\= Expect no match
+ b\xf0\x91\x98\=ph
+No match
+ b\xf0\x91\x98\=ps
+No match
+
/ab$/match_invalid_utf
ab\x80cdeab
0: ab
diff --git a/testdata/testoutput12-16 b/testdata/testoutput12-16
index 84c48581..98676324 100644
--- a/testdata/testoutput12-16
+++ b/testdata/testoutput12-16
@@ -1522,6 +1522,28 @@ Partial match: ab
ab\x{df00}cdef\=ph
No match
+/.a/match_invalid_utf
+ ab\=ph
+Partial match: b
+ ab\=ps
+Partial match: b
+\= Expect no match
+ b\x{df00}\=ph
+No match
+ b\x{df00}\=ps
+No match
+
+/.a$/match_invalid_utf
+ ab\=ph
+Partial match: b
+ ab\=ps
+Partial match: b
+\= Expect no match
+ b\x{df00}\=ph
+No match
+ b\x{df00}\=ps
+No match
+
/ab$/match_invalid_utf
ab\x{df00}cdeab
0: ab
diff --git a/testdata/testoutput12-32 b/testdata/testoutput12-32
index 03b6e394..3a20dd4b 100644
--- a/testdata/testoutput12-32
+++ b/testdata/testoutput12-32
@@ -1520,6 +1520,28 @@ Partial match: ab
ab\x{df00}cdef\=ph
No match
+/.a/match_invalid_utf
+ ab\=ph
+Partial match: b
+ ab\=ps
+Partial match: b
+\= Expect no match
+ b\x{df00}\=ph
+No match
+ b\x{df00}\=ps
+No match
+
+/.a$/match_invalid_utf
+ ab\=ph
+Partial match: b
+ ab\=ps
+Partial match: b
+\= Expect no match
+ b\x{df00}\=ph
+No match
+ b\x{df00}\=ps
+No match
+
/ab$/match_invalid_utf
ab\x{df00}cdeab
0: ab

View File

@ -0,0 +1,84 @@
From 1bc34ffa64c33381d793fb5cdddf3f484e603d23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
Date: Fri, 12 May 2023 07:54:02 -0700
Subject: [PATCH] pcre2grep: document better possible multiline matching misses
(#252)
While at it, fix a few typos and remove a misplaced cast that would cause
problems for subjects over 2GB.
Conflict:adapt context
Reference:https://github.com/PCRE2Project/pcre2/commit/1bc34ffa64c33381d793fb5cdddf3f484e603d23
---
doc/pcre2grep.1 | 11 ++++++++---
src/pcre2_compile.c | 4 ++--
src/pcre2grep.c | 2 +-
3 files changed, 11 insertions(+), 6 deletions(-)
diff --git a/doc/pcre2grep.1 b/doc/pcre2grep.1
index 1081591..5077304 100644
--- a/doc/pcre2grep.1
+++ b/doc/pcre2grep.1
@@ -66,6 +66,9 @@ The block of memory that is actually used is three times the "buffer size", to
allow for buffering "before" and "after" lines. If the buffer size is too
small, fewer than requested "before" and "after" lines may be output.
.P
+When matching with a multiline pattern, the size of the buffer must be at least
+half of the maximum match expected or the pattern might fail to match.
+.P
Patterns can be no longer than 8KiB or BUFSIZ bytes, whichever is the greater.
BUFSIZ is defined in \fB<stdio.h>\fP. When there is more than one pattern
(specified by the use of \fB-e\fP and/or \fB-f\fP), each pattern is applied to
@@ -201,7 +204,7 @@ exactly the same as the number of lines that would have been output, but if the
\fB-M\fP (multiline) option is used (without \fB-v\fP), there may be more
suppressed lines than the count (that is, the number of matches).
.sp
-If no lines are selected, the number zero is output. If several files are are
+If no lines are selected, the number zero is output. If several files are
being scanned, a count is output for each of them and the \fB-t\fP option can
be used to cause a total to be output at the end. However, if the
\fB--files-with-matches\fP option is also used, only those files whose counts
@@ -490,8 +493,10 @@ well as possibly handling a two-character newline sequence.
.sp
There is a limit to the number of lines that can be matched, imposed by the way
that \fBpcre2grep\fP buffers the input file as it scans it. With a sufficiently
-large processing buffer, this should not be a problem, but the \fB-M\fP option
-does not work when input is read line by line (see \fB--line-buffered\fP.)
+large processing buffer, this should not be a problem.
+.sp
+The \fB-M\fP option does not work when input is read line by line (see
+\fB--line-buffered\fP.)
.TP
\fB-m\fP \fInumber\fP, \fB--max-count\fP=\fInumber\fP
Stop processing after finding \fInumber\fP matching lines, or non-matching
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index b906dc0..c6d4c60 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -5549,8 +5549,8 @@ for (;; pptr++)
If the class contains characters outside the 0-255 range, a different
opcode is compiled. It may optionally have a bit map for characters < 256,
- but those above are are explicitly listed afterwards. A flag code unit
- tells whether the bitmap is present, and whether this is a negated class or
+ but those above are explicitly listed afterwards. A flag code unit tells
+ whether the bitmap is present, and whether this is a negated class or
not. */
case META_CLASS_NOT:
diff --git a/src/pcre2grep.c b/src/pcre2grep.c
index 1484d67..d2ab620 100644
--- a/src/pcre2grep.c
+++ b/src/pcre2grep.c
@@ -1843,7 +1843,7 @@ if (slen > 200)
for (i = 1; p != NULL; p = p->next, i++)
{
- *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, (int)length,
+ *mrc = pcre2_match(p->compiled, (PCRE2_SPTR)matchptr, length,
startoffset, options, match_data, match_context);
if (*mrc >= 0) return TRUE;
if (*mrc == PCRE2_ERROR_NOMATCH) continue;
--
2.27.0

View File

@ -1,6 +1,6 @@
Name: pcre2
Version: 10.40
Release: 3
Release: 4
Summary: Perl Compatible Regular Expressions
License: BSD
URL: http://www.pcre.org/
@ -16,6 +16,10 @@ Patch6014: backport-Add-an-ifdef-to-avoid-the-need-even-to-link-with-pcr.pat
Patch6015: backport-Fixed-an-issue-in-the-backtracking-optimization-of-c.patch
Patch6016: backport-jit-fail-early-in-ffcps_-if-subject-shorter-than-off.patch
Patch6017: backport-jit-fix-pcre2_jit_free_unused_memory-if-sljit-not-us.patch
Patch6018: backport-pcre2grep-document-better-possible-multiline-matchin.patch
Patch6019: backport-no-partial-match-if-trailing-data-is-invalid-utf-238.patch
Patch6020: backport-Fix-an-invalid-match-of-ascii-word-classes-when-inva.patch
Patch6021: backport-fix-wrong-test.patch
BuildRequires: autoconf libtool automake coreutils gcc make readline-devel
Obsoletes: pcre2-utf16 pcre2-utf32 pcre2-tools
@ -132,6 +136,9 @@ make check
%{_pkgdocdir}/html/
%changelog
* Mon Jun 26 2023 yangmingtai <yangmingtai@huawei.com> - 10.40-4
- DESC:sync community patches
* Thu Mar 16 2023 yangmingtai <yangmingtai@huawei.com> - 10.40-3
- DESC:sync community patches