backport patches from upstream

This commit is contained in:
panxiaohe 2022-07-27 14:37:18 +08:00
parent cd158069ae
commit 01ebb36065
7 changed files with 520 additions and 8 deletions

View File

@ -0,0 +1,50 @@
From ef6c7768b300678895348ba7c827fa919e3f1d5c Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Fri, 13 May 2022 23:28:30 -0700
Subject: [PATCH] build: update gnulib submodule to latest
https://git.savannah.gnu.org/cgit/gnulib.git/commit/?id=b19a10775e54f8ed17e3a8c08a72d261d8c26244
This fixes a bug introduced in 2019-12-18T05:41:27Z!eggert@cs.ucla.edu,
an earlier patch that fixed dfa.c to not match invalid UTF-8.
Unfortunately that patch had a couple of typos when dfa.c is
matching against the regular expression . (dot). One typo
caused dfa.c to incorrectly reject the valid UTF-8 sequences
(ED)(90-9F)(80-BF) corresponding to U+D400 through U+D7FF, which
are some Hangul Syllables and Hangul Jamo Extended-B. The other
typo caused dfa.c to incorrectly reject the valid sequences
(F4)(88-8F)(80-BF)(80-BF) which correspond to U+108000 through
U+10FFFF (Supplemental Private Use Area plane B).
* lib/dfa.c (utf8_classes): Fix typos.
* tests/test-dfa-match.sh: Test the fix.
Reference:https://git.savannah.gnu.org/cgit/grep.git/commit?id=ef6c7768b300678895348ba7c827fa919e3f1d5c
Conflict:delete ChangeLog and test-dfa-match.sh
---
lib/dfa.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/lib/dfa.c b/lib/dfa.c
index a27d096f7..e88fabb44 100644
--- a/lib/dfa.c
+++ b/lib/dfa.c
@@ -1704,7 +1704,7 @@ add_utf8_anychar (struct dfa *dfa)
/* G. ed (just a token). */
/* H. 80-9f: 2nd byte of a "GHC" sequence. */
- CHARCLASS_INIT (0, 0, 0, 0, 0xffff, 0, 0, 0),
+ CHARCLASS_INIT (0, 0, 0, 0, 0xffffffff, 0, 0, 0),
/* I. f0 (just a token). */
@@ -1717,7 +1717,7 @@ add_utf8_anychar (struct dfa *dfa)
/* L. f4 (just a token). */
/* M. 80-8f: 2nd byte of a "LMCC" sequence. */
- CHARCLASS_INIT (0, 0, 0, 0, 0xff, 0, 0, 0),
+ CHARCLASS_INIT (0, 0, 0, 0, 0xffff, 0, 0, 0),
};
/* Define the character classes that are needed below. */
--
2.27.0

View File

@ -0,0 +1,38 @@
From 0687c51c4792b997988c03a34a8b57717d9961cc Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Tue, 17 Aug 2021 13:58:13 -0700
Subject: [PATCH] grep: djb2 correction
Problem reported by Alex Murray (bug#50093).
* src/grep.c (hash_pattern): Use a nonzero initial value.
Reference:https://git.savannah.gnu.org/cgit/grep.git/commit?id=0687c51c4792b997988c03a34a8b57717d9961cc
Conflict:NA
---
src/grep.c | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/src/grep.c b/src/grep.c
index 271b6b9..7a33686 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -126,7 +126,15 @@ static Hash_table *pattern_table;
static size_t _GL_ATTRIBUTE_PURE
hash_pattern (void const *pat, size_t n_buckets)
{
- size_t h = 0;
+ /* This uses the djb2 algorithm, except starting with a larger prime
+ in place of djb2's 5381, if size_t is wide enough. The primes
+ are taken from the primeth recurrence sequence
+ <https://oeis.org/A007097>. h15, h32 and h64 are the largest
+ sequence members that fit into 15, 32 and 64 bits, respectively.
+ Since any H will do, hashing works correctly on oddball machines
+ where size_t has some other width. */
+ uint_fast64_t h15 = 5381, h32 = 3657500101, h64 = 4123221751654370051;
+ size_t h = h64 <= SIZE_MAX ? h64 : h32 <= SIZE_MAX ? h32 : h15;
intptr_t pat_offset = (intptr_t) pat - 1;
unsigned char const *s = (unsigned char const *) pattern_array + pat_offset;
for ( ; *s != '\n'; s++)
--
2.27.0

View File

@ -0,0 +1,126 @@
From 5447010fdbdf3f1a874689dd41a7c916bb262b2a Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Fri, 13 May 2022 23:46:21 -0700
Subject: [PATCH] grep: fix bug with . and some Hangul Syllables
* NEWS: Mention the fix, which comes from the recent Gnulib update.
* tests/hangul-syllable: New file.
* tests/Makefile.am (TESTS): Add it.
Reference:https://git.savannah.gnu.org/cgit/grep.git/commit?id=5447010fdbdf3f1a874689dd41a7c916bb262b2a
Conflict:delete NEWS
---
tests/Makefile.am | 1 +
tests/hangul-syllable | 88 +++++++++++++++++++++++++++++++++++++++++++
2 files changed, 89 insertions(+)
create mode 100755 tests/hangul-syllable
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 708980d..d72637f 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -110,6 +110,7 @@ TESTS = \
grep-dev-null \
grep-dev-null-out \
grep-dir \
+ hangul-syllable \
hash-collision-perf \
help-version \
high-bit-range \
diff --git a/tests/hangul-syllable b/tests/hangul-syllable
new file mode 100755
index 0000000..9f94d2e
--- /dev/null
+++ b/tests/hangul-syllable
@@ -0,0 +1,88 @@
+#!/bin/sh
+# grep 3.4 through 3.7 mishandled matching '.' against the valid UTF-8
+# sequences (ED)(90-9F)(80-BF) corresponding to U+D400 through U+D7FF,
+# which are some Hangul Syllables and Hangul Jamo Extended-B. They
+# also mishandled (F4)(88-8F)(80-BF)(80-BF) which correspond to
+# U+108000 through U+10FFFF (Supplemental Private Use Area plane B).
+
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
+
+require_en_utf8_locale_
+
+LC_ALL=en_US.UTF-8
+export LC_ALL
+
+check_char ()
+{
+ printf "$1\\n" >in || framewmork_failure_
+
+ grep $2 '^.$' in >out || fail=1
+ cmp in out || fail=1
+}
+
+fail=0
+
+# "." should match U+D45C HANGUL SYLLABLE PYO.
+check_char '\355\221\234'
+
+# Check boundary-condition characters
+# while we are at it.
+
+check_char '\0' -a
+check_char '\177'
+
+for i in 302 337; do
+ for j in 200 277; do
+ check_char "\\$i\\$j"
+ done
+done
+for i in 340; do
+ for j in 240 277; do
+ for k in 200 277; do
+ check_char "\\$i\\$j\\$k"
+ done
+ done
+done
+for i in 341 354 356 357; do
+ for j in 200 277; do
+ for k in 200 277; do
+ check_char "\\$i\\$j\\$k"
+ done
+ done
+done
+for i in 355; do
+ for j in 200 237; do
+ for k in 200 277; do
+ check_char "\\$i\\$j\\$k"
+ done
+ done
+done
+for i in 360; do
+ for j in 220 277; do
+ for k in 200 277; do
+ for l in 200 277; do
+ check_char "\\$i\\$j\\$k\\$l"
+ done
+ done
+ done
+done
+for i in 361 363; do
+ for j in 200 277; do
+ for k in 200 277; do
+ for l in 200 277; do
+ check_char "\\$i\\$j\\$k\\$l"
+ done
+ done
+ done
+done
+for i in 364; do
+ for j in 200 217; do
+ for k in 200 277; do
+ for l in 200 277; do
+ check_char "\\$i\\$j\\$k\\$l"
+ done
+ done
+ done
+done
+
+Exit $fail
--
2.27.0

View File

@ -0,0 +1,63 @@
From e2aec8c91e9d6ed3fc76f9f145dec8a456ce623a Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Fri, 24 Jun 2022 17:53:34 -0500
Subject: [PATCH] grep: fix regex compilation memory leaks
Problem reported by Jim Meyering in:
https://lists.gnu.org/r/grep-devel/2022-06/msg00012.html
* src/dfasearch.c (regex_compile): Fix memory leaks when SYNTAX_ONLY.
Reference:https://git.savannah.gnu.org/cgit/grep.git/commit?id=e2aec8c91e9d6ed3fc76f9f145dec8a456ce623a
Conflict:context adaptation
---
src/dfasearch.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/src/dfasearch.c b/src/dfasearch.c
index d6afa8d..2875453 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -148,24 +148,32 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
ptrdiff_t pcount, ptrdiff_t lineno, reg_syntax_t syntax_bits,
bool syntax_only)
{
- struct re_pattern_buffer pat0;
- struct re_pattern_buffer *pat = syntax_only ? &pat0 : &dc->patterns[pcount];
- pat->buffer = NULL;
- pat->allocated = 0;
+ struct re_pattern_buffer pat;
+ pat.buffer = NULL;
+ pat.allocated = 0;
/* Do not use a fastmap with -i, to work around glibc Bug#20381. */
- pat->fastmap = (syntax_only | match_icase) ? NULL : xmalloc (UCHAR_MAX + 1);
+ pat.fastmap = syntax_only | match_icase ? NULL : ximalloc (UCHAR_MAX + 1);
- pat->translate = NULL;
+ pat.translate = NULL;
if (syntax_only)
re_set_syntax (syntax_bits | RE_NO_SUB);
else
re_set_syntax (syntax_bits);
- char const *err = re_compile_pattern (p, len, pat);
+ char const *err = re_compile_pattern (p, len, &pat);
if (!err)
- return true;
+ {
+ if (syntax_only)
+ regfree (&pat);
+ else
+ dc->patterns[pcount] = pat;
+
+ return true;
+ }
+
+ free (pat.fastmap);
/* Emit a filename:lineno: prefix for patterns taken from files. */
size_t pat_lineno;
--
2.27.0

View File

@ -0,0 +1,44 @@
From 5e3d207d5b7dba28ca248475188a029570766bc1 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Mon, 30 May 2022 17:03:26 -0700
Subject: [PATCH] grep: sanity-check GREP_COLOR
This patch closes a longstanding security issue with GREP_COLOR that I
just noticed, where if the attacker has control over GREP_COLOR's
settings the attacker can trash the victim's terminal or have 'grep'
generate misleading output. For example, without the patch
the shell command:
GREP_COLOR="$(printf '31m\33[2J\33[31')" grep --color=always PATTERN
mucks with the screen, leaving behind only the trailing part of
the last matching line. With the patch, this GREP_COLOR is ignored.
* src/grep.c (main): Sanity-check GREP_COLOR contents the same way
GREP_COLORS values are checked, to not trash the user's terminal.
This follows up the recent fix to Bug#55641.
Reference:https://git.savannah.gnu.org/cgit/grep.git/commit?id=5e3d207d5b7dba28ca248475188a029570766bc1
Conflict:delete NEWS
---
src/grep.c | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/src/grep.c b/src/grep.c
index edefac6..59d3431 100644
--- a/src/grep.c
+++ b/src/grep.c
@@ -2911,7 +2911,12 @@ main (int argc, char **argv)
/* Legacy. */
char *userval = getenv ("GREP_COLOR");
if (userval != NULL && *userval != '\0')
- selected_match_color = context_match_color = userval;
+ for (char *q = userval; *q == ';' || c_isdigit (*q); q++)
+ if (!q[1])
+ {
+ selected_match_color = context_match_color = userval;
+ break;
+ }
/* New GREP_COLORS has priority. */
parse_grep_colors ();
--
2.27.0

View File

@ -0,0 +1,182 @@
From e4a71086bf8143ae083f4e97d8226f30c7e1a079 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Tue, 17 May 2022 13:47:44 -0700
Subject: [PATCH] =?UTF-8?q?tests:=20improve=20tests=20of=20=E2=80=98.?=
=?UTF-8?q?=E2=80=99?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* tests/hangul-syllable: Test some encoding errors too.
Reference:https://git.savannah.gnu.org/cgit/grep.git/commit?id=e4a71086bf8143ae083f4e97d8226f30c7e1a079
Conflict:NA
---
tests/hangul-syllable | 89 ++++++++++++++++++++++++++++++++++++++++++-
1 file changed, 88 insertions(+), 1 deletion(-)
diff --git a/tests/hangul-syllable b/tests/hangul-syllable
index 9f94d2e..fce5c2c 100755
--- a/tests/hangul-syllable
+++ b/tests/hangul-syllable
@@ -12,6 +12,7 @@ require_en_utf8_locale_
LC_ALL=en_US.UTF-8
export LC_ALL
+# Check that '.' completely matches $1, i.e., that $1 is a single UTF-8 char.
check_char ()
{
printf "$1\\n" >in || framewmork_failure_
@@ -20,27 +21,52 @@ check_char ()
cmp in out || fail=1
}
+# Check that '.*' does not completely match $1, i.e., that
+# $1 contains an encoding error.
+check_nonchar ()
+{
+ printf "$1\\n" >in || framework_failure_
+
+ grep -a -v '^.*$' in >out || fail=1
+ cmp in out || fail=1
+}
+
fail=0
# "." should match U+D45C HANGUL SYLLABLE PYO.
check_char '\355\221\234'
-# Check boundary-condition characters
+# Check boundary-condition characters, and non-characters,
# while we are at it.
check_char '\0' -a
check_char '\177'
+check_nonchar '\200'
+check_nonchar '\277'
+check_nonchar '\300\200'
+check_nonchar '\301\277'
for i in 302 337; do
for j in 200 277; do
check_char "\\$i\\$j"
done
+ for j in 177 300; do
+ check_nonchar "\\$i\\$j"
+ done
done
for i in 340; do
for j in 240 277; do
for k in 200 277; do
check_char "\\$i\\$j\\$k"
done
+ for k in 177 300; do
+ check_nonchar "\\$i\\$j\\$k"
+ done
+ done
+ for j in 237 300; do
+ for k in 177 200 277 300; do
+ check_nonchar "\\$i\\$j\\$k"
+ done
done
done
for i in 341 354 356 357; do
@@ -48,6 +74,14 @@ for i in 341 354 356 357; do
for k in 200 277; do
check_char "\\$i\\$j\\$k"
done
+ for k in 177 300; do
+ check_nonchar "\\$i\\$j\\$k"
+ done
+ done
+ for j in 177 300; do
+ for k in 177 200 277 300; do
+ check_nonchar "\\$i\\$j\\$k"
+ done
done
done
for i in 355; do
@@ -55,6 +89,14 @@ for i in 355; do
for k in 200 277; do
check_char "\\$i\\$j\\$k"
done
+ for k in 177 300; do
+ check_nonchar "\\$i\\$j\\$k"
+ done
+ done
+ for j in 177 240; do
+ for k in 177 200 277 300; do
+ check_nonchar "\\$i\\$j\\$k"
+ done
done
done
for i in 360; do
@@ -63,6 +105,21 @@ for i in 360; do
for l in 200 277; do
check_char "\\$i\\$j\\$k\\$l"
done
+ for l in 177 300; do
+ check_nonchar "\\$i\\$j\\$k\\$l"
+ done
+ done
+ for k in 177 300; do
+ for l in 177 200 277 300; do
+ check_nonchar "\\$i\\$j\\$k\\$l"
+ done
+ done
+ done
+ for j in 217 300; do
+ for k in 177 200 277 300; do
+ for l in 177 200 277 300; do
+ check_nonchar "\\$i\\$j\\$k\\$l"
+ done
done
done
done
@@ -72,6 +129,21 @@ for i in 361 363; do
for l in 200 277; do
check_char "\\$i\\$j\\$k\\$l"
done
+ for l in 177 300; do
+ check_nonchar "\\$i\\$j\\$k\\$l"
+ done
+ done
+ for k in 177 300; do
+ for l in 177 200 277 300; do
+ check_nonchar "\\$i\\$j\\$k\\$l"
+ done
+ done
+ done
+ for j in 177 300; do
+ for k in 177 200 277 300; do
+ for l in 177 200 277 300; do
+ check_nonchar "\\$i\\$j\\$k\\$l"
+ done
done
done
done
@@ -81,6 +153,21 @@ for i in 364; do
for l in 200 277; do
check_char "\\$i\\$j\\$k\\$l"
done
+ for l in 177 300; do
+ check_nonchar "\\$i\\$j\\$k\\$l"
+ done
+ done
+ for k in 177 300; do
+ for l in 177 200 277 300; do
+ check_nonchar "\\$i\\$j\\$k\\$l"
+ done
+ done
+ done
+ for j in 177 220; do
+ for k in 177 200 277 300; do
+ for l in 177 200 277 300; do
+ check_nonchar "\\$i\\$j\\$k\\$l"
+ done
done
done
done
--
2.27.0

View File

@ -1,6 +1,6 @@
Name: grep
Version: 3.7
Release: 6
Release: 7
Summary: A string search utility
License: GPLv3+
URL: http://www.gnu.org/software/grep/
@ -9,13 +9,19 @@ Source1: color_grep.sh
Source2: colorgrep.csh
Source3: grepconf.sh
Patch1: backport-grep-avoid-sticky-problem-with-f-f.patch
Patch2: backport-grep-s-does-not-suppress-binary-file-matches.patch
Patch3: backport-grep-work-around-PCRE-bug.patch
Patch4: backport-grep-migrate-to-pcre2.patch
Patch5: backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch
Patch6: backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch
Patch7: backport-grep-fix-minor-P-memory-leak.patch
Patch1: backport-grep-avoid-sticky-problem-with-f-f.patch
Patch2: backport-grep-s-does-not-suppress-binary-file-matches.patch
Patch3: backport-grep-work-around-PCRE-bug.patch
Patch4: backport-grep-migrate-to-pcre2.patch
Patch5: backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch
Patch6: backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch
Patch7: backport-grep-fix-minor-P-memory-leak.patch
Patch8: backport-grep-djb2-correction.patch
Patch9: backport-build-update-gnulib-submodule-to-latest.patch
Patch10: backport-grep-fix-bug-with-and-some-Hangul-Syllables.patch
Patch11: backport-tests-improve-tests-of.patch
Patch12: backport-grep-sanity-check-GREP_COLOR.patch
Patch13: backport-grep-fix-regex-compilation-memory-leaks.patch
BuildRequires: gcc pcre2-devel texinfo gettext libsigsegv-devel automake
Provides: /bin/egrep /bin/fgrep /bin/grep bundled(gnulib)
@ -61,6 +67,9 @@ make check
%changelog
* Wed Jul 27 2022 panxiaohe <panxh.life@foxmail.com> - 3.7-7
- backport patches from upstream
* Fri Jul 15 2022 panxiaohe <panxh.life@foxmail.com> - 3.7-6
- Added coloring aliases to fgrep egrep and grep