51 lines
1.9 KiB
Diff
51 lines
1.9 KiB
Diff
|
|
From ef6c7768b300678895348ba7c827fa919e3f1d5c Mon Sep 17 00:00:00 2001
|
|||
|
|
From: Paul Eggert <eggert@cs.ucla.edu>
|
|||
|
|
Date: Fri, 13 May 2022 23:28:30 -0700
|
|||
|
|
Subject: [PATCH] build: update gnulib submodule to latest
|
|||
|
|
|
|||
|
|
https://git.savannah.gnu.org/cgit/gnulib.git/commit/?id=b19a10775e54f8ed17e3a8c08a72d261d8c26244
|
|||
|
|
This fixes a bug introduced in 2019-12-18T05:41:27Z!eggert@cs.ucla.edu,
|
|||
|
|
an earlier patch that fixed dfa.c to not match invalid UTF-8.
|
|||
|
|
Unfortunately that patch had a couple of typos when dfa.c is
|
|||
|
|
matching against the regular expression ‘.’ (dot). One typo
|
|||
|
|
caused dfa.c to incorrectly reject the valid UTF-8 sequences
|
|||
|
|
(ED)(90-9F)(80-BF) corresponding to U+D400 through U+D7FF, which
|
|||
|
|
are some Hangul Syllables and Hangul Jamo Extended-B. The other
|
|||
|
|
typo caused dfa.c to incorrectly reject the valid sequences
|
|||
|
|
(F4)(88-8F)(80-BF)(80-BF) which correspond to U+108000 through
|
|||
|
|
U+10FFFF (Supplemental Private Use Area plane B).
|
|||
|
|
* lib/dfa.c (utf8_classes): Fix typos.
|
|||
|
|
* tests/test-dfa-match.sh: Test the fix.
|
|||
|
|
|
|||
|
|
Reference:https://git.savannah.gnu.org/cgit/grep.git/commit?id=ef6c7768b300678895348ba7c827fa919e3f1d5c
|
|||
|
|
Conflict:dropped the ChangeLog and tests/test-dfa-match.sh hunks; only the lib/dfa.c change is backported
|
|||
|
|
---
|
|||
|
|
lib/dfa.c | 4 ++--
|
|||
|
|
1 file changed, 2 insertions(+), 2 deletions(-)
|
|||
|
|
|
|||
|
|
diff --git a/lib/dfa.c b/lib/dfa.c
|
|||
|
|
index a27d096f7..e88fabb44 100644
|
|||
|
|
--- a/lib/dfa.c
|
|||
|
|
+++ b/lib/dfa.c
|
|||
|
|
@@ -1704,7 +1704,7 @@ add_utf8_anychar (struct dfa *dfa)
|
|||
|
|
/* G. ed (just a token). */
|
|||
|
|
|
|||
|
|
/* H. 80-9f: 2nd byte of a "GHC" sequence. */
|
|||
|
|
- CHARCLASS_INIT (0, 0, 0, 0, 0xffff, 0, 0, 0),
|
|||
|
|
+ CHARCLASS_INIT (0, 0, 0, 0, 0xffffffff, 0, 0, 0),
|
|||
|
|
|
|||
|
|
/* I. f0 (just a token). */
|
|||
|
|
|
|||
|
|
@@ -1717,7 +1717,7 @@ add_utf8_anychar (struct dfa *dfa)
|
|||
|
|
/* L. f4 (just a token). */
|
|||
|
|
|
|||
|
|
/* M. 80-8f: 2nd byte of a "LMCC" sequence. */
|
|||
|
|
- CHARCLASS_INIT (0, 0, 0, 0, 0xff, 0, 0, 0),
|
|||
|
|
+ CHARCLASS_INIT (0, 0, 0, 0, 0xffff, 0, 0, 0),
|
|||
|
|
};
|
|||
|
|
|
|||
|
|
/* Define the character classes that are needed below. */
|
|||
|
|
--
|
|||
|
|
2.27.0
|
|||
|
|
|