diff --git a/backport-grep-migrate-to-pcre2.patch b/backport-grep-migrate-to-pcre2.patch new file mode 100644 index 0000000..bcc74bc --- /dev/null +++ b/backport-grep-migrate-to-pcre2.patch @@ -0,0 +1,1113 @@ +From e0d39a9133e1507345d73ac5aff85f037f39aa54 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= +Date: Fri, 12 Nov 2021 16:45:04 -0800 +Subject: grep: migrate to pcre2 + +Mostly a bug by bug translation of the original code to the PCRE2 API. +Code still could do with some optimizations but should be good as a +starting point. + +The API changes the sign of some types and therefore some ugly casts +were needed, some of the changes are just to make sure all variables +fit into the newer types better. + +Includes backward compatibility and could be made to build all the way +to 10.00, but assumes a recent enough version and has been tested with +10.23 (from CentOS 7, the oldest). + +Performance seems equivalent, and it also seems functionally complete. + +* m4/pcre.m4 (gl_FUNC_PCRE): Check for PCRE2, not the original PCRE. +* src/pcresearch.c (struct pcre_comp, jit_exec) +(Pcompile, Pexecute): +Use PCRE2, not the original PCRE. +* tests/filename-lineno.pl: Adjust to match PCRE2 diagnostics. +--- + 0001-grep-migrate-to-pcre2.patch | 543 +++++++++++++++++++++++++++++++ + doc/grep.in.1 | 8 +- + doc/grep.texi | 2 +- + m4/pcre.m4 | 21 +- + src/pcresearch.c | 244 +++++++------- + tests/filename-lineno.pl | 4 +- + 6 files changed, 681 insertions(+), 141 deletions(-) + create mode 100644 0001-grep-migrate-to-pcre2.patch + +diff --git a/0001-grep-migrate-to-pcre2.patch b/0001-grep-migrate-to-pcre2.patch +new file mode 100644 +index 0000000..8375f30 +--- /dev/null ++++ b/0001-grep-migrate-to-pcre2.patch +@@ -0,0 +1,543 @@ ++From 2b4c255e67ae835c18c5ec41f3b67dadfd190213 Mon Sep 17 00:00:00 2001 ++From: licihua ++Date: Sat, 14 May 2022 18:24:47 +0800 ++Subject: [PATCH 1/1] grep: migrate to pcre2 ++ ++--- ++ doc/grep.in.1 | 8 +- ++ doc/grep.texi | 2 +- ++ m4/pcre.m4 | 21 ++-- ++ src/pcresearch.c | 244 +++++++++++++++++++-------------------- ++ tests/filename-lineno.pl | 4 +- ++ 5 files changed, 138 insertions(+), 141 deletions(-) ++ ++diff --git a/doc/grep.in.1 b/doc/grep.in.1 ++index e8854f2..0178db1 100644 ++--- a/doc/grep.in.1 +++++ b/doc/grep.in.1 ++@@ -767,7 +767,7 @@ In other implementations, basic regular expressions are less powerful. ++ The following description applies to extended regular expressions; ++ differences for basic regular expressions are summarized afterwards. ++ Perl-compatible regular expressions give additional functionality, and are ++-documented in B(3) and B(3), but work only if +++documented in B(3) and B(3), but work only if ++ PCRE support is enabled. ++ .PP ++ The fundamental building blocks are the regular expressions ++@@ -1371,9 +1371,9 @@ from the globbing syntax that the shell uses to match file names. ++ .BR sort (1), ++ .BR xargs (1), ++ .BR read (2), ++-.BR pcre (3), ++-.BR pcresyntax (3), ++-.BR pcrepattern (3), +++.BR pcre2 (3), +++.BR pcre2syntax (3), +++.BR pcre2pattern (3), ++ .BR terminfo (5), ++ .BR glob (7), ++ .BR regex (7) ++diff --git a/doc/grep.texi b/doc/grep.texi ++index 01ac81e..aae8571 100644 ++--- a/doc/grep.texi +++++ b/doc/grep.texi ++@@ -1186,7 +1186,7 @@ In other implementations, basic regular expressions are less powerful. ++ The following description applies to extended regular expressions; ++ differences for basic regular expressions are summarized afterwards. ++ Perl-compatible regular expressions give additional functionality, and ++-are documented in the @i{pcresyntax}(3) and @i{pcrepattern}(3) manual +++are documented in the @i{pcre2syntax}(3) and @i{pcre2pattern}(3) manual ++ pages, but work only if PCRE is available in the system. ++ ++ @menu ++diff --git a/m4/pcre.m4 b/m4/pcre.m4 ++index 78b7fda..0ca510f 100644 ++--- a/m4/pcre.m4 +++++ b/m4/pcre.m4 ++@@ -1,4 +1,4 @@ ++-# pcre.m4 - check for libpcre support +++# pcre.m4 - check for PCRE library support ++ ++ # Copyright (C) 2010-2021 Free Software Foundation, Inc. ++ # This file is free software; the Free Software Foundation ++@@ -9,7 +9,7 @@ AC_DEFUN([gl_FUNC_PCRE], ++ [ ++ AC_ARG_ENABLE([perl-regexp], ++ AS_HELP_STRING([--disable-perl-regexp], ++- [disable perl-regexp (pcre) support]), +++ [disable perl-regexp (pcre2) support]), ++ [case $enableval in ++ yes|no) test_pcre=$enableval;; ++ *) AC_MSG_ERROR([invalid value $enableval for --disable-perl-regexp]);; ++@@ -21,24 +21,25 @@ AC_DEFUN([gl_FUNC_PCRE], ++ use_pcre=no ++ ++ if test $test_pcre != no; then ++- PKG_CHECK_MODULES([PCRE], [libpcre], [], [: ${PCRE_LIBS=-lpcre}]) +++ PKG_CHECK_MODULES([PCRE], [libpcre2-8], [], [: ${PCRE_LIBS=-lpcre2-8}]) ++ ++- AC_CACHE_CHECK([for pcre_compile], [pcre_cv_have_pcre_compile], +++ AC_CACHE_CHECK([for pcre2_compile], [pcre_cv_have_pcre2_compile], ++ [pcre_saved_CFLAGS=$CFLAGS ++ pcre_saved_LIBS=$LIBS ++ CFLAGS="$CFLAGS $PCRE_CFLAGS" ++ LIBS="$PCRE_LIBS $LIBS" ++ AC_LINK_IFELSE( ++- [AC_LANG_PROGRAM([[#include +++ [AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 8 +++ #include ++ ]], ++- [[pcre *p = pcre_compile (0, 0, 0, 0, 0); +++ [[pcre2_code *p = pcre2_compile (0, 0, 0, 0, 0, 0); ++ return !p;]])], ++- [pcre_cv_have_pcre_compile=yes], ++- [pcre_cv_have_pcre_compile=no]) +++ [pcre_cv_have_pcre2_compile=yes], +++ [pcre_cv_have_pcre2_compile=no]) ++ CFLAGS=$pcre_saved_CFLAGS ++ LIBS=$pcre_saved_LIBS]) ++ ++- if test "$pcre_cv_have_pcre_compile" = yes; then +++ if test "$pcre_cv_have_pcre2_compile" = yes; then ++ use_pcre=yes ++ elif test $test_pcre = maybe; then ++ AC_MSG_WARN([AC_PACKAGE_NAME will be built without pcre support.]) ++@@ -50,7 +51,7 @@ AC_DEFUN([gl_FUNC_PCRE], ++ if test $use_pcre = yes; then ++ AC_DEFINE([HAVE_LIBPCRE], [1], ++ [Define to 1 if you have the Perl Compatible Regular Expressions ++- library (-lpcre).]) +++ library (-lpcre2).]) ++ else ++ PCRE_CFLAGS= ++ PCRE_LIBS= ++diff --git a/src/pcresearch.c b/src/pcresearch.c ++index 37f7e40..38dc010 100644 ++--- a/src/pcresearch.c +++++ b/src/pcresearch.c ++@@ -17,40 +17,32 @@ ++ 02110-1301, USA. */ ++ ++ /* Written August 1992 by Mike Haertel. */ +++/* Updated for PCRE2 by Carlo Arenas. */ ++ ++ #include ++ #include "search.h" ++ #include "die.h" ++ ++-#include +++#define PCRE2_CODE_UNIT_WIDTH 8 +++#include ++ ++-/* This must be at least 2; everything after that is for performance ++- in pcre_exec. */ ++-enum { NSUB = 300 }; ++- ++-#ifndef PCRE_EXTRA_MATCH_LIMIT_RECURSION ++-# define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0 ++-#endif ++-#ifndef PCRE_STUDY_JIT_COMPILE ++-# define PCRE_STUDY_JIT_COMPILE 0 ++-#endif ++-#ifndef PCRE_STUDY_EXTRA_NEEDED ++-# define PCRE_STUDY_EXTRA_NEEDED 0 +++/* Needed for backward compatibility for PCRE2 < 10.30 */ +++#ifndef PCRE2_CONFIG_DEPTHLIMIT +++#define PCRE2_CONFIG_DEPTHLIMIT PCRE2_CONFIG_RECURSIONLIMIT +++#define PCRE2_ERROR_DEPTHLIMIT PCRE2_ERROR_RECURSIONLIMIT +++#define pcre2_set_depth_limit pcre2_set_recursion_limit ++ #endif ++ ++ struct pcre_comp ++ { ++- /* Compiled internal form of a Perl regular expression. */ ++- pcre *cre; ++- ++- /* Additional information about the pattern. */ ++- pcre_extra *extra; ++- ++-#if PCRE_STUDY_JIT_COMPILE ++ /* The JIT stack and its maximum size. */ ++- pcre_jit_stack *jit_stack; ++- int jit_stack_size; ++-#endif +++ pcre2_jit_stack *jit_stack; +++ PCRE2_SIZE jit_stack_size; +++ +++ /* Compiled internal form of a Perl regular expression. */ +++ pcre2_code *cre; +++ pcre2_match_context *mcontext; +++ pcre2_match_data *data; ++ ++ /* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty ++ string matches when that flag is used. */ ++@@ -60,51 +52,50 @@ struct pcre_comp ++ ++ /* Match the already-compiled PCRE pattern against the data in SUBJECT, ++ of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with ++- options OPTIONS, and storing resulting matches into SUB. Return ++- the (nonnegative) match location or a (negative) error number. */ +++ options OPTIONS. +++ Return the (nonnegative) match count or a (negative) error number. */ ++ static int ++-jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, ++- int search_offset, int options, int *sub) +++jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes, +++ PCRE2_SIZE search_offset, int options) ++ { ++ while (true) ++ { ++- int e = pcre_exec (pc->cre, pc->extra, subject, search_bytes, ++- search_offset, options, sub, NSUB); ++- ++-#if PCRE_STUDY_JIT_COMPILE ++- if (e == PCRE_ERROR_JIT_STACKLIMIT +++ int e = pcre2_match (pc->cre, (PCRE2_SPTR)subject, search_bytes, +++ search_offset, options, pc->data, pc->mcontext); +++ if (e == PCRE2_ERROR_JIT_STACKLIMIT ++ && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2) ++ { ++- int old_size = pc->jit_stack_size; ++- int new_size = pc->jit_stack_size = old_size * 2; +++ PCRE2_SIZE old_size = pc->jit_stack_size; +++ PCRE2_SIZE new_size = pc->jit_stack_size = old_size * 2; ++ if (pc->jit_stack) ++- pcre_jit_stack_free (pc->jit_stack); ++- pc->jit_stack = pcre_jit_stack_alloc (old_size, new_size); ++- if (!pc->jit_stack) +++ pcre2_jit_stack_free (pc->jit_stack); +++ pc->jit_stack = pcre2_jit_stack_create (old_size, new_size, NULL); +++ +++ if (!pc->mcontext) +++ pc->mcontext = pcre2_match_context_create (NULL); +++ +++ if (!pc->jit_stack || !pc->mcontext) ++ die (EXIT_TROUBLE, 0, ++ _("failed to allocate memory for the PCRE JIT stack")); ++- pcre_assign_jit_stack (pc->extra, NULL, pc->jit_stack); +++ pcre2_jit_stack_assign (pc->mcontext, NULL, pc->jit_stack); ++ continue; ++ } ++-#endif ++ ++-#if PCRE_EXTRA_MATCH_LIMIT_RECURSION ++- if (e == PCRE_ERROR_RECURSIONLIMIT ++- && (PCRE_STUDY_EXTRA_NEEDED || pc->extra)) +++ +++ if (e == PCRE2_ERROR_DEPTHLIMIT) ++ { ++- unsigned long lim ++- = (pc->extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION ++- ? pc->extra->match_limit_recursion ++- : 0); ++- if (lim <= ULONG_MAX / 2) ++- { ++- pc->extra->match_limit_recursion = lim ? 2 * lim : (1 << 24) - 1; ++- pc->extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; ++- continue; ++- } +++ uint32_t lim; +++ pcre2_config (PCRE2_CONFIG_DEPTHLIMIT, &lim); +++ if (lim >= UINT32_MAX / 2) +++ return e; +++ +++ lim <<= 1; +++ if (!pc->mcontext) +++ pc->mcontext = pcre2_match_context_create (NULL); +++ +++ pcre2_set_depth_limit (pc->mcontext, lim); +++ continue; ++ } ++-#endif ++- ++ return e; ++ } ++ } ++@@ -115,27 +106,35 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, ++ void * ++ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) ++ { ++- int e; ++- char const *ep; +++ PCRE2_SIZE e; +++ int ec; +++ PCRE2_UCHAR8 ep[128]; /* 120 code units is suggested to avoid truncation */ ++ static char const wprefix[] = "(?cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ()); +++ pcre2_set_character_tables (ccontext, pcre2_maketables (NULL)); +++ pc->cre = pcre2_compile (re, n - (char *)re, flags, &ec, &e, ccontext); ++ if (!pc->cre) ++- die (EXIT_TROUBLE, 0, "%s", ep); ++- ++- int pcre_study_flags = PCRE_STUDY_EXTRA_NEEDED | PCRE_STUDY_JIT_COMPILE; ++- pc->extra = pcre_study (pc->cre, pcre_study_flags, &ep); ++- if (ep) ++- die (EXIT_TROUBLE, 0, "%s", ep); +++ { +++ pcre2_get_error_message (ec, ep, sizeof (ep)); +++ die (EXIT_TROUBLE, 0, "%s", ep); +++ } ++ ++-#if PCRE_STUDY_JIT_COMPILE ++- if (pcre_fullinfo (pc->cre, pc->extra, PCRE_INFO_JIT, &e)) ++- die (EXIT_TROUBLE, 0, _("internal error (should never happen)")); +++ pc->data = pcre2_match_data_create_from_pattern (pc->cre, NULL); ++ ++- /* The PCRE documentation says that a 32 KiB stack is the default. */ ++- if (e) ++- pc->jit_stack_size = 32 << 10; ++-#endif +++ ec = pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE); +++ if (ec && ec != PCRE2_ERROR_JIT_BADOPTION && ec != PCRE2_ERROR_NOMEMORY) +++ die (EXIT_TROUBLE, 0, _("JIT internal error: %d"), ec); +++ else +++ { +++ /* The PCRE documentation says that a 32 KiB stack is the default. */ +++ pc->jit_stack_size = 32 << 10; +++ } ++ ++ free (re); ++ ++- int sub[NSUB]; ++- pc->empty_match[false] = pcre_exec (pc->cre, pc->extra, "", 0, 0, ++- PCRE_NOTBOL, sub, NSUB); ++- pc->empty_match[true] = pcre_exec (pc->cre, pc->extra, "", 0, 0, 0, sub, ++- NSUB); +++ pc->empty_match[false] = jit_exec (pc, "", 0, 0, PCRE2_NOTBOL); +++ pc->empty_match[true] = jit_exec (pc, "", 0, 0, 0); ++ ++ return pc; ++ } ++@@ -206,15 +193,14 @@ size_t ++ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, ++ char const *start_ptr) ++ { ++- int sub[NSUB]; ++ char const *p = start_ptr ? start_ptr : buf; ++ bool bol = p[-1] == eolbyte; ++ char const *line_start = buf; ++- int e = PCRE_ERROR_NOMATCH; +++ int e = PCRE2_ERROR_NOMATCH; ++ char const *line_end; ++ struct pcre_comp *pc = vcp; ++- ++- /* The search address to pass to pcre_exec. This is the start of +++ PCRE2_SIZE *sub = pcre2_get_ovector_pointer (pc->data); +++ /* The search address to pass to PCRE. This is the start of ++ the buffer, or just past the most-recently discovered encoding ++ error or line end. */ ++ char const *subject = buf; ++@@ -226,14 +212,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, ++ better and the correctness issues were too puzzling. See ++ Bug#22655. */ ++ line_end = rawmemchr (p, eolbyte); ++- if (INT_MAX < line_end - p) +++ if (PCRE2_SIZE_MAX < line_end - p) ++ die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit")); ++ ++ for (;;) ++ { ++ /* Skip past bytes that are easily determined to be encoding ++ errors, treating them as data that cannot match. This is ++- faster than having pcre_exec check them. */ +++ faster than having PCRE check them. */ ++ while (localeinfo.sbclen[to_uchar (*p)] == -1) ++ { ++ p++; ++@@ -241,10 +227,10 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, ++ bol = false; ++ } ++ ++- int search_offset = p - subject; +++ PCRE2_SIZE search_offset = p - subject; ++ ++ /* Check for an empty match; this is faster than letting ++- pcre_exec do it. */ +++ PCRE do it. */ ++ if (p == line_end) ++ { ++ sub[0] = sub[1] = search_offset; ++@@ -254,13 +240,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, ++ ++ int options = 0; ++ if (!bol) ++- options |= PCRE_NOTBOL; +++ options |= PCRE2_NOTBOL; ++ ++- e = jit_exec (pc, subject, line_end - subject, search_offset, ++- options, sub); ++- if (e != PCRE_ERROR_BADUTF8) +++ e = jit_exec (pc, subject, line_end - subject, +++ search_offset, options); +++ /* PCRE2 provides 22 different error codes for bad UTF-8 */ +++ if (! (PCRE2_ERROR_UTF8_ERR21 <= e && e < PCRE2_ERROR_UTF8_ERR1)) ++ break; ++- int valid_bytes = sub[0]; +++ PCRE2_SIZE valid_bytes = pcre2_get_startchar (pc->data); ++ ++ if (search_offset <= valid_bytes) ++ { ++@@ -270,14 +257,15 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, ++ /* Handle the empty-match case specially, for speed. ++ This optimization is valid if VALID_BYTES is zero, ++ which means SEARCH_OFFSET is also zero. */ +++ sub[0] = valid_bytes; ++ sub[1] = 0; ++ e = pc->empty_match[bol]; ++ } ++ else ++ e = jit_exec (pc, subject, valid_bytes, search_offset, ++- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub); +++ options | PCRE2_NO_UTF_CHECK | PCRE2_NOTEOL); ++ ++- if (e != PCRE_ERROR_NOMATCH) +++ if (e != PCRE2_ERROR_NOMATCH) ++ break; ++ ++ /* Treat the encoding error as data that cannot match. */ ++@@ -288,7 +276,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, ++ subject += valid_bytes + 1; ++ } ++ ++- if (e != PCRE_ERROR_NOMATCH) +++ if (e != PCRE2_ERROR_NOMATCH) ++ break; ++ bol = true; ++ p = subject = line_start = line_end + 1; ++@@ -299,26 +287,34 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, ++ { ++ switch (e) ++ { ++- case PCRE_ERROR_NOMATCH: +++ case PCRE2_ERROR_NOMATCH: ++ break; ++ ++- case PCRE_ERROR_NOMEMORY: +++ case PCRE2_ERROR_NOMEMORY: ++ die (EXIT_TROUBLE, 0, _("%s: memory exhausted"), input_filename ()); ++ ++-#if PCRE_STUDY_JIT_COMPILE ++- case PCRE_ERROR_JIT_STACKLIMIT: +++ case PCRE2_ERROR_JIT_STACKLIMIT: ++ die (EXIT_TROUBLE, 0, _("%s: exhausted PCRE JIT stack"), ++ input_filename ()); ++-#endif ++ ++- case PCRE_ERROR_MATCHLIMIT: +++ case PCRE2_ERROR_MATCHLIMIT: ++ die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's backtracking limit"), ++ input_filename ()); ++ ++- case PCRE_ERROR_RECURSIONLIMIT: ++- die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's recursion limit"), +++ case PCRE2_ERROR_DEPTHLIMIT: +++ die (EXIT_TROUBLE, 0, +++ _("%s: exceeded PCRE's nested backtracking limit"), +++ input_filename ()); +++ +++ case PCRE2_ERROR_RECURSELOOP: +++ die (EXIT_TROUBLE, 0, _("%s: PCRE detected recurse loop"), ++ input_filename ()); ++ +++#ifdef PCRE2_ERROR_HEAPLIMIT +++ case PCRE2_ERROR_HEAPLIMIT: +++ die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's heap limit"), +++ input_filename ()); ++++#endif ++ default: ++ /* For now, we lump all remaining PCRE failures into this basket. ++ If anyone cares to provide sample grep usage that can trigger ++diff --git a/tests/filename-lineno.pl b/tests/filename-lineno.pl ++index 1e84b45..1ff3d6a 100755 ++--- a/tests/filename-lineno.pl +++++ b/tests/filename-lineno.pl ++@@ -101,13 +101,13 @@ my @Tests = ++ ], ++ ['invalid-re-P-paren', '-P ")"', {EXIT=>2}, ++ {ERR => $ENV{PCRE_WORKS} == 1 ++- ? "$prog: unmatched parentheses\n" +++ ? "$prog: unmatched closing parenthesis\n" ++ : $no_pcre ++ }, ++ ], ++ ['invalid-re-P-star-paren', '-P "a.*)"', {EXIT=>2}, ++ {ERR => $ENV{PCRE_WORKS} == 1 ++- ? "$prog: unmatched parentheses\n" +++ ? "$prog: unmatched closing parenthesis\n" ++ : $no_pcre ++ }, ++ ], ++-- ++2.26.2 ++ +diff --git a/doc/grep.in.1 b/doc/grep.in.1 +index e8854f2..0178db1 100644 +--- a/doc/grep.in.1 ++++ b/doc/grep.in.1 +@@ -767,7 +767,7 @@ In other implementations, basic regular expressions are less powerful. + The following description applies to extended regular expressions; + differences for basic regular expressions are summarized afterwards. + Perl-compatible regular expressions give additional functionality, and are +-documented in B(3) and B(3), but work only if ++documented in B(3) and B(3), but work only if + PCRE support is enabled. + .PP + The fundamental building blocks are the regular expressions +@@ -1371,9 +1371,9 @@ from the globbing syntax that the shell uses to match file names. + .BR sort (1), + .BR xargs (1), + .BR read (2), +-.BR pcre (3), +-.BR pcresyntax (3), +-.BR pcrepattern (3), ++.BR pcre2 (3), ++.BR pcre2syntax (3), ++.BR pcre2pattern (3), + .BR terminfo (5), + .BR glob (7), + .BR regex (7) +diff --git a/doc/grep.texi b/doc/grep.texi +index 01ac81e..aae8571 100644 +--- a/doc/grep.texi ++++ b/doc/grep.texi +@@ -1186,7 +1186,7 @@ In other implementations, basic regular expressions are less powerful. + The following description applies to extended regular expressions; + differences for basic regular expressions are summarized afterwards. + Perl-compatible regular expressions give additional functionality, and +-are documented in the @i{pcresyntax}(3) and @i{pcrepattern}(3) manual ++are documented in the @i{pcre2syntax}(3) and @i{pcre2pattern}(3) manual + pages, but work only if PCRE is available in the system. + + @menu +diff --git a/m4/pcre.m4 b/m4/pcre.m4 +index 78b7fda..0ca510f 100644 +--- a/m4/pcre.m4 ++++ b/m4/pcre.m4 +@@ -1,4 +1,4 @@ +-# pcre.m4 - check for libpcre support ++# pcre.m4 - check for PCRE library support + + # Copyright (C) 2010-2021 Free Software Foundation, Inc. + # This file is free software; the Free Software Foundation +@@ -9,7 +9,7 @@ AC_DEFUN([gl_FUNC_PCRE], + [ + AC_ARG_ENABLE([perl-regexp], + AS_HELP_STRING([--disable-perl-regexp], +- [disable perl-regexp (pcre) support]), ++ [disable perl-regexp (pcre2) support]), + [case $enableval in + yes|no) test_pcre=$enableval;; + *) AC_MSG_ERROR([invalid value $enableval for --disable-perl-regexp]);; +@@ -21,24 +21,25 @@ AC_DEFUN([gl_FUNC_PCRE], + use_pcre=no + + if test $test_pcre != no; then +- PKG_CHECK_MODULES([PCRE], [libpcre], [], [: ${PCRE_LIBS=-lpcre}]) ++ PKG_CHECK_MODULES([PCRE], [libpcre2-8], [], [: ${PCRE_LIBS=-lpcre2-8}]) + +- AC_CACHE_CHECK([for pcre_compile], [pcre_cv_have_pcre_compile], ++ AC_CACHE_CHECK([for pcre2_compile], [pcre_cv_have_pcre2_compile], + [pcre_saved_CFLAGS=$CFLAGS + pcre_saved_LIBS=$LIBS + CFLAGS="$CFLAGS $PCRE_CFLAGS" + LIBS="$PCRE_LIBS $LIBS" + AC_LINK_IFELSE( +- [AC_LANG_PROGRAM([[#include ++ [AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 8 ++ #include + ]], +- [[pcre *p = pcre_compile (0, 0, 0, 0, 0); ++ [[pcre2_code *p = pcre2_compile (0, 0, 0, 0, 0, 0); + return !p;]])], +- [pcre_cv_have_pcre_compile=yes], +- [pcre_cv_have_pcre_compile=no]) ++ [pcre_cv_have_pcre2_compile=yes], ++ [pcre_cv_have_pcre2_compile=no]) + CFLAGS=$pcre_saved_CFLAGS + LIBS=$pcre_saved_LIBS]) + +- if test "$pcre_cv_have_pcre_compile" = yes; then ++ if test "$pcre_cv_have_pcre2_compile" = yes; then + use_pcre=yes + elif test $test_pcre = maybe; then + AC_MSG_WARN([AC_PACKAGE_NAME will be built without pcre support.]) +@@ -50,7 +51,7 @@ AC_DEFUN([gl_FUNC_PCRE], + if test $use_pcre = yes; then + AC_DEFINE([HAVE_LIBPCRE], [1], + [Define to 1 if you have the Perl Compatible Regular Expressions +- library (-lpcre).]) ++ library (-lpcre2).]) + else + PCRE_CFLAGS= + PCRE_LIBS= +diff --git a/src/pcresearch.c b/src/pcresearch.c +index 37f7e40..caedf49 100644 +--- a/src/pcresearch.c ++++ b/src/pcresearch.c +@@ -17,40 +17,32 @@ + 02110-1301, USA. */ + + /* Written August 1992 by Mike Haertel. */ ++/* Updated for PCRE2 by Carlo Arenas. */ + + #include + #include "search.h" + #include "die.h" + +-#include ++#define PCRE2_CODE_UNIT_WIDTH 8 ++#include + +-/* This must be at least 2; everything after that is for performance +- in pcre_exec. */ +-enum { NSUB = 300 }; +- +-#ifndef PCRE_EXTRA_MATCH_LIMIT_RECURSION +-# define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0 +-#endif +-#ifndef PCRE_STUDY_JIT_COMPILE +-# define PCRE_STUDY_JIT_COMPILE 0 +-#endif +-#ifndef PCRE_STUDY_EXTRA_NEEDED +-# define PCRE_STUDY_EXTRA_NEEDED 0 ++/* Needed for backward compatibility for PCRE2 < 10.30 */ ++#ifndef PCRE2_CONFIG_DEPTHLIMIT ++#define PCRE2_CONFIG_DEPTHLIMIT PCRE2_CONFIG_RECURSIONLIMIT ++#define PCRE2_ERROR_DEPTHLIMIT PCRE2_ERROR_RECURSIONLIMIT ++#define pcre2_set_depth_limit pcre2_set_recursion_limit + #endif + + struct pcre_comp + { +- /* Compiled internal form of a Perl regular expression. */ +- pcre *cre; +- +- /* Additional information about the pattern. */ +- pcre_extra *extra; +- +-#if PCRE_STUDY_JIT_COMPILE + /* The JIT stack and its maximum size. */ +- pcre_jit_stack *jit_stack; +- int jit_stack_size; +-#endif ++ pcre2_jit_stack *jit_stack; ++ PCRE2_SIZE jit_stack_size; ++ ++ /* Compiled internal form of a Perl regular expression. */ ++ pcre2_code *cre; ++ pcre2_match_context *mcontext; ++ pcre2_match_data *data; + + /* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty + string matches when that flag is used. */ +@@ -60,51 +52,50 @@ struct pcre_comp + + /* Match the already-compiled PCRE pattern against the data in SUBJECT, + of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with +- options OPTIONS, and storing resulting matches into SUB. Return +- the (nonnegative) match location or a (negative) error number. */ ++ options OPTIONS. ++ Return the (nonnegative) match count or a (negative) error number. */ + static int +-jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, +- int search_offset, int options, int *sub) ++jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes, ++ PCRE2_SIZE search_offset, int options) + { + while (true) + { +- int e = pcre_exec (pc->cre, pc->extra, subject, search_bytes, +- search_offset, options, sub, NSUB); +- +-#if PCRE_STUDY_JIT_COMPILE +- if (e == PCRE_ERROR_JIT_STACKLIMIT ++ int e = pcre2_match (pc->cre, (PCRE2_SPTR)subject, search_bytes, ++ search_offset, options, pc->data, pc->mcontext); ++ if (e == PCRE2_ERROR_JIT_STACKLIMIT + && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2) + { +- int old_size = pc->jit_stack_size; +- int new_size = pc->jit_stack_size = old_size * 2; ++ PCRE2_SIZE old_size = pc->jit_stack_size; ++ PCRE2_SIZE new_size = pc->jit_stack_size = old_size * 2; + if (pc->jit_stack) +- pcre_jit_stack_free (pc->jit_stack); +- pc->jit_stack = pcre_jit_stack_alloc (old_size, new_size); +- if (!pc->jit_stack) ++ pcre2_jit_stack_free (pc->jit_stack); ++ pc->jit_stack = pcre2_jit_stack_create (old_size, new_size, NULL); ++ ++ if (!pc->mcontext) ++ pc->mcontext = pcre2_match_context_create (NULL); ++ ++ if (!pc->jit_stack || !pc->mcontext) + die (EXIT_TROUBLE, 0, + _("failed to allocate memory for the PCRE JIT stack")); +- pcre_assign_jit_stack (pc->extra, NULL, pc->jit_stack); ++ pcre2_jit_stack_assign (pc->mcontext, NULL, pc->jit_stack); + continue; + } +-#endif + +-#if PCRE_EXTRA_MATCH_LIMIT_RECURSION +- if (e == PCRE_ERROR_RECURSIONLIMIT +- && (PCRE_STUDY_EXTRA_NEEDED || pc->extra)) ++ ++ if (e == PCRE2_ERROR_DEPTHLIMIT) + { +- unsigned long lim +- = (pc->extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION +- ? pc->extra->match_limit_recursion +- : 0); +- if (lim <= ULONG_MAX / 2) +- { +- pc->extra->match_limit_recursion = lim ? 2 * lim : (1 << 24) - 1; +- pc->extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION; +- continue; +- } ++ uint32_t lim; ++ pcre2_config (PCRE2_CONFIG_DEPTHLIMIT, &lim); ++ if (lim >= UINT32_MAX / 2) ++ return e; ++ ++ lim <<= 1; ++ if (!pc->mcontext) ++ pc->mcontext = pcre2_match_context_create (NULL); ++ ++ pcre2_set_depth_limit (pc->mcontext, lim); ++ continue; + } +-#endif +- + return e; + } + } +@@ -115,27 +106,35 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes, + void * + Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact) + { +- int e; +- char const *ep; ++ PCRE2_SIZE e; ++ int ec; ++ PCRE2_UCHAR8 ep[128]; /* 120 code units is suggested to avoid truncation */ + static char const wprefix[] = "(?cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ()); ++ pcre2_set_character_tables (ccontext, pcre2_maketables (NULL)); ++ pc->cre = pcre2_compile (re, n - (char *)re, flags, &ec, &e, ccontext); + if (!pc->cre) +- die (EXIT_TROUBLE, 0, "%s", ep); +- +- int pcre_study_flags = PCRE_STUDY_EXTRA_NEEDED | PCRE_STUDY_JIT_COMPILE; +- pc->extra = pcre_study (pc->cre, pcre_study_flags, &ep); +- if (ep) +- die (EXIT_TROUBLE, 0, "%s", ep); ++ { ++ pcre2_get_error_message (ec, ep, sizeof (ep)); ++ die (EXIT_TROUBLE, 0, "%s", ep); ++ } + +-#if PCRE_STUDY_JIT_COMPILE +- if (pcre_fullinfo (pc->cre, pc->extra, PCRE_INFO_JIT, &e)) +- die (EXIT_TROUBLE, 0, _("internal error (should never happen)")); ++ pc->data = pcre2_match_data_create_from_pattern (pc->cre, NULL); + +- /* The PCRE documentation says that a 32 KiB stack is the default. */ +- if (e) +- pc->jit_stack_size = 32 << 10; +-#endif ++ ec = pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE); ++ if (ec && ec != PCRE2_ERROR_JIT_BADOPTION && ec != PCRE2_ERROR_NOMEMORY) ++ die (EXIT_TROUBLE, 0, _("JIT internal error: %d"), ec); ++ else ++ { ++ /* The PCRE documentation says that a 32 KiB stack is the default. */ ++ pc->jit_stack_size = 32 << 10; ++ } + + free (re); + +- int sub[NSUB]; +- pc->empty_match[false] = pcre_exec (pc->cre, pc->extra, "", 0, 0, +- PCRE_NOTBOL, sub, NSUB); +- pc->empty_match[true] = pcre_exec (pc->cre, pc->extra, "", 0, 0, 0, sub, +- NSUB); ++ pc->empty_match[false] = jit_exec (pc, "", 0, 0, PCRE2_NOTBOL); ++ pc->empty_match[true] = jit_exec (pc, "", 0, 0, 0); + + return pc; + } +@@ -206,15 +193,14 @@ size_t + Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + char const *start_ptr) + { +- int sub[NSUB]; + char const *p = start_ptr ? start_ptr : buf; + bool bol = p[-1] == eolbyte; + char const *line_start = buf; +- int e = PCRE_ERROR_NOMATCH; ++ int e = PCRE2_ERROR_NOMATCH; + char const *line_end; + struct pcre_comp *pc = vcp; +- +- /* The search address to pass to pcre_exec. This is the start of ++ PCRE2_SIZE *sub = pcre2_get_ovector_pointer (pc->data); ++ /* The search address to pass to PCRE. This is the start of + the buffer, or just past the most-recently discovered encoding + error or line end. */ + char const *subject = buf; +@@ -226,14 +212,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + better and the correctness issues were too puzzling. See + Bug#22655. */ + line_end = rawmemchr (p, eolbyte); +- if (INT_MAX < line_end - p) ++ if (PCRE2_SIZE_MAX < line_end - p) + die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit")); + + for (;;) + { + /* Skip past bytes that are easily determined to be encoding + errors, treating them as data that cannot match. This is +- faster than having pcre_exec check them. */ ++ faster than having PCRE check them. */ + while (localeinfo.sbclen[to_uchar (*p)] == -1) + { + p++; +@@ -241,10 +227,10 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + bol = false; + } + +- int search_offset = p - subject; ++ PCRE2_SIZE search_offset = p - subject; + + /* Check for an empty match; this is faster than letting +- pcre_exec do it. */ ++ PCRE do it. */ + if (p == line_end) + { + sub[0] = sub[1] = search_offset; +@@ -254,13 +240,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + + int options = 0; + if (!bol) +- options |= PCRE_NOTBOL; ++ options |= PCRE2_NOTBOL; + +- e = jit_exec (pc, subject, line_end - subject, search_offset, +- options, sub); +- if (e != PCRE_ERROR_BADUTF8) ++ e = jit_exec (pc, subject, line_end - subject, ++ search_offset, options); ++ /* PCRE2 provides 22 different error codes for bad UTF-8 */ ++ if (! (PCRE2_ERROR_UTF8_ERR21 <= e && e < PCRE2_ERROR_UTF8_ERR1)) + break; +- int valid_bytes = sub[0]; ++ PCRE2_SIZE valid_bytes = pcre2_get_startchar (pc->data); + + if (search_offset <= valid_bytes) + { +@@ -270,14 +257,15 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + /* Handle the empty-match case specially, for speed. + This optimization is valid if VALID_BYTES is zero, + which means SEARCH_OFFSET is also zero. */ ++ sub[0] = valid_bytes; + sub[1] = 0; + e = pc->empty_match[bol]; + } + else + e = jit_exec (pc, subject, valid_bytes, search_offset, +- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub); ++ options | PCRE2_NO_UTF_CHECK | PCRE2_NOTEOL); + +- if (e != PCRE_ERROR_NOMATCH) ++ if (e != PCRE2_ERROR_NOMATCH) + break; + + /* Treat the encoding error as data that cannot match. */ +@@ -288,7 +276,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + subject += valid_bytes + 1; + } + +- if (e != PCRE_ERROR_NOMATCH) ++ if (e != PCRE2_ERROR_NOMATCH) + break; + bol = true; + p = subject = line_start = line_end + 1; +@@ -299,26 +287,34 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size, + { + switch (e) + { +- case PCRE_ERROR_NOMATCH: ++ case PCRE2_ERROR_NOMATCH: + break; + +- case PCRE_ERROR_NOMEMORY: ++ case PCRE2_ERROR_NOMEMORY: + die (EXIT_TROUBLE, 0, _("%s: memory exhausted"), input_filename ()); + +-#if PCRE_STUDY_JIT_COMPILE +- case PCRE_ERROR_JIT_STACKLIMIT: ++ case PCRE2_ERROR_JIT_STACKLIMIT: + die (EXIT_TROUBLE, 0, _("%s: exhausted PCRE JIT stack"), + input_filename ()); +-#endif + +- case PCRE_ERROR_MATCHLIMIT: ++ case PCRE2_ERROR_MATCHLIMIT: + die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's backtracking limit"), + input_filename ()); + +- case PCRE_ERROR_RECURSIONLIMIT: +- die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's recursion limit"), ++ case PCRE2_ERROR_DEPTHLIMIT: ++ die (EXIT_TROUBLE, 0, ++ _("%s: exceeded PCRE's nested backtracking limit"), + input_filename ()); + ++ case PCRE2_ERROR_RECURSELOOP: ++ die (EXIT_TROUBLE, 0, _("%s: PCRE detected recurse loop"), ++ input_filename ()); ++ ++#ifdef PCRE2_ERROR_HEAPLIMIT ++ case PCRE2_ERROR_HEAPLIMIT: ++ die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's heap limit"), ++ input_filename ()); ++#endif + default: + /* For now, we lump all remaining PCRE failures into this basket. + If anyone cares to provide sample grep usage that can trigger +diff --git a/tests/filename-lineno.pl b/tests/filename-lineno.pl +index 1e84b45..1ff3d6a 100755 +--- a/tests/filename-lineno.pl ++++ b/tests/filename-lineno.pl +@@ -101,13 +101,13 @@ my @Tests = + ], + ['invalid-re-P-paren', '-P ")"', {EXIT=>2}, + {ERR => $ENV{PCRE_WORKS} == 1 +- ? "$prog: unmatched parentheses\n" ++ ? "$prog: unmatched closing parenthesis\n" + : $no_pcre + }, + ], + ['invalid-re-P-star-paren', '-P "a.*)"', {EXIT=>2}, + {ERR => $ENV{PCRE_WORKS} == 1 +- ? "$prog: unmatched parentheses\n" ++ ? "$prog: unmatched closing parenthesis\n" + : $no_pcre + }, + ], +-- +2.26.2 + diff --git a/grep.spec b/grep.spec index 553421f..337c31a 100644 --- a/grep.spec +++ b/grep.spec @@ -1,6 +1,6 @@ Name: grep Version: 3.7 -Release: 3 +Release: 4 Summary: A string search utility License: GPLv3+ URL: http://www.gnu.org/software/grep/ @@ -8,8 +8,9 @@ Source0: https://ftp.gnu.org/gnu/grep/grep-%{version}.tar.xz Patch1: backport-grep-avoid-sticky-problem-with-f-f.patch Patch2: backport-grep-s-does-not-suppress-binary-file-matches.patch +Patch3: backport-grep-migrate-to-pcre2.patch -BuildRequires: gcc pcre-devel >= 3.9-10 texinfo gettext libsigsegv-devel automake +BuildRequires: gcc pcre2-devel texinfo gettext libsigsegv-devel automake Provides: /bin/egrep /bin/fgrep /bin/grep bundled(gnulib) %description @@ -21,7 +22,7 @@ a specified pattern. By default, Grep outputs the matching lines. %build %configure --disable-silent-rules \ -CPPFLAGS="-I%{_includedir}/pcre" CFLAGS="$RPM_OPT_FLAGS -fsigned-char" +CPPFLAGS="-I%{_includedir}/pcre2" CFLAGS="$RPM_OPT_FLAGS -fsigned-char" %make_build %install @@ -47,6 +48,9 @@ make check %changelog +* Sat May 14 2022 licihua -3.7-4 +- Modify the dependency from pcre to pcre2 + * Fri Mar 18 2022 yangzhuangzhuang - 3.7-3 - The -s option no longer suppresses "binary file matches" messages