1114 lines
40 KiB
Diff
1114 lines
40 KiB
Diff
From e0d39a9133e1507345d73ac5aff85f037f39aa54 Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
|
|
Date: Fri, 12 Nov 2021 16:45:04 -0800
|
|
Subject: grep: migrate to pcre2
|
|
|
|
Mostly a bug by bug translation of the original code to the PCRE2 API.
|
|
Code still could do with some optimizations but should be good as a
|
|
starting point.
|
|
|
|
The API changes the sign of some types and therefore some ugly casts
|
|
were needed, some of the changes are just to make sure all variables
|
|
fit into the newer types better.
|
|
|
|
Includes backward compatibility and could be made to build all the way
|
|
to 10.00, but assumes a recent enough version and has been tested with
|
|
10.23 (from CentOS 7, the oldest).
|
|
|
|
Performance seems equivalent, and it also seems functionally complete.
|
|
|
|
* m4/pcre.m4 (gl_FUNC_PCRE): Check for PCRE2, not the original PCRE.
|
|
* src/pcresearch.c (struct pcre_comp, jit_exec)
|
|
(Pcompile, Pexecute):
|
|
Use PCRE2, not the original PCRE.
|
|
* tests/filename-lineno.pl: Adjust to match PCRE2 diagnostics.
|
|
---
|
|
0001-grep-migrate-to-pcre2.patch | 543 +++++++++++++++++++++++++++++++
|
|
doc/grep.in.1 | 8 +-
|
|
doc/grep.texi | 2 +-
|
|
m4/pcre.m4 | 21 +-
|
|
src/pcresearch.c | 244 +++++++-------
|
|
tests/filename-lineno.pl | 4 +-
|
|
6 files changed, 681 insertions(+), 141 deletions(-)
|
|
create mode 100644 0001-grep-migrate-to-pcre2.patch
|
|
|
|
diff --git a/0001-grep-migrate-to-pcre2.patch b/0001-grep-migrate-to-pcre2.patch
|
|
new file mode 100644
|
|
index 0000000..8375f30
|
|
--- /dev/null
|
|
+++ b/0001-grep-migrate-to-pcre2.patch
|
|
@@ -0,0 +1,543 @@
|
|
+From 2b4c255e67ae835c18c5ec41f3b67dadfd190213 Mon Sep 17 00:00:00 2001
|
|
+From: licihua <licihua@huawei.com>
|
|
+Date: Sat, 14 May 2022 18:24:47 +0800
|
|
+Subject: [PATCH 1/1] grep: migrate to pcre2
|
|
+
|
|
+---
|
|
+ doc/grep.in.1 | 8 +-
|
|
+ doc/grep.texi | 2 +-
|
|
+ m4/pcre.m4 | 21 ++--
|
|
+ src/pcresearch.c | 244 +++++++++++++++++++--------------------
|
|
+ tests/filename-lineno.pl | 4 +-
|
|
+ 5 files changed, 138 insertions(+), 141 deletions(-)
|
|
+
|
|
+diff --git a/doc/grep.in.1 b/doc/grep.in.1
|
|
+index e8854f2..0178db1 100644
|
|
+--- a/doc/grep.in.1
|
|
++++ b/doc/grep.in.1
|
|
+@@ -767,7 +767,7 @@ In other implementations, basic regular expressions are less powerful.
|
|
+ The following description applies to extended regular expressions;
|
|
+ differences for basic regular expressions are summarized afterwards.
|
|
+ Perl-compatible regular expressions give additional functionality, and are
|
|
+-documented in B<pcresyntax>(3) and B<pcrepattern>(3), but work only if
|
|
++documented in B<pcre2syntax>(3) and B<pcre2pattern>(3), but work only if
|
|
+ PCRE support is enabled.
|
|
+ .PP
|
|
+ The fundamental building blocks are the regular expressions
|
|
+@@ -1371,9 +1371,9 @@ from the globbing syntax that the shell uses to match file names.
|
|
+ .BR sort (1),
|
|
+ .BR xargs (1),
|
|
+ .BR read (2),
|
|
+-.BR pcre (3),
|
|
+-.BR pcresyntax (3),
|
|
+-.BR pcrepattern (3),
|
|
++.BR pcre2 (3),
|
|
++.BR pcre2syntax (3),
|
|
++.BR pcre2pattern (3),
|
|
+ .BR terminfo (5),
|
|
+ .BR glob (7),
|
|
+ .BR regex (7)
|
|
+diff --git a/doc/grep.texi b/doc/grep.texi
|
|
+index 01ac81e..aae8571 100644
|
|
+--- a/doc/grep.texi
|
|
++++ b/doc/grep.texi
|
|
+@@ -1186,7 +1186,7 @@ In other implementations, basic regular expressions are less powerful.
|
|
+ The following description applies to extended regular expressions;
|
|
+ differences for basic regular expressions are summarized afterwards.
|
|
+ Perl-compatible regular expressions give additional functionality, and
|
|
+-are documented in the @i{pcresyntax}(3) and @i{pcrepattern}(3) manual
|
|
++are documented in the @i{pcre2syntax}(3) and @i{pcre2pattern}(3) manual
|
|
+ pages, but work only if PCRE is available in the system.
|
|
+
|
|
+ @menu
|
|
+diff --git a/m4/pcre.m4 b/m4/pcre.m4
|
|
+index 78b7fda..0ca510f 100644
|
|
+--- a/m4/pcre.m4
|
|
++++ b/m4/pcre.m4
|
|
+@@ -1,4 +1,4 @@
|
|
+-# pcre.m4 - check for libpcre support
|
|
++# pcre.m4 - check for PCRE library support
|
|
+
|
|
+ # Copyright (C) 2010-2021 Free Software Foundation, Inc.
|
|
+ # This file is free software; the Free Software Foundation
|
|
+@@ -9,7 +9,7 @@ AC_DEFUN([gl_FUNC_PCRE],
|
|
+ [
|
|
+ AC_ARG_ENABLE([perl-regexp],
|
|
+ AS_HELP_STRING([--disable-perl-regexp],
|
|
+- [disable perl-regexp (pcre) support]),
|
|
++ [disable perl-regexp (pcre2) support]),
|
|
+ [case $enableval in
|
|
+ yes|no) test_pcre=$enableval;;
|
|
+ *) AC_MSG_ERROR([invalid value $enableval for --disable-perl-regexp]);;
|
|
+@@ -21,24 +21,25 @@ AC_DEFUN([gl_FUNC_PCRE],
|
|
+ use_pcre=no
|
|
+
|
|
+ if test $test_pcre != no; then
|
|
+- PKG_CHECK_MODULES([PCRE], [libpcre], [], [: ${PCRE_LIBS=-lpcre}])
|
|
++ PKG_CHECK_MODULES([PCRE], [libpcre2-8], [], [: ${PCRE_LIBS=-lpcre2-8}])
|
|
+
|
|
+- AC_CACHE_CHECK([for pcre_compile], [pcre_cv_have_pcre_compile],
|
|
++ AC_CACHE_CHECK([for pcre2_compile], [pcre_cv_have_pcre2_compile],
|
|
+ [pcre_saved_CFLAGS=$CFLAGS
|
|
+ pcre_saved_LIBS=$LIBS
|
|
+ CFLAGS="$CFLAGS $PCRE_CFLAGS"
|
|
+ LIBS="$PCRE_LIBS $LIBS"
|
|
+ AC_LINK_IFELSE(
|
|
+- [AC_LANG_PROGRAM([[#include <pcre.h>
|
|
++ [AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 8
|
|
++ #include <pcre2.h>
|
|
+ ]],
|
|
+- [[pcre *p = pcre_compile (0, 0, 0, 0, 0);
|
|
++ [[pcre2_code *p = pcre2_compile (0, 0, 0, 0, 0, 0);
|
|
+ return !p;]])],
|
|
+- [pcre_cv_have_pcre_compile=yes],
|
|
+- [pcre_cv_have_pcre_compile=no])
|
|
++ [pcre_cv_have_pcre2_compile=yes],
|
|
++ [pcre_cv_have_pcre2_compile=no])
|
|
+ CFLAGS=$pcre_saved_CFLAGS
|
|
+ LIBS=$pcre_saved_LIBS])
|
|
+
|
|
+- if test "$pcre_cv_have_pcre_compile" = yes; then
|
|
++ if test "$pcre_cv_have_pcre2_compile" = yes; then
|
|
+ use_pcre=yes
|
|
+ elif test $test_pcre = maybe; then
|
|
+ AC_MSG_WARN([AC_PACKAGE_NAME will be built without pcre support.])
|
|
+@@ -50,7 +51,7 @@ AC_DEFUN([gl_FUNC_PCRE],
|
|
+ if test $use_pcre = yes; then
|
|
+ AC_DEFINE([HAVE_LIBPCRE], [1],
|
|
+ [Define to 1 if you have the Perl Compatible Regular Expressions
|
|
+- library (-lpcre).])
|
|
++ library (-lpcre2).])
|
|
+ else
|
|
+ PCRE_CFLAGS=
|
|
+ PCRE_LIBS=
|
|
+diff --git a/src/pcresearch.c b/src/pcresearch.c
|
|
+index 37f7e40..38dc010 100644
|
|
+--- a/src/pcresearch.c
|
|
++++ b/src/pcresearch.c
|
|
+@@ -17,40 +17,32 @@
|
|
+ 02110-1301, USA. */
|
|
+
|
|
+ /* Written August 1992 by Mike Haertel. */
|
|
++/* Updated for PCRE2 by Carlo Arenas. */
|
|
+
|
|
+ #include <config.h>
|
|
+ #include "search.h"
|
|
+ #include "die.h"
|
|
+
|
|
+-#include <pcre.h>
|
|
++#define PCRE2_CODE_UNIT_WIDTH 8
|
|
++#include <pcre2.h>
|
|
+
|
|
+-/* This must be at least 2; everything after that is for performance
|
|
+- in pcre_exec. */
|
|
+-enum { NSUB = 300 };
|
|
+-
|
|
+-#ifndef PCRE_EXTRA_MATCH_LIMIT_RECURSION
|
|
+-# define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0
|
|
+-#endif
|
|
+-#ifndef PCRE_STUDY_JIT_COMPILE
|
|
+-# define PCRE_STUDY_JIT_COMPILE 0
|
|
+-#endif
|
|
+-#ifndef PCRE_STUDY_EXTRA_NEEDED
|
|
+-# define PCRE_STUDY_EXTRA_NEEDED 0
|
|
++/* Needed for backward compatibility for PCRE2 < 10.30 */
|
|
++#ifndef PCRE2_CONFIG_DEPTHLIMIT
|
|
++#define PCRE2_CONFIG_DEPTHLIMIT PCRE2_CONFIG_RECURSIONLIMIT
|
|
++#define PCRE2_ERROR_DEPTHLIMIT PCRE2_ERROR_RECURSIONLIMIT
|
|
++#define pcre2_set_depth_limit pcre2_set_recursion_limit
|
|
+ #endif
|
|
+
|
|
+ struct pcre_comp
|
|
+ {
|
|
+- /* Compiled internal form of a Perl regular expression. */
|
|
+- pcre *cre;
|
|
+-
|
|
+- /* Additional information about the pattern. */
|
|
+- pcre_extra *extra;
|
|
+-
|
|
+-#if PCRE_STUDY_JIT_COMPILE
|
|
+ /* The JIT stack and its maximum size. */
|
|
+- pcre_jit_stack *jit_stack;
|
|
+- int jit_stack_size;
|
|
+-#endif
|
|
++ pcre2_jit_stack *jit_stack;
|
|
++ PCRE2_SIZE jit_stack_size;
|
|
++
|
|
++ /* Compiled internal form of a Perl regular expression. */
|
|
++ pcre2_code *cre;
|
|
++ pcre2_match_context *mcontext;
|
|
++ pcre2_match_data *data;
|
|
+
|
|
+ /* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty
|
|
+ string matches when that flag is used. */
|
|
+@@ -60,51 +52,50 @@ struct pcre_comp
|
|
+
|
|
+ /* Match the already-compiled PCRE pattern against the data in SUBJECT,
|
|
+ of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with
|
|
+- options OPTIONS, and storing resulting matches into SUB. Return
|
|
+- the (nonnegative) match location or a (negative) error number. */
|
|
++ options OPTIONS.
|
|
++ Return the (nonnegative) match count or a (negative) error number. */
|
|
+ static int
|
|
+-jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
|
|
+- int search_offset, int options, int *sub)
|
|
++jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes,
|
|
++ PCRE2_SIZE search_offset, int options)
|
|
+ {
|
|
+ while (true)
|
|
+ {
|
|
+- int e = pcre_exec (pc->cre, pc->extra, subject, search_bytes,
|
|
+- search_offset, options, sub, NSUB);
|
|
+-
|
|
+-#if PCRE_STUDY_JIT_COMPILE
|
|
+- if (e == PCRE_ERROR_JIT_STACKLIMIT
|
|
++ int e = pcre2_match (pc->cre, (PCRE2_SPTR)subject, search_bytes,
|
|
++ search_offset, options, pc->data, pc->mcontext);
|
|
++ if (e == PCRE2_ERROR_JIT_STACKLIMIT
|
|
+ && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2)
|
|
+ {
|
|
+- int old_size = pc->jit_stack_size;
|
|
+- int new_size = pc->jit_stack_size = old_size * 2;
|
|
++ PCRE2_SIZE old_size = pc->jit_stack_size;
|
|
++ PCRE2_SIZE new_size = pc->jit_stack_size = old_size * 2;
|
|
+ if (pc->jit_stack)
|
|
+- pcre_jit_stack_free (pc->jit_stack);
|
|
+- pc->jit_stack = pcre_jit_stack_alloc (old_size, new_size);
|
|
+- if (!pc->jit_stack)
|
|
++ pcre2_jit_stack_free (pc->jit_stack);
|
|
++ pc->jit_stack = pcre2_jit_stack_create (old_size, new_size, NULL);
|
|
++
|
|
++ if (!pc->mcontext)
|
|
++ pc->mcontext = pcre2_match_context_create (NULL);
|
|
++
|
|
++ if (!pc->jit_stack || !pc->mcontext)
|
|
+ die (EXIT_TROUBLE, 0,
|
|
+ _("failed to allocate memory for the PCRE JIT stack"));
|
|
+- pcre_assign_jit_stack (pc->extra, NULL, pc->jit_stack);
|
|
++ pcre2_jit_stack_assign (pc->mcontext, NULL, pc->jit_stack);
|
|
+ continue;
|
|
+ }
|
|
+-#endif
|
|
+
|
|
+-#if PCRE_EXTRA_MATCH_LIMIT_RECURSION
|
|
+- if (e == PCRE_ERROR_RECURSIONLIMIT
|
|
+- && (PCRE_STUDY_EXTRA_NEEDED || pc->extra))
|
|
++
|
|
++ if (e == PCRE2_ERROR_DEPTHLIMIT)
|
|
+ {
|
|
+- unsigned long lim
|
|
+- = (pc->extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION
|
|
+- ? pc->extra->match_limit_recursion
|
|
+- : 0);
|
|
+- if (lim <= ULONG_MAX / 2)
|
|
+- {
|
|
+- pc->extra->match_limit_recursion = lim ? 2 * lim : (1 << 24) - 1;
|
|
+- pc->extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
|
|
+- continue;
|
|
+- }
|
|
++ uint32_t lim;
|
|
++ pcre2_config (PCRE2_CONFIG_DEPTHLIMIT, &lim);
|
|
++ if (lim >= UINT32_MAX / 2)
|
|
++ return e;
|
|
++
|
|
++ lim <<= 1;
|
|
++ if (!pc->mcontext)
|
|
++ pc->mcontext = pcre2_match_context_create (NULL);
|
|
++
|
|
++ pcre2_set_depth_limit (pc->mcontext, lim);
|
|
++ continue;
|
|
+ }
|
|
+-#endif
|
|
+-
|
|
+ return e;
|
|
+ }
|
|
+ }
|
|
+@@ -115,27 +106,35 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
|
|
+ void *
|
|
+ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
|
|
+ {
|
|
+- int e;
|
|
+- char const *ep;
|
|
++ PCRE2_SIZE e;
|
|
++ int ec;
|
|
++ PCRE2_UCHAR8 ep[128]; /* 120 code units is suggested to avoid truncation */
|
|
+ static char const wprefix[] = "(?<!\\w)(?:";
|
|
+ static char const wsuffix[] = ")(?!\\w)";
|
|
+ static char const xprefix[] = "^(?:";
|
|
+ static char const xsuffix[] = ")$";
|
|
+ int fix_len_max = MAX (sizeof wprefix - 1 + sizeof wsuffix - 1,
|
|
+ sizeof xprefix - 1 + sizeof xsuffix - 1);
|
|
+- char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
|
|
+- int flags = PCRE_DOLLAR_ENDONLY | (match_icase ? PCRE_CASELESS : 0);
|
|
++ unsigned char *re = xmalloc (size + fix_len_max + 1);
|
|
++ int flags = PCRE2_DOLLAR_ENDONLY | (match_icase ? PCRE2_CASELESS : 0);
|
|
+ char *patlim = pattern + size;
|
|
+- char *n = re;
|
|
+- char const *p;
|
|
+- char const *pnul;
|
|
++ char *n = (char *)re;
|
|
+ struct pcre_comp *pc = xcalloc (1, sizeof (*pc));
|
|
++ pcre2_compile_context *ccontext = pcre2_compile_context_create(NULL);
|
|
+
|
|
+ if (localeinfo.multibyte)
|
|
+ {
|
|
+ if (! localeinfo.using_utf8)
|
|
+ die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
|
|
+- flags |= PCRE_UTF8;
|
|
++ flags |= PCRE2_UTF;
|
|
++#if 0
|
|
++ /* do not match individual code units but only UTF-8 */
|
|
++ flags |= PCRE2_NEVER_BACKSLASH_C;
|
|
++#endif
|
|
++#ifdef PCRE2_MATCH_INVALID_UTF
|
|
++ /* consider invalid UTF-8 as a barrier, instead of error */
|
|
++ flags |= PCRE2_MATCH_INVALID_UTF;
|
|
++#endif
|
|
+ }
|
|
+
|
|
+ /* FIXME: Remove this restriction. */
|
|
+@@ -149,55 +148,43 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
|
|
+ strcpy (n, xprefix);
|
|
+ n += strlen (n);
|
|
+
|
|
+- /* The PCRE interface doesn't allow NUL bytes in the pattern, so
|
|
+- replace each NUL byte in the pattern with the four characters
|
|
+- "\000", removing a preceding backslash if there are an odd
|
|
+- number of backslashes before the NUL. */
|
|
+- *patlim = '\0';
|
|
+- for (p = pattern; (pnul = p + strlen (p)) < patlim; p = pnul + 1)
|
|
++ memcpy (n, pattern, size);
|
|
++ n += size;
|
|
++ if (match_words && !match_lines)
|
|
+ {
|
|
+- memcpy (n, p, pnul - p);
|
|
+- n += pnul - p;
|
|
+- for (p = pnul; pattern < p && p[-1] == '\\'; p--)
|
|
+- continue;
|
|
+- n -= (pnul - p) & 1;
|
|
+- strcpy (n, "\\000");
|
|
+- n += 4;
|
|
++ strcpy (n, wsuffix);
|
|
++ n += strlen(wsuffix);
|
|
+ }
|
|
+- memcpy (n, p, patlim - p + 1);
|
|
+- n += patlim - p;
|
|
+- *patlim = '\n';
|
|
+
|
|
+- if (match_words)
|
|
+- strcpy (n, wsuffix);
|
|
+ if (match_lines)
|
|
+- strcpy (n, xsuffix);
|
|
++ {
|
|
++ strcpy (n, xsuffix);
|
|
++ n += strlen(xsuffix);
|
|
++ }
|
|
+
|
|
+- pc->cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
|
|
++ pcre2_set_character_tables (ccontext, pcre2_maketables (NULL));
|
|
++ pc->cre = pcre2_compile (re, n - (char *)re, flags, &ec, &e, ccontext);
|
|
+ if (!pc->cre)
|
|
+- die (EXIT_TROUBLE, 0, "%s", ep);
|
|
+-
|
|
+- int pcre_study_flags = PCRE_STUDY_EXTRA_NEEDED | PCRE_STUDY_JIT_COMPILE;
|
|
+- pc->extra = pcre_study (pc->cre, pcre_study_flags, &ep);
|
|
+- if (ep)
|
|
+- die (EXIT_TROUBLE, 0, "%s", ep);
|
|
++ {
|
|
++ pcre2_get_error_message (ec, ep, sizeof (ep));
|
|
++ die (EXIT_TROUBLE, 0, "%s", ep);
|
|
++ }
|
|
+
|
|
+-#if PCRE_STUDY_JIT_COMPILE
|
|
+- if (pcre_fullinfo (pc->cre, pc->extra, PCRE_INFO_JIT, &e))
|
|
+- die (EXIT_TROUBLE, 0, _("internal error (should never happen)"));
|
|
++ pc->data = pcre2_match_data_create_from_pattern (pc->cre, NULL);
|
|
+
|
|
+- /* The PCRE documentation says that a 32 KiB stack is the default. */
|
|
+- if (e)
|
|
+- pc->jit_stack_size = 32 << 10;
|
|
+-#endif
|
|
++ ec = pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE);
|
|
++ if (ec && ec != PCRE2_ERROR_JIT_BADOPTION && ec != PCRE2_ERROR_NOMEMORY)
|
|
++ die (EXIT_TROUBLE, 0, _("JIT internal error: %d"), ec);
|
|
++ else
|
|
++ {
|
|
++ /* The PCRE documentation says that a 32 KiB stack is the default. */
|
|
++ pc->jit_stack_size = 32 << 10;
|
|
++ }
|
|
+
|
|
+ free (re);
|
|
+
|
|
+- int sub[NSUB];
|
|
+- pc->empty_match[false] = pcre_exec (pc->cre, pc->extra, "", 0, 0,
|
|
+- PCRE_NOTBOL, sub, NSUB);
|
|
+- pc->empty_match[true] = pcre_exec (pc->cre, pc->extra, "", 0, 0, 0, sub,
|
|
+- NSUB);
|
|
++ pc->empty_match[false] = jit_exec (pc, "", 0, 0, PCRE2_NOTBOL);
|
|
++ pc->empty_match[true] = jit_exec (pc, "", 0, 0, 0);
|
|
+
|
|
+ return pc;
|
|
+ }
|
|
+@@ -206,15 +193,14 @@ size_t
|
|
+ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
+ char const *start_ptr)
|
|
+ {
|
|
+- int sub[NSUB];
|
|
+ char const *p = start_ptr ? start_ptr : buf;
|
|
+ bool bol = p[-1] == eolbyte;
|
|
+ char const *line_start = buf;
|
|
+- int e = PCRE_ERROR_NOMATCH;
|
|
++ int e = PCRE2_ERROR_NOMATCH;
|
|
+ char const *line_end;
|
|
+ struct pcre_comp *pc = vcp;
|
|
+-
|
|
+- /* The search address to pass to pcre_exec. This is the start of
|
|
++ PCRE2_SIZE *sub = pcre2_get_ovector_pointer (pc->data);
|
|
++ /* The search address to pass to PCRE. This is the start of
|
|
+ the buffer, or just past the most-recently discovered encoding
|
|
+ error or line end. */
|
|
+ char const *subject = buf;
|
|
+@@ -226,14 +212,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
+ better and the correctness issues were too puzzling. See
|
|
+ Bug#22655. */
|
|
+ line_end = rawmemchr (p, eolbyte);
|
|
+- if (INT_MAX < line_end - p)
|
|
++ if (PCRE2_SIZE_MAX < line_end - p)
|
|
+ die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
|
|
+
|
|
+ for (;;)
|
|
+ {
|
|
+ /* Skip past bytes that are easily determined to be encoding
|
|
+ errors, treating them as data that cannot match. This is
|
|
+- faster than having pcre_exec check them. */
|
|
++ faster than having PCRE check them. */
|
|
+ while (localeinfo.sbclen[to_uchar (*p)] == -1)
|
|
+ {
|
|
+ p++;
|
|
+@@ -241,10 +227,10 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
+ bol = false;
|
|
+ }
|
|
+
|
|
+- int search_offset = p - subject;
|
|
++ PCRE2_SIZE search_offset = p - subject;
|
|
+
|
|
+ /* Check for an empty match; this is faster than letting
|
|
+- pcre_exec do it. */
|
|
++ PCRE do it. */
|
|
+ if (p == line_end)
|
|
+ {
|
|
+ sub[0] = sub[1] = search_offset;
|
|
+@@ -254,13 +240,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
+
|
|
+ int options = 0;
|
|
+ if (!bol)
|
|
+- options |= PCRE_NOTBOL;
|
|
++ options |= PCRE2_NOTBOL;
|
|
+
|
|
+- e = jit_exec (pc, subject, line_end - subject, search_offset,
|
|
+- options, sub);
|
|
+- if (e != PCRE_ERROR_BADUTF8)
|
|
++ e = jit_exec (pc, subject, line_end - subject,
|
|
++ search_offset, options);
|
|
++ /* PCRE2 provides 22 different error codes for bad UTF-8 */
|
|
++ if (! (PCRE2_ERROR_UTF8_ERR21 <= e && e < PCRE2_ERROR_UTF8_ERR1))
|
|
+ break;
|
|
+- int valid_bytes = sub[0];
|
|
++ PCRE2_SIZE valid_bytes = pcre2_get_startchar (pc->data);
|
|
+
|
|
+ if (search_offset <= valid_bytes)
|
|
+ {
|
|
+@@ -270,14 +257,15 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
+ /* Handle the empty-match case specially, for speed.
|
|
+ This optimization is valid if VALID_BYTES is zero,
|
|
+ which means SEARCH_OFFSET is also zero. */
|
|
++ sub[0] = valid_bytes;
|
|
+ sub[1] = 0;
|
|
+ e = pc->empty_match[bol];
|
|
+ }
|
|
+ else
|
|
+ e = jit_exec (pc, subject, valid_bytes, search_offset,
|
|
+- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub);
|
|
++ options | PCRE2_NO_UTF_CHECK | PCRE2_NOTEOL);
|
|
+
|
|
+- if (e != PCRE_ERROR_NOMATCH)
|
|
++ if (e != PCRE2_ERROR_NOMATCH)
|
|
+ break;
|
|
+
|
|
+ /* Treat the encoding error as data that cannot match. */
|
|
+@@ -288,7 +276,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
+ subject += valid_bytes + 1;
|
|
+ }
|
|
+
|
|
+- if (e != PCRE_ERROR_NOMATCH)
|
|
++ if (e != PCRE2_ERROR_NOMATCH)
|
|
+ break;
|
|
+ bol = true;
|
|
+ p = subject = line_start = line_end + 1;
|
|
+@@ -299,26 +287,34 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
+ {
|
|
+ switch (e)
|
|
+ {
|
|
+- case PCRE_ERROR_NOMATCH:
|
|
++ case PCRE2_ERROR_NOMATCH:
|
|
+ break;
|
|
+
|
|
+- case PCRE_ERROR_NOMEMORY:
|
|
++ case PCRE2_ERROR_NOMEMORY:
|
|
+ die (EXIT_TROUBLE, 0, _("%s: memory exhausted"), input_filename ());
|
|
+
|
|
+-#if PCRE_STUDY_JIT_COMPILE
|
|
+- case PCRE_ERROR_JIT_STACKLIMIT:
|
|
++ case PCRE2_ERROR_JIT_STACKLIMIT:
|
|
+ die (EXIT_TROUBLE, 0, _("%s: exhausted PCRE JIT stack"),
|
|
+ input_filename ());
|
|
+-#endif
|
|
+
|
|
+- case PCRE_ERROR_MATCHLIMIT:
|
|
++ case PCRE2_ERROR_MATCHLIMIT:
|
|
+ die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's backtracking limit"),
|
|
+ input_filename ());
|
|
+
|
|
+- case PCRE_ERROR_RECURSIONLIMIT:
|
|
+- die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's recursion limit"),
|
|
++ case PCRE2_ERROR_DEPTHLIMIT:
|
|
++ die (EXIT_TROUBLE, 0,
|
|
++ _("%s: exceeded PCRE's nested backtracking limit"),
|
|
++ input_filename ());
|
|
++
|
|
++ case PCRE2_ERROR_RECURSELOOP:
|
|
++ die (EXIT_TROUBLE, 0, _("%s: PCRE detected recurse loop"),
|
|
+ input_filename ());
|
|
+
|
|
++#ifdef PCRE2_ERROR_HEAPLIMIT
|
|
++ case PCRE2_ERROR_HEAPLIMIT:
|
|
++ die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's heap limit"),
|
|
++ input_filename ());
|
|
++#endif
|
|
+ default:
|
|
+ /* For now, we lump all remaining PCRE failures into this basket.
|
|
+ If anyone cares to provide sample grep usage that can trigger
|
|
+diff --git a/tests/filename-lineno.pl b/tests/filename-lineno.pl
|
|
+index 1e84b45..1ff3d6a 100755
|
|
+--- a/tests/filename-lineno.pl
|
|
++++ b/tests/filename-lineno.pl
|
|
+@@ -101,13 +101,13 @@ my @Tests =
|
|
+ ],
|
|
+ ['invalid-re-P-paren', '-P ")"', {EXIT=>2},
|
|
+ {ERR => $ENV{PCRE_WORKS} == 1
|
|
+- ? "$prog: unmatched parentheses\n"
|
|
++ ? "$prog: unmatched closing parenthesis\n"
|
|
+ : $no_pcre
|
|
+ },
|
|
+ ],
|
|
+ ['invalid-re-P-star-paren', '-P "a.*)"', {EXIT=>2},
|
|
+ {ERR => $ENV{PCRE_WORKS} == 1
|
|
+- ? "$prog: unmatched parentheses\n"
|
|
++ ? "$prog: unmatched closing parenthesis\n"
|
|
+ : $no_pcre
|
|
+ },
|
|
+ ],
|
|
+--
|
|
+2.26.2
|
|
+
|
|
diff --git a/doc/grep.in.1 b/doc/grep.in.1
|
|
index e8854f2..0178db1 100644
|
|
--- a/doc/grep.in.1
|
|
+++ b/doc/grep.in.1
|
|
@@ -767,7 +767,7 @@ In other implementations, basic regular expressions are less powerful.
|
|
The following description applies to extended regular expressions;
|
|
differences for basic regular expressions are summarized afterwards.
|
|
Perl-compatible regular expressions give additional functionality, and are
|
|
-documented in B<pcresyntax>(3) and B<pcrepattern>(3), but work only if
|
|
+documented in B<pcre2syntax>(3) and B<pcre2pattern>(3), but work only if
|
|
PCRE support is enabled.
|
|
.PP
|
|
The fundamental building blocks are the regular expressions
|
|
@@ -1371,9 +1371,9 @@ from the globbing syntax that the shell uses to match file names.
|
|
.BR sort (1),
|
|
.BR xargs (1),
|
|
.BR read (2),
|
|
-.BR pcre (3),
|
|
-.BR pcresyntax (3),
|
|
-.BR pcrepattern (3),
|
|
+.BR pcre2 (3),
|
|
+.BR pcre2syntax (3),
|
|
+.BR pcre2pattern (3),
|
|
.BR terminfo (5),
|
|
.BR glob (7),
|
|
.BR regex (7)
|
|
diff --git a/doc/grep.texi b/doc/grep.texi
|
|
index 01ac81e..aae8571 100644
|
|
--- a/doc/grep.texi
|
|
+++ b/doc/grep.texi
|
|
@@ -1186,7 +1186,7 @@ In other implementations, basic regular expressions are less powerful.
|
|
The following description applies to extended regular expressions;
|
|
differences for basic regular expressions are summarized afterwards.
|
|
Perl-compatible regular expressions give additional functionality, and
|
|
-are documented in the @i{pcresyntax}(3) and @i{pcrepattern}(3) manual
|
|
+are documented in the @i{pcre2syntax}(3) and @i{pcre2pattern}(3) manual
|
|
pages, but work only if PCRE is available in the system.
|
|
|
|
@menu
|
|
diff --git a/m4/pcre.m4 b/m4/pcre.m4
|
|
index 78b7fda..0ca510f 100644
|
|
--- a/m4/pcre.m4
|
|
+++ b/m4/pcre.m4
|
|
@@ -1,4 +1,4 @@
|
|
-# pcre.m4 - check for libpcre support
|
|
+# pcre.m4 - check for PCRE library support
|
|
|
|
# Copyright (C) 2010-2021 Free Software Foundation, Inc.
|
|
# This file is free software; the Free Software Foundation
|
|
@@ -9,7 +9,7 @@ AC_DEFUN([gl_FUNC_PCRE],
|
|
[
|
|
AC_ARG_ENABLE([perl-regexp],
|
|
AS_HELP_STRING([--disable-perl-regexp],
|
|
- [disable perl-regexp (pcre) support]),
|
|
+ [disable perl-regexp (pcre2) support]),
|
|
[case $enableval in
|
|
yes|no) test_pcre=$enableval;;
|
|
*) AC_MSG_ERROR([invalid value $enableval for --disable-perl-regexp]);;
|
|
@@ -21,24 +21,25 @@ AC_DEFUN([gl_FUNC_PCRE],
|
|
use_pcre=no
|
|
|
|
if test $test_pcre != no; then
|
|
- PKG_CHECK_MODULES([PCRE], [libpcre], [], [: ${PCRE_LIBS=-lpcre}])
|
|
+ PKG_CHECK_MODULES([PCRE], [libpcre2-8], [], [: ${PCRE_LIBS=-lpcre2-8}])
|
|
|
|
- AC_CACHE_CHECK([for pcre_compile], [pcre_cv_have_pcre_compile],
|
|
+ AC_CACHE_CHECK([for pcre2_compile], [pcre_cv_have_pcre2_compile],
|
|
[pcre_saved_CFLAGS=$CFLAGS
|
|
pcre_saved_LIBS=$LIBS
|
|
CFLAGS="$CFLAGS $PCRE_CFLAGS"
|
|
LIBS="$PCRE_LIBS $LIBS"
|
|
AC_LINK_IFELSE(
|
|
- [AC_LANG_PROGRAM([[#include <pcre.h>
|
|
+ [AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 8
|
|
+ #include <pcre2.h>
|
|
]],
|
|
- [[pcre *p = pcre_compile (0, 0, 0, 0, 0);
|
|
+ [[pcre2_code *p = pcre2_compile (0, 0, 0, 0, 0, 0);
|
|
return !p;]])],
|
|
- [pcre_cv_have_pcre_compile=yes],
|
|
- [pcre_cv_have_pcre_compile=no])
|
|
+ [pcre_cv_have_pcre2_compile=yes],
|
|
+ [pcre_cv_have_pcre2_compile=no])
|
|
CFLAGS=$pcre_saved_CFLAGS
|
|
LIBS=$pcre_saved_LIBS])
|
|
|
|
- if test "$pcre_cv_have_pcre_compile" = yes; then
|
|
+ if test "$pcre_cv_have_pcre2_compile" = yes; then
|
|
use_pcre=yes
|
|
elif test $test_pcre = maybe; then
|
|
AC_MSG_WARN([AC_PACKAGE_NAME will be built without pcre support.])
|
|
@@ -50,7 +51,7 @@ AC_DEFUN([gl_FUNC_PCRE],
|
|
if test $use_pcre = yes; then
|
|
AC_DEFINE([HAVE_LIBPCRE], [1],
|
|
[Define to 1 if you have the Perl Compatible Regular Expressions
|
|
- library (-lpcre).])
|
|
+ library (-lpcre2).])
|
|
else
|
|
PCRE_CFLAGS=
|
|
PCRE_LIBS=
|
|
diff --git a/src/pcresearch.c b/src/pcresearch.c
|
|
index 37f7e40..caedf49 100644
|
|
--- a/src/pcresearch.c
|
|
+++ b/src/pcresearch.c
|
|
@@ -17,40 +17,32 @@
|
|
02110-1301, USA. */
|
|
|
|
/* Written August 1992 by Mike Haertel. */
|
|
+/* Updated for PCRE2 by Carlo Arenas. */
|
|
|
|
#include <config.h>
|
|
#include "search.h"
|
|
#include "die.h"
|
|
|
|
-#include <pcre.h>
|
|
+#define PCRE2_CODE_UNIT_WIDTH 8
|
|
+#include <pcre2.h>
|
|
|
|
-/* This must be at least 2; everything after that is for performance
|
|
- in pcre_exec. */
|
|
-enum { NSUB = 300 };
|
|
-
|
|
-#ifndef PCRE_EXTRA_MATCH_LIMIT_RECURSION
|
|
-# define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0
|
|
-#endif
|
|
-#ifndef PCRE_STUDY_JIT_COMPILE
|
|
-# define PCRE_STUDY_JIT_COMPILE 0
|
|
-#endif
|
|
-#ifndef PCRE_STUDY_EXTRA_NEEDED
|
|
-# define PCRE_STUDY_EXTRA_NEEDED 0
|
|
+/* Needed for backward compatibility for PCRE2 < 10.30 */
|
|
+#ifndef PCRE2_CONFIG_DEPTHLIMIT
|
|
+#define PCRE2_CONFIG_DEPTHLIMIT PCRE2_CONFIG_RECURSIONLIMIT
|
|
+#define PCRE2_ERROR_DEPTHLIMIT PCRE2_ERROR_RECURSIONLIMIT
|
|
+#define pcre2_set_depth_limit pcre2_set_recursion_limit
|
|
#endif
|
|
|
|
struct pcre_comp
|
|
{
|
|
- /* Compiled internal form of a Perl regular expression. */
|
|
- pcre *cre;
|
|
-
|
|
- /* Additional information about the pattern. */
|
|
- pcre_extra *extra;
|
|
-
|
|
-#if PCRE_STUDY_JIT_COMPILE
|
|
/* The JIT stack and its maximum size. */
|
|
- pcre_jit_stack *jit_stack;
|
|
- int jit_stack_size;
|
|
-#endif
|
|
+ pcre2_jit_stack *jit_stack;
|
|
+ PCRE2_SIZE jit_stack_size;
|
|
+
|
|
+ /* Compiled internal form of a Perl regular expression. */
|
|
+ pcre2_code *cre;
|
|
+ pcre2_match_context *mcontext;
|
|
+ pcre2_match_data *data;
|
|
|
|
/* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty
|
|
string matches when that flag is used. */
|
|
@@ -60,51 +52,50 @@ struct pcre_comp
|
|
|
|
/* Match the already-compiled PCRE pattern against the data in SUBJECT,
|
|
of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with
|
|
- options OPTIONS, and storing resulting matches into SUB. Return
|
|
- the (nonnegative) match location or a (negative) error number. */
|
|
+ options OPTIONS.
|
|
+ Return the (nonnegative) match count or a (negative) error number. */
|
|
static int
|
|
-jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
|
|
- int search_offset, int options, int *sub)
|
|
+jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes,
|
|
+ PCRE2_SIZE search_offset, int options)
|
|
{
|
|
while (true)
|
|
{
|
|
- int e = pcre_exec (pc->cre, pc->extra, subject, search_bytes,
|
|
- search_offset, options, sub, NSUB);
|
|
-
|
|
-#if PCRE_STUDY_JIT_COMPILE
|
|
- if (e == PCRE_ERROR_JIT_STACKLIMIT
|
|
+ int e = pcre2_match (pc->cre, (PCRE2_SPTR)subject, search_bytes,
|
|
+ search_offset, options, pc->data, pc->mcontext);
|
|
+ if (e == PCRE2_ERROR_JIT_STACKLIMIT
|
|
&& 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2)
|
|
{
|
|
- int old_size = pc->jit_stack_size;
|
|
- int new_size = pc->jit_stack_size = old_size * 2;
|
|
+ PCRE2_SIZE old_size = pc->jit_stack_size;
|
|
+ PCRE2_SIZE new_size = pc->jit_stack_size = old_size * 2;
|
|
if (pc->jit_stack)
|
|
- pcre_jit_stack_free (pc->jit_stack);
|
|
- pc->jit_stack = pcre_jit_stack_alloc (old_size, new_size);
|
|
- if (!pc->jit_stack)
|
|
+ pcre2_jit_stack_free (pc->jit_stack);
|
|
+ pc->jit_stack = pcre2_jit_stack_create (old_size, new_size, NULL);
|
|
+
|
|
+ if (!pc->mcontext)
|
|
+ pc->mcontext = pcre2_match_context_create (NULL);
|
|
+
|
|
+ if (!pc->jit_stack || !pc->mcontext)
|
|
die (EXIT_TROUBLE, 0,
|
|
_("failed to allocate memory for the PCRE JIT stack"));
|
|
- pcre_assign_jit_stack (pc->extra, NULL, pc->jit_stack);
|
|
+ pcre2_jit_stack_assign (pc->mcontext, NULL, pc->jit_stack);
|
|
continue;
|
|
}
|
|
-#endif
|
|
|
|
-#if PCRE_EXTRA_MATCH_LIMIT_RECURSION
|
|
- if (e == PCRE_ERROR_RECURSIONLIMIT
|
|
- && (PCRE_STUDY_EXTRA_NEEDED || pc->extra))
|
|
+
|
|
+ if (e == PCRE2_ERROR_DEPTHLIMIT)
|
|
{
|
|
- unsigned long lim
|
|
- = (pc->extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION
|
|
- ? pc->extra->match_limit_recursion
|
|
- : 0);
|
|
- if (lim <= ULONG_MAX / 2)
|
|
- {
|
|
- pc->extra->match_limit_recursion = lim ? 2 * lim : (1 << 24) - 1;
|
|
- pc->extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
|
|
- continue;
|
|
- }
|
|
+ uint32_t lim;
|
|
+ pcre2_config (PCRE2_CONFIG_DEPTHLIMIT, &lim);
|
|
+ if (lim >= UINT32_MAX / 2)
|
|
+ return e;
|
|
+
|
|
+ lim <<= 1;
|
|
+ if (!pc->mcontext)
|
|
+ pc->mcontext = pcre2_match_context_create (NULL);
|
|
+
|
|
+ pcre2_set_depth_limit (pc->mcontext, lim);
|
|
+ continue;
|
|
}
|
|
-#endif
|
|
-
|
|
return e;
|
|
}
|
|
}
|
|
@@ -115,27 +106,35 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
|
|
void *
|
|
Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
|
|
{
|
|
- int e;
|
|
- char const *ep;
|
|
+ PCRE2_SIZE e;
|
|
+ int ec;
|
|
+ PCRE2_UCHAR8 ep[128]; /* 120 code units is suggested to avoid truncation */
|
|
static char const wprefix[] = "(?<!\\w)(?:";
|
|
static char const wsuffix[] = ")(?!\\w)";
|
|
static char const xprefix[] = "^(?:";
|
|
static char const xsuffix[] = ")$";
|
|
int fix_len_max = MAX (sizeof wprefix - 1 + sizeof wsuffix - 1,
|
|
sizeof xprefix - 1 + sizeof xsuffix - 1);
|
|
- char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
|
|
- int flags = PCRE_DOLLAR_ENDONLY | (match_icase ? PCRE_CASELESS : 0);
|
|
+ unsigned char *re = xmalloc (size + fix_len_max + 1);
|
|
+ int flags = PCRE2_DOLLAR_ENDONLY | (match_icase ? PCRE2_CASELESS : 0);
|
|
char *patlim = pattern + size;
|
|
- char *n = re;
|
|
- char const *p;
|
|
- char const *pnul;
|
|
+ char *n = (char *)re;
|
|
struct pcre_comp *pc = xcalloc (1, sizeof (*pc));
|
|
+ pcre2_compile_context *ccontext = pcre2_compile_context_create(NULL);
|
|
|
|
if (localeinfo.multibyte)
|
|
{
|
|
if (! localeinfo.using_utf8)
|
|
die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
|
|
- flags |= PCRE_UTF8;
|
|
+ flags |= PCRE2_UTF;
|
|
+#if 0
|
|
+ /* do not match individual code units but only UTF-8 */
|
|
+ flags |= PCRE2_NEVER_BACKSLASH_C;
|
|
+#endif
|
|
+#ifdef PCRE2_MATCH_INVALID_UTF
|
|
+ /* consider invalid UTF-8 as a barrier, instead of error */
|
|
+ flags |= PCRE2_MATCH_INVALID_UTF;
|
|
+#endif
|
|
}
|
|
|
|
/* FIXME: Remove this restriction. */
|
|
@@ -149,55 +148,43 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
|
|
strcpy (n, xprefix);
|
|
n += strlen (n);
|
|
|
|
- /* The PCRE interface doesn't allow NUL bytes in the pattern, so
|
|
- replace each NUL byte in the pattern with the four characters
|
|
- "\000", removing a preceding backslash if there are an odd
|
|
- number of backslashes before the NUL. */
|
|
- *patlim = '\0';
|
|
- for (p = pattern; (pnul = p + strlen (p)) < patlim; p = pnul + 1)
|
|
+ memcpy (n, pattern, size);
|
|
+ n += size;
|
|
+ if (match_words && !match_lines)
|
|
{
|
|
- memcpy (n, p, pnul - p);
|
|
- n += pnul - p;
|
|
- for (p = pnul; pattern < p && p[-1] == '\\'; p--)
|
|
- continue;
|
|
- n -= (pnul - p) & 1;
|
|
- strcpy (n, "\\000");
|
|
- n += 4;
|
|
+ strcpy (n, wsuffix);
|
|
+ n += strlen(wsuffix);
|
|
}
|
|
- memcpy (n, p, patlim - p + 1);
|
|
- n += patlim - p;
|
|
- *patlim = '\n';
|
|
|
|
- if (match_words)
|
|
- strcpy (n, wsuffix);
|
|
if (match_lines)
|
|
- strcpy (n, xsuffix);
|
|
+ {
|
|
+ strcpy (n, xsuffix);
|
|
+ n += strlen(xsuffix);
|
|
+ }
|
|
|
|
- pc->cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
|
|
+ pcre2_set_character_tables (ccontext, pcre2_maketables (NULL));
|
|
+ pc->cre = pcre2_compile (re, n - (char *)re, flags, &ec, &e, ccontext);
|
|
if (!pc->cre)
|
|
- die (EXIT_TROUBLE, 0, "%s", ep);
|
|
-
|
|
- int pcre_study_flags = PCRE_STUDY_EXTRA_NEEDED | PCRE_STUDY_JIT_COMPILE;
|
|
- pc->extra = pcre_study (pc->cre, pcre_study_flags, &ep);
|
|
- if (ep)
|
|
- die (EXIT_TROUBLE, 0, "%s", ep);
|
|
+ {
|
|
+ pcre2_get_error_message (ec, ep, sizeof (ep));
|
|
+ die (EXIT_TROUBLE, 0, "%s", ep);
|
|
+ }
|
|
|
|
-#if PCRE_STUDY_JIT_COMPILE
|
|
- if (pcre_fullinfo (pc->cre, pc->extra, PCRE_INFO_JIT, &e))
|
|
- die (EXIT_TROUBLE, 0, _("internal error (should never happen)"));
|
|
+ pc->data = pcre2_match_data_create_from_pattern (pc->cre, NULL);
|
|
|
|
- /* The PCRE documentation says that a 32 KiB stack is the default. */
|
|
- if (e)
|
|
- pc->jit_stack_size = 32 << 10;
|
|
-#endif
|
|
+ ec = pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE);
|
|
+ if (ec && ec != PCRE2_ERROR_JIT_BADOPTION && ec != PCRE2_ERROR_NOMEMORY)
|
|
+ die (EXIT_TROUBLE, 0, _("JIT internal error: %d"), ec);
|
|
+ else
|
|
+ {
|
|
+ /* The PCRE documentation says that a 32 KiB stack is the default. */
|
|
+ pc->jit_stack_size = 32 << 10;
|
|
+ }
|
|
|
|
free (re);
|
|
|
|
- int sub[NSUB];
|
|
- pc->empty_match[false] = pcre_exec (pc->cre, pc->extra, "", 0, 0,
|
|
- PCRE_NOTBOL, sub, NSUB);
|
|
- pc->empty_match[true] = pcre_exec (pc->cre, pc->extra, "", 0, 0, 0, sub,
|
|
- NSUB);
|
|
+ pc->empty_match[false] = jit_exec (pc, "", 0, 0, PCRE2_NOTBOL);
|
|
+ pc->empty_match[true] = jit_exec (pc, "", 0, 0, 0);
|
|
|
|
return pc;
|
|
}
|
|
@@ -206,15 +193,14 @@ size_t
|
|
Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
char const *start_ptr)
|
|
{
|
|
- int sub[NSUB];
|
|
char const *p = start_ptr ? start_ptr : buf;
|
|
bool bol = p[-1] == eolbyte;
|
|
char const *line_start = buf;
|
|
- int e = PCRE_ERROR_NOMATCH;
|
|
+ int e = PCRE2_ERROR_NOMATCH;
|
|
char const *line_end;
|
|
struct pcre_comp *pc = vcp;
|
|
-
|
|
- /* The search address to pass to pcre_exec. This is the start of
|
|
+ PCRE2_SIZE *sub = pcre2_get_ovector_pointer (pc->data);
|
|
+ /* The search address to pass to PCRE. This is the start of
|
|
the buffer, or just past the most-recently discovered encoding
|
|
error or line end. */
|
|
char const *subject = buf;
|
|
@@ -226,14 +212,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
better and the correctness issues were too puzzling. See
|
|
Bug#22655. */
|
|
line_end = rawmemchr (p, eolbyte);
|
|
- if (INT_MAX < line_end - p)
|
|
+ if (PCRE2_SIZE_MAX < line_end - p)
|
|
die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
|
|
|
|
for (;;)
|
|
{
|
|
/* Skip past bytes that are easily determined to be encoding
|
|
errors, treating them as data that cannot match. This is
|
|
- faster than having pcre_exec check them. */
|
|
+ faster than having PCRE check them. */
|
|
while (localeinfo.sbclen[to_uchar (*p)] == -1)
|
|
{
|
|
p++;
|
|
@@ -241,10 +227,10 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
bol = false;
|
|
}
|
|
|
|
- int search_offset = p - subject;
|
|
+ PCRE2_SIZE search_offset = p - subject;
|
|
|
|
/* Check for an empty match; this is faster than letting
|
|
- pcre_exec do it. */
|
|
+ PCRE do it. */
|
|
if (p == line_end)
|
|
{
|
|
sub[0] = sub[1] = search_offset;
|
|
@@ -254,13 +240,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
|
|
int options = 0;
|
|
if (!bol)
|
|
- options |= PCRE_NOTBOL;
|
|
+ options |= PCRE2_NOTBOL;
|
|
|
|
- e = jit_exec (pc, subject, line_end - subject, search_offset,
|
|
- options, sub);
|
|
- if (e != PCRE_ERROR_BADUTF8)
|
|
+ e = jit_exec (pc, subject, line_end - subject,
|
|
+ search_offset, options);
|
|
+ /* PCRE2 provides 22 different error codes for bad UTF-8 */
|
|
+ if (! (PCRE2_ERROR_UTF8_ERR21 <= e && e < PCRE2_ERROR_UTF8_ERR1))
|
|
break;
|
|
- int valid_bytes = sub[0];
|
|
+ PCRE2_SIZE valid_bytes = pcre2_get_startchar (pc->data);
|
|
|
|
if (search_offset <= valid_bytes)
|
|
{
|
|
@@ -270,14 +257,15 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
/* Handle the empty-match case specially, for speed.
|
|
This optimization is valid if VALID_BYTES is zero,
|
|
which means SEARCH_OFFSET is also zero. */
|
|
+ sub[0] = valid_bytes;
|
|
sub[1] = 0;
|
|
e = pc->empty_match[bol];
|
|
}
|
|
else
|
|
e = jit_exec (pc, subject, valid_bytes, search_offset,
|
|
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub);
|
|
+ options | PCRE2_NO_UTF_CHECK | PCRE2_NOTEOL);
|
|
|
|
- if (e != PCRE_ERROR_NOMATCH)
|
|
+ if (e != PCRE2_ERROR_NOMATCH)
|
|
break;
|
|
|
|
/* Treat the encoding error as data that cannot match. */
|
|
@@ -288,7 +276,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
subject += valid_bytes + 1;
|
|
}
|
|
|
|
- if (e != PCRE_ERROR_NOMATCH)
|
|
+ if (e != PCRE2_ERROR_NOMATCH)
|
|
break;
|
|
bol = true;
|
|
p = subject = line_start = line_end + 1;
|
|
@@ -299,26 +287,34 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
|
|
{
|
|
switch (e)
|
|
{
|
|
- case PCRE_ERROR_NOMATCH:
|
|
+ case PCRE2_ERROR_NOMATCH:
|
|
break;
|
|
|
|
- case PCRE_ERROR_NOMEMORY:
|
|
+ case PCRE2_ERROR_NOMEMORY:
|
|
die (EXIT_TROUBLE, 0, _("%s: memory exhausted"), input_filename ());
|
|
|
|
-#if PCRE_STUDY_JIT_COMPILE
|
|
- case PCRE_ERROR_JIT_STACKLIMIT:
|
|
+ case PCRE2_ERROR_JIT_STACKLIMIT:
|
|
die (EXIT_TROUBLE, 0, _("%s: exhausted PCRE JIT stack"),
|
|
input_filename ());
|
|
-#endif
|
|
|
|
- case PCRE_ERROR_MATCHLIMIT:
|
|
+ case PCRE2_ERROR_MATCHLIMIT:
|
|
die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's backtracking limit"),
|
|
input_filename ());
|
|
|
|
- case PCRE_ERROR_RECURSIONLIMIT:
|
|
- die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's recursion limit"),
|
|
+ case PCRE2_ERROR_DEPTHLIMIT:
|
|
+ die (EXIT_TROUBLE, 0,
|
|
+ _("%s: exceeded PCRE's nested backtracking limit"),
|
|
input_filename ());
|
|
|
|
+ case PCRE2_ERROR_RECURSELOOP:
|
|
+ die (EXIT_TROUBLE, 0, _("%s: PCRE detected recurse loop"),
|
|
+ input_filename ());
|
|
+
|
|
+#ifdef PCRE2_ERROR_HEAPLIMIT
|
|
+ case PCRE2_ERROR_HEAPLIMIT:
|
|
+ die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's heap limit"),
|
|
+ input_filename ());
|
|
+#endif
|
|
default:
|
|
/* For now, we lump all remaining PCRE failures into this basket.
|
|
If anyone cares to provide sample grep usage that can trigger
|
|
diff --git a/tests/filename-lineno.pl b/tests/filename-lineno.pl
|
|
index 1e84b45..1ff3d6a 100755
|
|
--- a/tests/filename-lineno.pl
|
|
+++ b/tests/filename-lineno.pl
|
|
@@ -101,13 +101,13 @@ my @Tests =
|
|
],
|
|
['invalid-re-P-paren', '-P ")"', {EXIT=>2},
|
|
{ERR => $ENV{PCRE_WORKS} == 1
|
|
- ? "$prog: unmatched parentheses\n"
|
|
+ ? "$prog: unmatched closing parenthesis\n"
|
|
: $no_pcre
|
|
},
|
|
],
|
|
['invalid-re-P-star-paren', '-P "a.*)"', {EXIT=>2},
|
|
{ERR => $ENV{PCRE_WORKS} == 1
|
|
- ? "$prog: unmatched parentheses\n"
|
|
+ ? "$prog: unmatched closing parenthesis\n"
|
|
: $no_pcre
|
|
},
|
|
],
|
|
--
|
|
2.26.2
|
|
|