glib2/backport-replace-pcre1-with-pcre2.patch
2022-09-05 08:06:39 +00:00

2474 lines
102 KiB
Diff
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 8d5a44dc8f36cce270519bd52fcecf330ccb43b4 Mon Sep 17 00:00:00 2001
From: Aleksei Rybalkin <aleksei@rybalkin.org>
Date: Tue, 12 Jul 2022 11:46:34 +0000
Subject: [PATCH] replace pcre1 with pcre2
Conflict:NA
Reference:https://gitlab.gnome.org/GNOME/glib/-/commit/8d5a44dc8f36cce270519bd52fcecf330ccb43b4
---
docs/reference/glib/regex-syntax.xml | 46 --
glib/gregex.c | 1113 ++++++++++++++++----------
glib/gregex.h | 11 +-
glib/meson.build | 10 +-
glib/tests/meson.build | 4 +-
glib/tests/regex.c | 175 ++--
meson.build | 41 +-
po/sk.po | 2 +-
subprojects/pcre.wrap | 11 -
9 files changed, 819 insertions(+), 594 deletions(-)
delete mode 100644 subprojects/pcre.wrap
diff --git a/docs/reference/glib/regex-syntax.xml b/docs/reference/glib/regex-syntax.xml
index 5dd9291..0b413aa 100644
--- a/docs/reference/glib/regex-syntax.xml
+++ b/docs/reference/glib/regex-syntax.xml
@@ -2442,52 +2442,6 @@ processing option does not affect the called subpattern.
</para>
</refsect1>
-<!-- Callouts are not supported by GRegex
-<refsect1>
-<title>Callouts</title>
-<para>
-Perl has a feature whereby using the sequence (?{...}) causes arbitrary
-Perl code to be obeyed in the middle of matching a regular expression.
-This makes it possible, amongst other things, to extract different substrings that match the same pair of parentheses when there is a repetition.
-</para>
-
-<para>
-PCRE provides a similar feature, but of course it cannot obey arbitrary
-Perl code. The feature is called "callout". The caller of PCRE provides
-an external function by putting its entry point in the global variable
-pcre_callout. By default, this variable contains NULL, which disables
-all calling out.
-</para>
-
-<para>
-Within a regular expression, (?C) indicates the points at which the
-external function is to be called. If you want to identify different
-callout points, you can put a number less than 256 after the letter C.
-The default value is zero. For example, this pattern has two callout
-points:
-</para>
-
-<programlisting>
-(?C1)abc(?C2)def
-</programlisting>
-
-<para>
-If the PCRE_AUTO_CALLOUT flag is passed to pcre_compile(), callouts are
-automatically installed before each item in the pattern. They are all
-numbered 255.
-</para>
-
-<para>
-During matching, when PCRE reaches a callout point (and pcre_callout is
-set), the external function is called. It is provided with the number
-of the callout, the position in the pattern, and, optionally, one item
-of data originally supplied by the caller of pcre_exec(). The callout
-function may cause matching to proceed, to backtrack, or to fail altogether. A complete description of the interface to the callout function
-is given in the pcrecallout documentation.
-</para>
-</refsect1>
--->
-
<refsect1>
<title>Copyright</title>
<para>
diff --git a/glib/gregex.c b/glib/gregex.c
index 9a8229a..da37213 100644
--- a/glib/gregex.c
+++ b/glib/gregex.c
@@ -22,7 +22,8 @@
#include <string.h>
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
#include "gtypes.h"
#include "gregex.h"
@@ -107,87 +108,63 @@
* library written by Philip Hazel.
*/
+/* Signifies that flags have already been converted from pcre1 to pcre2. The
+ * value 0x04000000u is also the value of PCRE2_MATCH_INVALID_UTF in pcre2.h,
+ * but it is not used in gregex, so we can reuse it for this flag.
+ */
+#define G_REGEX_FLAGS_CONVERTED 0x04000000u
/* Mask of all the possible values for GRegexCompileFlags. */
-#define G_REGEX_COMPILE_MASK (G_REGEX_CASELESS | \
- G_REGEX_MULTILINE | \
- G_REGEX_DOTALL | \
- G_REGEX_EXTENDED | \
- G_REGEX_ANCHORED | \
- G_REGEX_DOLLAR_ENDONLY | \
- G_REGEX_UNGREEDY | \
- G_REGEX_RAW | \
- G_REGEX_NO_AUTO_CAPTURE | \
- G_REGEX_OPTIMIZE | \
- G_REGEX_FIRSTLINE | \
- G_REGEX_DUPNAMES | \
- G_REGEX_NEWLINE_CR | \
- G_REGEX_NEWLINE_LF | \
- G_REGEX_NEWLINE_CRLF | \
- G_REGEX_NEWLINE_ANYCRLF | \
- G_REGEX_BSR_ANYCRLF | \
- G_REGEX_JAVASCRIPT_COMPAT)
+#define G_REGEX_COMPILE_MASK (PCRE2_CASELESS | \
+ PCRE2_MULTILINE | \
+ PCRE2_DOTALL | \
+ PCRE2_EXTENDED | \
+ PCRE2_ANCHORED | \
+ PCRE2_DOLLAR_ENDONLY | \
+ PCRE2_UNGREEDY | \
+ PCRE2_UTF | \
+ PCRE2_NO_AUTO_CAPTURE | \
+ PCRE2_FIRSTLINE | \
+ PCRE2_DUPNAMES | \
+ PCRE2_NEWLINE_CR | \
+ PCRE2_NEWLINE_LF | \
+ PCRE2_NEWLINE_CRLF | \
+ PCRE2_NEWLINE_ANYCRLF | \
+ PCRE2_BSR_ANYCRLF | \
+ G_REGEX_FLAGS_CONVERTED)
/* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */
#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
-#define G_REGEX_COMPILE_NONPCRE_MASK (G_REGEX_RAW | \
- G_REGEX_OPTIMIZE)
+#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF | \
+ G_REGEX_FLAGS_CONVERTED)
/* Mask of all the possible values for GRegexMatchFlags. */
-#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \
- G_REGEX_MATCH_NOTBOL | \
- G_REGEX_MATCH_NOTEOL | \
- G_REGEX_MATCH_NOTEMPTY | \
- G_REGEX_MATCH_PARTIAL | \
- G_REGEX_MATCH_NEWLINE_CR | \
- G_REGEX_MATCH_NEWLINE_LF | \
- G_REGEX_MATCH_NEWLINE_CRLF | \
- G_REGEX_MATCH_NEWLINE_ANY | \
- G_REGEX_MATCH_NEWLINE_ANYCRLF | \
- G_REGEX_MATCH_BSR_ANYCRLF | \
- G_REGEX_MATCH_BSR_ANY | \
- G_REGEX_MATCH_PARTIAL_SOFT | \
- G_REGEX_MATCH_PARTIAL_HARD | \
- G_REGEX_MATCH_NOTEMPTY_ATSTART)
-
-/* we rely on these flags having the same values */
-G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
-G_STATIC_ASSERT (G_REGEX_MULTILINE == PCRE_MULTILINE);
-G_STATIC_ASSERT (G_REGEX_DOTALL == PCRE_DOTALL);
-G_STATIC_ASSERT (G_REGEX_EXTENDED == PCRE_EXTENDED);
-G_STATIC_ASSERT (G_REGEX_ANCHORED == PCRE_ANCHORED);
-G_STATIC_ASSERT (G_REGEX_DOLLAR_ENDONLY == PCRE_DOLLAR_ENDONLY);
-G_STATIC_ASSERT (G_REGEX_UNGREEDY == PCRE_UNGREEDY);
-G_STATIC_ASSERT (G_REGEX_NO_AUTO_CAPTURE == PCRE_NO_AUTO_CAPTURE);
-G_STATIC_ASSERT (G_REGEX_FIRSTLINE == PCRE_FIRSTLINE);
-G_STATIC_ASSERT (G_REGEX_DUPNAMES == PCRE_DUPNAMES);
-G_STATIC_ASSERT (G_REGEX_NEWLINE_CR == PCRE_NEWLINE_CR);
-G_STATIC_ASSERT (G_REGEX_NEWLINE_LF == PCRE_NEWLINE_LF);
-G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
-G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
-
-G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
-
-/* These PCRE flags are unused or not exposed publicly in GRegexFlags, so
- * it should be ok to reuse them for different things.
- */
-G_STATIC_ASSERT (G_REGEX_OPTIMIZE == PCRE_NO_UTF8_CHECK);
-G_STATIC_ASSERT (G_REGEX_RAW == PCRE_UTF8);
+#define G_REGEX_MATCH_MASK (PCRE2_ANCHORED | \
+ PCRE2_NOTBOL | \
+ PCRE2_NOTEOL | \
+ PCRE2_NOTEMPTY | \
+ PCRE2_PARTIAL_SOFT | \
+ PCRE2_NEWLINE_CR | \
+ PCRE2_NEWLINE_LF | \
+ PCRE2_NEWLINE_CRLF | \
+ PCRE2_NEWLINE_ANY | \
+ PCRE2_NEWLINE_ANYCRLF | \
+ PCRE2_BSR_ANYCRLF | \
+ PCRE2_BSR_UNICODE | \
+ PCRE2_PARTIAL_SOFT | \
+ PCRE2_PARTIAL_HARD | \
+ PCRE2_NOTEMPTY_ATSTART | \
+ G_REGEX_FLAGS_CONVERTED)
+
+#define G_REGEX_NEWLINE_MASK (PCRE2_NEWLINE_CR | \
+ PCRE2_NEWLINE_LF | \
+ PCRE2_NEWLINE_CRLF | \
+ PCRE2_NEWLINE_ANYCRLF)
+
+#define G_REGEX_MATCH_NEWLINE_MASK (PCRE2_NEWLINE_CR | \
+ PCRE2_NEWLINE_LF | \
+ PCRE2_NEWLINE_CRLF | \
+ PCRE2_NEWLINE_ANYCRLF | \
+ PCRE2_NEWLINE_ANY)
/* if the string is in UTF-8 use g_utf8_ functions, else use
* use just +/- 1. */
@@ -208,24 +185,26 @@ struct _GMatchInfo
gint pos; /* position in the string where last match left off */
gint n_offsets; /* number of offsets */
gint *offsets; /* array of offsets paired 0,1 ; 2,3 ; 3,4 etc */
- gint *workspace; /* workspace for pcre_dfa_exec() */
+ gint *workspace; /* workspace for pcre2_dfa_match() */
gint n_workspace; /* number of workspace elements */
const gchar *string; /* string passed to the match function */
gssize string_len; /* length of string, in bytes */
+ pcre2_match_context *match_context;
+ pcre2_match_data *match_data;
};
struct _GRegex
{
gint ref_count; /* the ref count for the immutable part (atomic) */
gchar *pattern; /* the pattern */
- pcre *pcre_re; /* compiled form of the pattern */
- GRegexCompileFlags compile_opts; /* options used at compile time on the pattern */
+ pcre2_code *pcre_re; /* compiled form of the pattern */
+ GRegexCompileFlags compile_opts; /* options used at compile time on the pattern, pcre2 values */
+ GRegexCompileFlags orig_compile_opts; /* options used at compile time on the pattern, gregex values */
GRegexMatchFlags match_opts; /* options used at match time on the regex */
- pcre_extra *extra; /* data stored when G_REGEX_OPTIMIZE is used */
};
/* TRUE if ret is an error code, FALSE otherwise. */
-#define IS_PCRE_ERROR(ret) ((ret) < PCRE_ERROR_NOMATCH && (ret) != PCRE_ERROR_PARTIAL)
+#define IS_PCRE2_ERROR(ret) ((ret) < PCRE2_ERROR_NOMATCH && (ret) != PCRE2_ERROR_PARTIAL)
typedef struct _InterpolationData InterpolationData;
static gboolean interpolation_list_needs_match (GList *list);
@@ -236,70 +215,249 @@ static GList *split_replacement (const gchar *replacement,
GError **error);
static void free_interpolation_data (InterpolationData *data);
+static gint
+map_to_pcre2_compile_flags (gint pcre1_flags)
+{
+ /* Maps compile flags from pcre1 (G_REGEX_*) to pcre2 (PCRE2_*) values and tags the
+ * result with G_REGEX_FLAGS_CONVERTED; input already carrying that tag is returned as is. */
+ gint pcre2_flags = G_REGEX_FLAGS_CONVERTED;
+
+ if (pcre1_flags & G_REGEX_FLAGS_CONVERTED)
+ return pcre1_flags;
+
+ if (pcre1_flags & G_REGEX_CASELESS)
+ pcre2_flags |= PCRE2_CASELESS;
+ if (pcre1_flags & G_REGEX_MULTILINE)
+ pcre2_flags |= PCRE2_MULTILINE;
+ if (pcre1_flags & G_REGEX_DOTALL)
+ pcre2_flags |= PCRE2_DOTALL;
+ if (pcre1_flags & G_REGEX_EXTENDED)
+ pcre2_flags |= PCRE2_EXTENDED;
+ if (pcre1_flags & G_REGEX_ANCHORED)
+ pcre2_flags |= PCRE2_ANCHORED;
+ if (pcre1_flags & G_REGEX_DOLLAR_ENDONLY)
+ pcre2_flags |= PCRE2_DOLLAR_ENDONLY;
+ if (pcre1_flags & G_REGEX_UNGREEDY)
+ pcre2_flags |= PCRE2_UNGREEDY;
+ if (!(pcre1_flags & G_REGEX_RAW)) /* inverted: PCRE2_UTF is set unless G_REGEX_RAW was given */
+ pcre2_flags |= PCRE2_UTF;
+ if (pcre1_flags & G_REGEX_NO_AUTO_CAPTURE)
+ pcre2_flags |= PCRE2_NO_AUTO_CAPTURE;
+ if (pcre1_flags & G_REGEX_FIRSTLINE)
+ pcre2_flags |= PCRE2_FIRSTLINE;
+ if (pcre1_flags & G_REGEX_DUPNAMES)
+ pcre2_flags |= PCRE2_DUPNAMES;
+ if (pcre1_flags & G_REGEX_NEWLINE_CR)
+ pcre2_flags |= PCRE2_NEWLINE_CR;
+ if (pcre1_flags & G_REGEX_NEWLINE_LF)
+ pcre2_flags |= PCRE2_NEWLINE_LF;
+ /* Check for exact match for a composite flag (every bit of it must be set) */
+ if ((pcre1_flags & G_REGEX_NEWLINE_CRLF) == G_REGEX_NEWLINE_CRLF)
+ pcre2_flags |= PCRE2_NEWLINE_CRLF;
+ /* Check for exact match for a composite flag. NOTE(review): pcre2.h appears to define PCRE2_NEWLINE_* as small integers, not single bits - confirm OR-ing them here is safe */
+ if ((pcre1_flags & G_REGEX_NEWLINE_ANYCRLF) == G_REGEX_NEWLINE_ANYCRLF)
+ pcre2_flags |= PCRE2_NEWLINE_ANYCRLF;
+ if (pcre1_flags & G_REGEX_BSR_ANYCRLF)
+ pcre2_flags |= PCRE2_BSR_ANYCRLF;
+
+ /* these are not available in pcre2, so they contribute no pcre2 bits (the "|= 0" below is a no-op) */
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
+ if (pcre1_flags & G_REGEX_OPTIMIZE)
+ pcre2_flags |= 0;
+ if (pcre1_flags & G_REGEX_JAVASCRIPT_COMPAT)
+ pcre2_flags |= 0;
+G_GNUC_END_IGNORE_DEPRECATIONS
+
+ return pcre2_flags;
+}
+
+static gint
+map_to_pcre2_match_flags (gint pcre1_flags)
+{
+ /* Maps match flags from pcre1 (G_REGEX_MATCH_*) to pcre2 (PCRE2_*) values and tags the
+ * result with G_REGEX_FLAGS_CONVERTED; input already carrying that tag is returned as is. */
+ gint pcre2_flags = G_REGEX_FLAGS_CONVERTED;
+
+ if (pcre1_flags & G_REGEX_FLAGS_CONVERTED)
+ return pcre1_flags;
+
+ if (pcre1_flags & G_REGEX_MATCH_ANCHORED)
+ pcre2_flags |= PCRE2_ANCHORED;
+ if (pcre1_flags & G_REGEX_MATCH_NOTBOL)
+ pcre2_flags |= PCRE2_NOTBOL;
+ if (pcre1_flags & G_REGEX_MATCH_NOTEOL)
+ pcre2_flags |= PCRE2_NOTEOL;
+ if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY)
+ pcre2_flags |= PCRE2_NOTEMPTY;
+ if (pcre1_flags & G_REGEX_MATCH_PARTIAL) /* mapped to the same pcre2 bit as G_REGEX_MATCH_PARTIAL_SOFT below */
+ pcre2_flags |= PCRE2_PARTIAL_SOFT;
+ if (pcre1_flags & G_REGEX_MATCH_NEWLINE_CR)
+ pcre2_flags |= PCRE2_NEWLINE_CR;
+ if (pcre1_flags & G_REGEX_MATCH_NEWLINE_LF)
+ pcre2_flags |= PCRE2_NEWLINE_LF;
+ /* Check for exact match for a composite flag (every bit of it must be set) */
+ if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_CRLF) == G_REGEX_MATCH_NEWLINE_CRLF)
+ pcre2_flags |= PCRE2_NEWLINE_CRLF;
+ if (pcre1_flags & G_REGEX_MATCH_NEWLINE_ANY)
+ pcre2_flags |= PCRE2_NEWLINE_ANY;
+ /* Check for exact match for a composite flag. NOTE(review): pcre2.h appears to define PCRE2_NEWLINE_* as small integers, not single bits - confirm OR-ing them here is safe */
+ if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_ANYCRLF) == G_REGEX_MATCH_NEWLINE_ANYCRLF)
+ pcre2_flags |= PCRE2_NEWLINE_ANYCRLF;
+ if (pcre1_flags & G_REGEX_MATCH_BSR_ANYCRLF)
+ pcre2_flags |= PCRE2_BSR_ANYCRLF;
+ if (pcre1_flags & G_REGEX_MATCH_BSR_ANY)
+ pcre2_flags |= PCRE2_BSR_UNICODE;
+ if (pcre1_flags & G_REGEX_MATCH_PARTIAL_SOFT)
+ pcre2_flags |= PCRE2_PARTIAL_SOFT;
+ if (pcre1_flags & G_REGEX_MATCH_PARTIAL_HARD)
+ pcre2_flags |= PCRE2_PARTIAL_HARD;
+ if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART)
+ pcre2_flags |= PCRE2_NOTEMPTY_ATSTART;
+
+ return pcre2_flags;
+}
+
+static gint
+map_to_pcre1_compile_flags (gint pcre2_flags)
+{
+ /* Maps compile flags from pcre2 (PCRE2_*) back to pcre1 (G_REGEX_*) values; input that
+ * lacks G_REGEX_FLAGS_CONVERTED is returned as is. The result does not carry the tag. */
+ gint pcre1_flags = 0;
+
+ if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED))
+ return pcre2_flags;
+
+ if (pcre2_flags & PCRE2_CASELESS)
+ pcre1_flags |= G_REGEX_CASELESS;
+ if (pcre2_flags & PCRE2_MULTILINE)
+ pcre1_flags |= G_REGEX_MULTILINE;
+ if (pcre2_flags & PCRE2_DOTALL)
+ pcre1_flags |= G_REGEX_DOTALL;
+ if (pcre2_flags & PCRE2_EXTENDED)
+ pcre1_flags |= G_REGEX_EXTENDED;
+ if (pcre2_flags & PCRE2_ANCHORED)
+ pcre1_flags |= G_REGEX_ANCHORED;
+ if (pcre2_flags & PCRE2_DOLLAR_ENDONLY)
+ pcre1_flags |= G_REGEX_DOLLAR_ENDONLY;
+ if (pcre2_flags & PCRE2_UNGREEDY)
+ pcre1_flags |= G_REGEX_UNGREEDY;
+ if (!(pcre2_flags & PCRE2_UTF)) /* inverted: G_REGEX_RAW is reported when PCRE2_UTF is absent */
+ pcre1_flags |= G_REGEX_RAW;
+ if (pcre2_flags & PCRE2_NO_AUTO_CAPTURE)
+ pcre1_flags |= G_REGEX_NO_AUTO_CAPTURE;
+ if (pcre2_flags & PCRE2_FIRSTLINE)
+ pcre1_flags |= G_REGEX_FIRSTLINE;
+ if (pcre2_flags & PCRE2_DUPNAMES)
+ pcre1_flags |= G_REGEX_DUPNAMES;
+ if (pcre2_flags & PCRE2_NEWLINE_CR)
+ pcre1_flags |= G_REGEX_NEWLINE_CR;
+ if (pcre2_flags & PCRE2_NEWLINE_LF)
+ pcre1_flags |= G_REGEX_NEWLINE_LF;
+ /* Check for exact match for a composite flag (every bit of it must be set) */
+ if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF)
+ pcre1_flags |= G_REGEX_NEWLINE_CRLF;
+ /* Check for exact match for a composite flag. NOTE(review): pcre2.h appears to define PCRE2_NEWLINE_* as small integers, not single bits - confirm these bit tests */
+ if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF)
+ pcre1_flags |= G_REGEX_NEWLINE_ANYCRLF;
+ if (pcre2_flags & PCRE2_BSR_ANYCRLF)
+ pcre1_flags |= G_REGEX_BSR_ANYCRLF;
+
+ return pcre1_flags;
+}
+
+static gint
+map_to_pcre1_match_flags (gint pcre2_flags)
+{
+ /* Maps match flags from pcre2 (PCRE2_*) back to pcre1 (G_REGEX_MATCH_*) values; input that
+ * lacks G_REGEX_FLAGS_CONVERTED is returned as is. The result does not carry the tag. */
+ gint pcre1_flags = 0;
+
+ if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED))
+ return pcre2_flags;
+
+ if (pcre2_flags & PCRE2_ANCHORED)
+ pcre1_flags |= G_REGEX_MATCH_ANCHORED;
+ if (pcre2_flags & PCRE2_NOTBOL)
+ pcre1_flags |= G_REGEX_MATCH_NOTBOL;
+ if (pcre2_flags & PCRE2_NOTEOL)
+ pcre1_flags |= G_REGEX_MATCH_NOTEOL;
+ if (pcre2_flags & PCRE2_NOTEMPTY)
+ pcre1_flags |= G_REGEX_MATCH_NOTEMPTY;
+ if (pcre2_flags & PCRE2_PARTIAL_SOFT) /* the same pcre2 bit also sets G_REGEX_MATCH_PARTIAL_SOFT further down */
+ pcre1_flags |= G_REGEX_MATCH_PARTIAL;
+ if (pcre2_flags & PCRE2_NEWLINE_CR)
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_CR;
+ if (pcre2_flags & PCRE2_NEWLINE_LF)
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_LF;
+ /* Check for exact match for a composite flag (every bit of it must be set) */
+ if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF)
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_CRLF;
+ if (pcre2_flags & PCRE2_NEWLINE_ANY)
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANY;
+ /* Check for exact match for a composite flag. NOTE(review): pcre2.h appears to define PCRE2_NEWLINE_* as small integers, not single bits - confirm these bit tests */
+ if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF)
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANYCRLF;
+ if (pcre2_flags & PCRE2_BSR_ANYCRLF)
+ pcre1_flags |= G_REGEX_MATCH_BSR_ANYCRLF;
+ if (pcre2_flags & PCRE2_BSR_UNICODE)
+ pcre1_flags |= G_REGEX_MATCH_BSR_ANY;
+ if (pcre2_flags & PCRE2_PARTIAL_SOFT)
+ pcre1_flags |= G_REGEX_MATCH_PARTIAL_SOFT;
+ if (pcre2_flags & PCRE2_PARTIAL_HARD)
+ pcre1_flags |= G_REGEX_MATCH_PARTIAL_HARD;
+ if (pcre2_flags & PCRE2_NOTEMPTY_ATSTART)
+ pcre1_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART;
+
+ return pcre1_flags;
+}
static const gchar *
match_error (gint errcode)
{
switch (errcode)
{
- case PCRE_ERROR_NOMATCH:
+ case PCRE2_ERROR_NOMATCH:
/* not an error */
break;
- case PCRE_ERROR_NULL:
+ case PCRE2_ERROR_NULL:
/* NULL argument, this should not happen in GRegex */
g_warning ("A NULL argument was passed to PCRE");
break;
- case PCRE_ERROR_BADOPTION:
+ case PCRE2_ERROR_BADOPTION:
return "bad options";
- case PCRE_ERROR_BADMAGIC:
+ case PCRE2_ERROR_BADMAGIC:
return _("corrupted object");
- case PCRE_ERROR_UNKNOWN_OPCODE:
- return N_("internal error or corrupted object");
- case PCRE_ERROR_NOMEMORY:
+ case PCRE2_ERROR_NOMEMORY:
return _("out of memory");
- case PCRE_ERROR_NOSUBSTRING:
- /* not used by pcre_exec() */
+ case PCRE2_ERROR_NOSUBSTRING:
+ /* not used by pcre2_match() */
break;
- case PCRE_ERROR_MATCHLIMIT:
+ case PCRE2_ERROR_MATCHLIMIT:
return _("backtracking limit reached");
- case PCRE_ERROR_CALLOUT:
+ case PCRE2_ERROR_CALLOUT:
/* callouts are not implemented */
break;
- case PCRE_ERROR_BADUTF8:
- case PCRE_ERROR_BADUTF8_OFFSET:
+ case PCRE2_ERROR_BADUTFOFFSET:
/* we do not check if strings are valid */
break;
- case PCRE_ERROR_PARTIAL:
+ case PCRE2_ERROR_PARTIAL:
/* not an error */
break;
- case PCRE_ERROR_BADPARTIAL:
- return _("the pattern contains items not supported for partial matching");
- case PCRE_ERROR_INTERNAL:
+ case PCRE2_ERROR_INTERNAL:
return _("internal error");
- case PCRE_ERROR_BADCOUNT:
- /* negative ovecsize, this should not happen in GRegex */
- g_warning ("A negative ovecsize was passed to PCRE");
- break;
- case PCRE_ERROR_DFA_UITEM:
+ case PCRE2_ERROR_DFA_UITEM:
return _("the pattern contains items not supported for partial matching");
- case PCRE_ERROR_DFA_UCOND:
+ case PCRE2_ERROR_DFA_UCOND:
return _("back references as conditions are not supported for partial matching");
- case PCRE_ERROR_DFA_UMLIMIT:
- /* the match_field field is not used in GRegex */
- break;
- case PCRE_ERROR_DFA_WSSIZE:
+ case PCRE2_ERROR_DFA_WSSIZE:
/* handled expanding the workspace */
break;
- case PCRE_ERROR_DFA_RECURSE:
- case PCRE_ERROR_RECURSIONLIMIT:
+ case PCRE2_ERROR_DFA_RECURSE:
+ case PCRE2_ERROR_RECURSIONLIMIT:
return _("recursion limit reached");
- case PCRE_ERROR_BADNEWLINE:
- return _("invalid combination of newline flags");
- case PCRE_ERROR_BADOFFSET:
+ case PCRE2_ERROR_BADOFFSET:
return _("bad offset");
- case PCRE_ERROR_SHORTUTF8:
- return _("short utf8");
- case PCRE_ERROR_RECURSELOOP:
+ case PCRE2_ERROR_RECURSELOOP:
return _("recursion loop");
default:
break;
@@ -310,242 +468,263 @@ match_error (gint errcode)
static void
translate_compile_error (gint *errcode, const gchar **errmsg)
{
- /* Compile errors are created adding 100 to the error code returned
- * by PCRE.
- * If errcode is known we put the translatable error message in
- * erromsg. If errcode is unknown we put the generic
- * G_REGEX_ERROR_COMPILE error code in errcode and keep the
- * untranslated error message returned by PCRE.
+ /* If errcode is known we put the translatable error message in
+ * errmsg. If errcode is unknown we put the generic
+ * G_REGEX_ERROR_COMPILE error code in errcode.
* Note that there can be more PCRE errors with the same GRegexError
* and that some PCRE errors are useless for us.
*/
- *errcode += 100;
switch (*errcode)
{
- case G_REGEX_ERROR_STRAY_BACKSLASH:
+ case PCRE2_ERROR_END_BACKSLASH:
+ *errcode = G_REGEX_ERROR_STRAY_BACKSLASH;
*errmsg = _("\\ at end of pattern");
break;
- case G_REGEX_ERROR_MISSING_CONTROL_CHAR:
+ case PCRE2_ERROR_END_BACKSLASH_C:
+ *errcode = G_REGEX_ERROR_MISSING_CONTROL_CHAR;
*errmsg = _("\\c at end of pattern");
break;
- case G_REGEX_ERROR_UNRECOGNIZED_ESCAPE:
+ case PCRE2_ERROR_UNKNOWN_ESCAPE:
+ case PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE:
+ *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE;
*errmsg = _("unrecognized character following \\");
break;
- case G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER:
+ case PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER:
+ *errcode = G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER;
*errmsg = _("numbers out of order in {} quantifier");
break;
- case G_REGEX_ERROR_QUANTIFIER_TOO_BIG:
+ case PCRE2_ERROR_QUANTIFIER_TOO_BIG:
+ *errcode = G_REGEX_ERROR_QUANTIFIER_TOO_BIG;
*errmsg = _("number too big in {} quantifier");
break;
- case G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS:
+ case PCRE2_ERROR_MISSING_SQUARE_BRACKET:
+ *errcode = G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS;
*errmsg = _("missing terminating ] for character class");
break;
- case G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS:
+ case PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS:
+ *errcode = G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS;
*errmsg = _("invalid escape sequence in character class");
break;
- case G_REGEX_ERROR_RANGE_OUT_OF_ORDER:
+ case PCRE2_ERROR_CLASS_RANGE_ORDER:
+ *errcode = G_REGEX_ERROR_RANGE_OUT_OF_ORDER;
*errmsg = _("range out of order in character class");
break;
- case G_REGEX_ERROR_NOTHING_TO_REPEAT:
+ case PCRE2_ERROR_QUANTIFIER_INVALID:
+ case PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT:
+ *errcode = G_REGEX_ERROR_NOTHING_TO_REPEAT;
*errmsg = _("nothing to repeat");
break;
- case 111: /* internal error: unexpected repeat */
- *errcode = G_REGEX_ERROR_INTERNAL;
- *errmsg = _("unexpected repeat");
- break;
- case G_REGEX_ERROR_UNRECOGNIZED_CHARACTER:
+ case PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY:
+ *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
*errmsg = _("unrecognized character after (? or (?-");
break;
- case G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS:
+ case PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS:
+ *errcode = G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS;
*errmsg = _("POSIX named classes are supported only within a class");
break;
- case G_REGEX_ERROR_UNMATCHED_PARENTHESIS:
+ case PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING:
+ *errcode = G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED;
+ *errmsg = _("POSIX collating elements are not supported");
+ break;
+ case PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS:
+ case PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS:
+ case PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING:
+ *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
*errmsg = _("missing terminating )");
break;
- case G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE:
+ case PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE:
+ *errcode = G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE;
*errmsg = _("reference to non-existent subpattern");
break;
- case G_REGEX_ERROR_UNTERMINATED_COMMENT:
+ case PCRE2_ERROR_MISSING_COMMENT_CLOSING:
+ *errcode = G_REGEX_ERROR_UNTERMINATED_COMMENT;
*errmsg = _("missing ) after comment");
break;
- case G_REGEX_ERROR_EXPRESSION_TOO_LARGE:
+ case PCRE2_ERROR_PATTERN_TOO_LARGE:
+ *errcode = G_REGEX_ERROR_EXPRESSION_TOO_LARGE;
*errmsg = _("regular expression is too large");
break;
- case G_REGEX_ERROR_MEMORY_ERROR:
- *errmsg = _("failed to get memory");
- break;
- case 122: /* unmatched parentheses */
- *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
- *errmsg = _(") without opening (");
- break;
- case 123: /* internal error: code overflow */
- *errcode = G_REGEX_ERROR_INTERNAL;
- *errmsg = _("code overflow");
- break;
- case 124: /* "unrecognized character after (?<\0 */
- *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
- *errmsg = _("unrecognized character after (?<");
+ case PCRE2_ERROR_MISSING_CONDITION_CLOSING:
+ *errcode = G_REGEX_ERROR_MALFORMED_CONDITION;
+ *errmsg = _("malformed number or name after (?(");
break;
- case G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND:
+ case PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH:
+ *errcode = G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND;
*errmsg = _("lookbehind assertion is not fixed length");
break;
- case G_REGEX_ERROR_MALFORMED_CONDITION:
- *errmsg = _("malformed number or name after (?(");
- break;
- case G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES:
+ case PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES:
+ *errcode = G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES;
*errmsg = _("conditional group contains more than two branches");
break;
- case G_REGEX_ERROR_ASSERTION_EXPECTED:
+ case PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED:
+ *errcode = G_REGEX_ERROR_ASSERTION_EXPECTED;
*errmsg = _("assertion expected after (?(");
break;
- case 129:
- *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
- /* translators: '(?R' and '(?[+-]digits' are both meant as (groups of)
- * sequences here, '(?-54' would be an example for the second group.
- */
- *errmsg = _("(?R or (?[+-]digits must be followed by )");
+ case PCRE2_ERROR_BAD_RELATIVE_REFERENCE:
+ *errcode = G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE;
+ *errmsg = _("a numbered reference must not be zero");
break;
- case G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME:
+ case PCRE2_ERROR_UNKNOWN_POSIX_CLASS:
+ *errcode = G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME;
*errmsg = _("unknown POSIX class name");
break;
- case G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED:
- *errmsg = _("POSIX collating elements are not supported");
- break;
- case G_REGEX_ERROR_HEX_CODE_TOO_LARGE:
+ case PCRE2_ERROR_CODE_POINT_TOO_BIG:
+ case PCRE2_ERROR_INVALID_HEXADECIMAL:
+ *errcode = G_REGEX_ERROR_HEX_CODE_TOO_LARGE;
*errmsg = _("character value in \\x{...} sequence is too large");
break;
- case G_REGEX_ERROR_INVALID_CONDITION:
- *errmsg = _("invalid condition (?(0)");
- break;
- case G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND:
+ case PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C:
+ *errcode = G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND;
*errmsg = _("\\C not allowed in lookbehind assertion");
break;
- case 137: /* PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0 */
- /* A number of Perl escapes are not handled by PCRE.
- * Therefore it explicitly raises ERR37.
- */
- *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE;
- *errmsg = _("escapes \\L, \\l, \\N{name}, \\U, and \\u are not supported");
- break;
- case G_REGEX_ERROR_INFINITE_LOOP:
- *errmsg = _("recursive call could loop indefinitely");
- break;
- case 141: /* unrecognized character after (?P\0 */
- *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
- *errmsg = _("unrecognized character after (?P");
- break;
- case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR:
+ case PCRE2_ERROR_MISSING_NAME_TERMINATOR:
+ *errcode = G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR;
*errmsg = _("missing terminator in subpattern name");
break;
- case G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME:
+ case PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME:
+ *errcode = G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME;
*errmsg = _("two named subpatterns have the same name");
break;
- case G_REGEX_ERROR_MALFORMED_PROPERTY:
+ case PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY:
+ *errcode = G_REGEX_ERROR_MALFORMED_PROPERTY;
*errmsg = _("malformed \\P or \\p sequence");
break;
- case G_REGEX_ERROR_UNKNOWN_PROPERTY:
+ case PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY:
+ *errcode = G_REGEX_ERROR_UNKNOWN_PROPERTY;
*errmsg = _("unknown property name after \\P or \\p");
break;
- case G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG:
+ case PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG:
+ *errcode = G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG;
*errmsg = _("subpattern name is too long (maximum 32 characters)");
break;
- case G_REGEX_ERROR_TOO_MANY_SUBPATTERNS:
+ case PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS:
+ *errcode = G_REGEX_ERROR_TOO_MANY_SUBPATTERNS;
*errmsg = _("too many named subpatterns (maximum 10,000)");
break;
- case G_REGEX_ERROR_INVALID_OCTAL_VALUE:
+ case PCRE2_ERROR_OCTAL_BYTE_TOO_BIG:
+ *errcode = G_REGEX_ERROR_INVALID_OCTAL_VALUE;
*errmsg = _("octal value is greater than \\377");
break;
- case 152: /* internal error: overran compiling workspace */
- *errcode = G_REGEX_ERROR_INTERNAL;
- *errmsg = _("overran compiling workspace");
- break;
- case 153: /* internal error: previously-checked referenced subpattern not found */
- *errcode = G_REGEX_ERROR_INTERNAL;
- *errmsg = _("previously-checked referenced subpattern not found");
- break;
- case G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE:
+ case PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES:
+ *errcode = G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE;
*errmsg = _("DEFINE group contains more than one branch");
break;
- case G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS:
+ case PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE:
+ *errcode = G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS;
*errmsg = _("inconsistent NEWLINE options");
break;
- case G_REGEX_ERROR_MISSING_BACK_REFERENCE:
+ case PCRE2_ERROR_BACKSLASH_G_SYNTAX:
+ *errcode = G_REGEX_ERROR_MISSING_BACK_REFERENCE;
*errmsg = _("\\g is not followed by a braced, angle-bracketed, or quoted name or "
"number, or by a plain number");
break;
- case G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE:
- *errmsg = _("a numbered reference must not be zero");
- break;
- case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN:
+ case PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED:
+ *errcode = G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN;
*errmsg = _("an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)");
break;
- case G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB:
+ case PCRE2_ERROR_VERB_UNKNOWN:
+ *errcode = G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB;
*errmsg = _("(*VERB) not recognized");
break;
- case G_REGEX_ERROR_NUMBER_TOO_BIG:
+ case PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG:
+ *errcode = G_REGEX_ERROR_NUMBER_TOO_BIG;
*errmsg = _("number is too big");
break;
- case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME:
+ case PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED:
+ *errcode = G_REGEX_ERROR_MISSING_SUBPATTERN_NAME;
*errmsg = _("missing subpattern name after (?&");
break;
- case G_REGEX_ERROR_MISSING_DIGIT:
- *errmsg = _("digit expected after (?+");
- break;
- case G_REGEX_ERROR_INVALID_DATA_CHARACTER:
- *errmsg = _("] is an invalid data character in JavaScript compatibility mode");
- break;
- case G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME:
+ case PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH:
+ *errcode = G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME;
*errmsg = _("different names for subpatterns of the same number are not allowed");
break;
- case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED:
+ case PCRE2_ERROR_MARK_MISSING_ARGUMENT:
+ *errcode = G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED;
*errmsg = _("(*MARK) must have an argument");
break;
- case G_REGEX_ERROR_INVALID_CONTROL_CHAR:
+ case PCRE2_ERROR_BACKSLASH_C_SYNTAX:
+ *errcode = G_REGEX_ERROR_INVALID_CONTROL_CHAR;
*errmsg = _( "\\c must be followed by an ASCII character");
break;
- case G_REGEX_ERROR_MISSING_NAME:
+ case PCRE2_ERROR_BACKSLASH_K_SYNTAX:
+ *errcode = G_REGEX_ERROR_MISSING_NAME;
*errmsg = _("\\k is not followed by a braced, angle-bracketed, or quoted name");
break;
- case G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS:
+ case PCRE2_ERROR_BACKSLASH_N_IN_CLASS:
+ *errcode = G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS;
*errmsg = _("\\N is not supported in a class");
break;
- case G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES:
- *errmsg = _("too many forward references");
- break;
- case G_REGEX_ERROR_NAME_TOO_LONG:
+ case PCRE2_ERROR_VERB_NAME_TOO_LONG:
+ *errcode = G_REGEX_ERROR_NAME_TOO_LONG;
*errmsg = _("name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)");
break;
- case G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE:
- *errmsg = _("character value in \\u.... sequence is too large");
+ case PCRE2_ERROR_INTERNAL_CODE_OVERFLOW:
+ *errcode = G_REGEX_ERROR_INTERNAL;
+ *errmsg = _("code overflow");
break;
-
- case 116: /* erroffset passed as NULL */
- /* This should not happen as we never pass a NULL erroffset */
- g_warning ("erroffset passed as NULL");
- *errcode = G_REGEX_ERROR_COMPILE;
+ case PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P:
+ *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
+ *errmsg = _("unrecognized character after (?P");
break;
- case 117: /* unknown option bit(s) set */
- /* This should not happen as we check options before passing them
- * to pcre_compile2() */
- g_warning ("unknown option bit(s) set");
- *errcode = G_REGEX_ERROR_COMPILE;
+ case PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE:
+ *errcode = G_REGEX_ERROR_INTERNAL;
+ *errmsg = _("overran compiling workspace");
break;
- case 132: /* this version of PCRE is compiled without UTF support */
- case 144: /* invalid UTF-8 string */
- case 145: /* support for \\P, \\p, and \\X has not been compiled */
- case 167: /* this version of PCRE is not compiled with Unicode property support */
- case 173: /* disallowed Unicode code point (>= 0xd800 && <= 0xdfff) */
- case 174: /* invalid UTF-16 string */
- /* These errors should not happen as we are using an UTF-8 and UCP-enabled PCRE
- * and we do not check if strings are valid */
- case 170: /* internal error: unknown opcode in find_fixedlength() */
+ case PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN:
*errcode = G_REGEX_ERROR_INTERNAL;
+ *errmsg = _("previously-checked referenced subpattern not found");
break;
-
+ case PCRE2_ERROR_HEAP_FAILED:
+ case PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW:
+ case PCRE2_ERROR_UNICODE_NOT_SUPPORTED:
+ case PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT:
+ case PCRE2_ERROR_NO_SURROGATES_IN_UTF16:
+ case PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS:
+ case PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE:
+ case PCRE2_ERROR_INTERNAL_STUDY_ERROR:
+ case PCRE2_ERROR_UTF_IS_DISABLED:
+ case PCRE2_ERROR_UCP_IS_DISABLED:
+ case PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS:
+ case PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED:
+ case PCRE2_ERROR_INTERNAL_BAD_CODE:
+ case PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP:
+ *errcode = G_REGEX_ERROR_INTERNAL;
+ *errmsg = _("internal error");
+ break;
+ case PCRE2_ERROR_INVALID_SUBPATTERN_NAME:
+ case PCRE2_ERROR_CLASS_INVALID_RANGE:
+ case PCRE2_ERROR_ZERO_RELATIVE_REFERENCE:
+ case PCRE2_ERROR_PARENTHESES_STACK_CHECK:
+ case PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED:
+ case PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG:
+ case PCRE2_ERROR_MISSING_CALLOUT_CLOSING:
+ case PCRE2_ERROR_ESCAPE_INVALID_IN_VERB:
+ case PCRE2_ERROR_NULL_PATTERN:
+ case PCRE2_ERROR_BAD_OPTIONS:
+ case PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP:
+ case PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE:
+ case PCRE2_ERROR_INVALID_OCTAL:
+ case PCRE2_ERROR_CALLOUT_STRING_TOO_LONG:
+ case PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG:
+ case PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS:
+ case PCRE2_ERROR_VERSION_CONDITION_SYNTAX:
+ case PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER:
+ case PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER:
+ case PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED:
+ case PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP:
+ case PCRE2_ERROR_PATTERN_TOO_COMPLICATED:
+ case PCRE2_ERROR_LOOKBEHIND_TOO_LONG:
+ case PCRE2_ERROR_PATTERN_STRING_TOO_LONG:
+ case PCRE2_ERROR_BAD_LITERAL_OPTIONS:
default:
*errcode = G_REGEX_ERROR_COMPILE;
+ *errmsg = _("internal error");
+ break;
}
+
+ g_assert (*errcode != 0);
+ g_assert (*errmsg != NULL);
}
/* GMatchInfo */
@@ -568,12 +747,16 @@ match_info_new (const GRegex *regex,
match_info->regex = g_regex_ref ((GRegex *)regex);
match_info->string = string;
match_info->string_len = string_len;
- match_info->matches = PCRE_ERROR_NOMATCH;
+ match_info->matches = PCRE2_ERROR_NOMATCH;
match_info->pos = start_position;
match_info->match_opts = match_options;
- pcre_fullinfo (regex->pcre_re, regex->extra,
- PCRE_INFO_CAPTURECOUNT, &match_info->n_subpatterns);
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT,
+ &match_info->n_subpatterns);
+
+ match_info->match_context = pcre2_match_context_create (NULL);
+ pcre2_set_match_limit (match_info->match_context, 65536); /* should be plenty */
+ pcre2_set_recursion_limit (match_info->match_context, 64); /* should be plenty */
if (is_dfa)
{
@@ -593,9 +776,41 @@ match_info_new (const GRegex *regex,
match_info->offsets[0] = -1;
match_info->offsets[1] = -1;
+ match_info->match_data = pcre2_match_data_create_from_pattern (
+ match_info->regex->pcre_re,
+ NULL);
+
return match_info;
}
+static gboolean
+recalc_match_offsets (GMatchInfo *match_info,
+ GError **error)
+{
+ PCRE2_SIZE *ovector;
+ gint i;
+
+ if (pcre2_get_ovector_count (match_info->match_data) > G_MAXINT / 2)
+ {
+ g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH,
+ _("Error while matching regular expression %s: %s"),
+ match_info->regex->pattern, _("code overflow"));
+ return FALSE;
+ }
+
+ match_info->n_offsets = pcre2_get_ovector_count (match_info->match_data) * 2;
+ ovector = pcre2_get_ovector_pointer (match_info->match_data);
+ match_info->offsets = g_realloc_n (match_info->offsets,
+ match_info->n_offsets,
+ sizeof (gint));
+ for (i = 0; i < match_info->n_offsets; i++)
+ {
+ match_info->offsets[i] = (int) ovector[i];
+ }
+
+ return TRUE;
+}
+
/**
* g_match_info_get_regex:
* @match_info: a #GMatchInfo
@@ -667,6 +882,10 @@ g_match_info_unref (GMatchInfo *match_info)
if (g_atomic_int_dec_and_test (&match_info->ref_count))
{
g_regex_unref (match_info->regex);
+ if (match_info->match_context)
+ pcre2_match_context_free (match_info->match_context);
+ if (match_info->match_data)
+ pcre2_match_data_free (match_info->match_data);
g_free (match_info->offsets);
g_free (match_info->workspace);
g_free (match_info);
@@ -713,6 +932,7 @@ g_match_info_next (GMatchInfo *match_info,
{
gint prev_match_start;
gint prev_match_end;
+ gint opts;
g_return_val_if_fail (match_info != NULL, FALSE);
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
@@ -725,25 +945,29 @@ g_match_info_next (GMatchInfo *match_info,
{
/* we have reached the end of the string */
match_info->pos = -1;
- match_info->matches = PCRE_ERROR_NOMATCH;
+ match_info->matches = PCRE2_ERROR_NOMATCH;
return FALSE;
}
- match_info->matches = pcre_exec (match_info->regex->pcre_re,
- match_info->regex->extra,
- match_info->string,
- match_info->string_len,
- match_info->pos,
- match_info->regex->match_opts | match_info->match_opts,
- match_info->offsets,
- match_info->n_offsets);
- if (IS_PCRE_ERROR (match_info->matches))
+ opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts);
+ match_info->matches = pcre2_match (match_info->regex->pcre_re,
+ (PCRE2_SPTR8) match_info->string,
+ match_info->string_len,
+ match_info->pos,
+ opts & ~G_REGEX_FLAGS_CONVERTED,
+ match_info->match_data,
+ match_info->match_context);
+
+ if (IS_PCRE2_ERROR (match_info->matches))
{
g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH,
_("Error while matching regular expression %s: %s"),
match_info->regex->pattern, match_error (match_info->matches));
return FALSE;
}
+ else
+ if (!recalc_match_offsets (match_info, error))
+ return FALSE;
/* avoid infinite loops if the pattern is an empty string or something
* equivalent */
@@ -753,7 +977,7 @@ g_match_info_next (GMatchInfo *match_info,
{
/* we have reached the end of the string */
match_info->pos = -1;
- match_info->matches = PCRE_ERROR_NOMATCH;
+ match_info->matches = PCRE2_ERROR_NOMATCH;
return FALSE;
}
@@ -831,10 +1055,10 @@ g_match_info_get_match_count (const GMatchInfo *match_info)
{
g_return_val_if_fail (match_info, -1);
- if (match_info->matches == PCRE_ERROR_NOMATCH)
+ if (match_info->matches == PCRE2_ERROR_NOMATCH)
/* no match */
return 0;
- else if (match_info->matches < PCRE_ERROR_NOMATCH)
+ else if (match_info->matches < PCRE2_ERROR_NOMATCH)
/* error */
return -1;
else
@@ -889,7 +1113,7 @@ g_match_info_is_partial_match (const GMatchInfo *match_info)
{
g_return_val_if_fail (match_info != NULL, FALSE);
- return match_info->matches == PCRE_ERROR_PARTIAL;
+ return match_info->matches == PCRE2_ERROR_PARTIAL;
}
/**
@@ -986,8 +1210,6 @@ gchar *
g_match_info_fetch (const GMatchInfo *match_info,
gint match_num)
{
- /* we cannot use pcre_get_substring() because it allocates the
- * string using pcre_malloc(). */
gchar *match = NULL;
gint start, end;
@@ -1067,24 +1289,25 @@ g_match_info_fetch_pos (const GMatchInfo *match_info,
* Returns number of first matched subpattern with name @name.
* There may be more than one in case when DUPNAMES is used,
* and not all subpatterns with that name match;
- * pcre_get_stringnumber() does not work in that case.
+ * pcre2_substring_number_from_name() does not work in that case.
*/
static gint
get_matched_substring_number (const GMatchInfo *match_info,
const gchar *name)
{
gint entrysize;
- gchar *first, *last;
+ PCRE2_SPTR first, last;
guchar *entry;
- if (!(match_info->regex->compile_opts & G_REGEX_DUPNAMES))
- return pcre_get_stringnumber (match_info->regex->pcre_re, name);
+ if (!(match_info->regex->compile_opts & PCRE2_DUPNAMES))
+ return pcre2_substring_number_from_name (match_info->regex->pcre_re, (PCRE2_SPTR8) name);
- /* This code is copied from pcre_get.c: get_first_set() */
- entrysize = pcre_get_stringtable_entries (match_info->regex->pcre_re,
- name,
- &first,
- &last);
+ /* This code is analogous to code from pcre2_substring.c:
+ * pcre2_substring_get_byname() */
+ entrysize = pcre2_substring_nametable_scan (match_info->regex->pcre_re,
+ (PCRE2_SPTR8) name,
+ &first,
+ &last);
if (entrysize <= 0)
return entrysize;
@@ -1122,8 +1345,6 @@ gchar *
g_match_info_fetch_named (const GMatchInfo *match_info,
const gchar *name)
{
- /* we cannot use pcre_get_named_substring() because it allocates the
- * string using pcre_malloc(). */
gint num;
g_return_val_if_fail (match_info != NULL, NULL);
@@ -1205,8 +1426,6 @@ g_match_info_fetch_named_pos (const GMatchInfo *match_info,
gchar **
g_match_info_fetch_all (const GMatchInfo *match_info)
{
- /* we cannot use pcre_get_substring_list() because the returned value
- * isn't suitable for g_strfreev(). */
gchar **result;
gint i;
@@ -1264,9 +1483,7 @@ g_regex_unref (GRegex *regex)
{
g_free (regex->pattern);
if (regex->pcre_re != NULL)
- pcre_free (regex->pcre_re);
- if (regex->extra != NULL)
- pcre_free (regex->extra);
+ pcre2_code_free (regex->pcre_re);
g_free (regex);
}
}
@@ -1274,11 +1491,11 @@ g_regex_unref (GRegex *regex)
/*
* @match_options: (inout) (optional):
*/
-static pcre *regex_compile (const gchar *pattern,
- GRegexCompileFlags compile_options,
- GRegexCompileFlags *compile_options_out,
- GRegexMatchFlags *match_options,
- GError **error);
+static pcre2_code *regex_compile (const gchar *pattern,
+ GRegexCompileFlags compile_options,
+ GRegexCompileFlags *compile_options_out,
+ GRegexMatchFlags *match_options,
+ GError **error);
/**
* g_regex_new:
@@ -1302,10 +1519,13 @@ g_regex_new (const gchar *pattern,
GError **error)
{
GRegex *regex;
- pcre *re;
- const gchar *errmsg;
- gboolean optimize = FALSE;
+ pcre2_code *re;
static gsize initialised = 0;
+ GRegexCompileFlags orig_compile_opts;
+
+ orig_compile_opts = compile_options;
+ compile_options = map_to_pcre2_compile_flags (compile_options);
+ match_options = map_to_pcre2_match_flags (match_options);
g_return_val_if_fail (pattern != NULL, NULL);
g_return_val_if_fail (error == NULL || *error == NULL, NULL);
@@ -1314,17 +1534,13 @@ g_regex_new (const gchar *pattern,
if (g_once_init_enter (&initialised))
{
- int supports_utf8, supports_ucp;
+ int supports_utf8;
- pcre_config (PCRE_CONFIG_UTF8, &supports_utf8);
+ pcre2_config (PCRE2_CONFIG_UNICODE, &supports_utf8);
if (!supports_utf8)
g_critical (_("PCRE library is compiled without UTF8 support"));
- pcre_config (PCRE_CONFIG_UNICODE_PROPERTIES, &supports_ucp);
- if (!supports_ucp)
- g_critical (_("PCRE library is compiled without UTF8 properties support"));
-
- g_once_init_leave (&initialised, supports_utf8 && supports_ucp ? 1 : 2);
+ g_once_init_leave (&initialised, supports_utf8 ? 1 : 2);
}
if (G_UNLIKELY (initialised != 1))
@@ -1334,14 +1550,22 @@ g_regex_new (const gchar *pattern,
return NULL;
}
- /* G_REGEX_OPTIMIZE has the same numeric value of PCRE_NO_UTF8_CHECK,
- * as we do not need to wrap PCRE_NO_UTF8_CHECK. */
- if (compile_options & G_REGEX_OPTIMIZE)
- optimize = TRUE;
+ switch (compile_options & G_REGEX_NEWLINE_MASK)
+ {
+ case 0: /* PCRE2_NEWLINE_ANY */
+ case PCRE2_NEWLINE_CR:
+ case PCRE2_NEWLINE_LF:
+ case PCRE2_NEWLINE_CRLF:
+ case PCRE2_NEWLINE_ANYCRLF:
+ break;
+ default:
+ g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS,
+ "Invalid newline flags");
+ return NULL;
+ }
re = regex_compile (pattern, compile_options, &compile_options,
&match_options, error);
-
if (re == NULL)
return NULL;
@@ -1350,80 +1574,85 @@ g_regex_new (const gchar *pattern,
regex->pattern = g_strdup (pattern);
regex->pcre_re = re;
regex->compile_opts = compile_options;
+ regex->orig_compile_opts = orig_compile_opts;
regex->match_opts = match_options;
- if (optimize)
- {
- regex->extra = pcre_study (regex->pcre_re, 0, &errmsg);
- if (errmsg != NULL)
- {
- GError *tmp_error = g_error_new (G_REGEX_ERROR,
- G_REGEX_ERROR_OPTIMIZE,
- _("Error while optimizing "
- "regular expression %s: %s"),
- regex->pattern,
- errmsg);
- g_propagate_error (error, tmp_error);
-
- g_regex_unref (regex);
- return NULL;
- }
- }
-
return regex;
}
-static pcre *
-regex_compile (const gchar *pattern,
- GRegexCompileFlags compile_options,
- GRegexCompileFlags *compile_options_out,
- GRegexMatchFlags *match_options,
- GError **error)
+static gint
+extract_newline_options (const GRegexCompileFlags compile_options,
+ const GRegexMatchFlags *match_options)
+{
+ gint newline_options = PCRE2_NEWLINE_ANY;
+
+ if (compile_options & G_REGEX_NEWLINE_MASK)
+ newline_options = compile_options & G_REGEX_NEWLINE_MASK;
+ if (match_options && *match_options & G_REGEX_MATCH_NEWLINE_MASK)
+ newline_options = *match_options & G_REGEX_MATCH_NEWLINE_MASK;
+
+ return newline_options;
+}
+
+static gint
+extract_bsr_options (const GRegexCompileFlags compile_options,
+ const GRegexMatchFlags *match_options)
+{
+ gint bsr_options = PCRE2_BSR_UNICODE;
+
+ if (compile_options & PCRE2_BSR_ANYCRLF)
+ bsr_options = PCRE2_BSR_ANYCRLF;
+ if (match_options && *match_options & PCRE2_BSR_ANYCRLF)
+ bsr_options = PCRE2_BSR_ANYCRLF;
+ if (match_options && *match_options & PCRE2_BSR_UNICODE)
+ bsr_options = PCRE2_BSR_UNICODE;
+
+ return bsr_options;
+}
+
+static pcre2_code *
+regex_compile (const gchar *pattern,
+ GRegexCompileFlags compile_options,
+ GRegexCompileFlags *compile_options_out,
+ GRegexMatchFlags *match_options,
+ GError **error)
{
- pcre *re;
+ pcre2_code *re;
+ pcre2_compile_context *context;
const gchar *errmsg;
- gint erroffset;
+ PCRE2_SIZE erroffset;
gint errcode;
GRegexCompileFlags nonpcre_compile_options;
unsigned long int pcre_compile_options;
nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK;
- /* In GRegex the string are, by default, UTF-8 encoded. PCRE
- * instead uses UTF-8 only if required with PCRE_UTF8. */
- if (compile_options & G_REGEX_RAW)
- {
- /* disable utf-8 */
- compile_options &= ~G_REGEX_RAW;
- }
- else
- {
- /* enable utf-8 */
- compile_options |= PCRE_UTF8 | PCRE_NO_UTF8_CHECK;
+ context = pcre2_compile_context_create (NULL);
- if (match_options != NULL)
- *match_options |= PCRE_NO_UTF8_CHECK;
- }
+ /* set newline options */
+ pcre2_set_newline (context, extract_newline_options (compile_options, match_options));
+
+ /* set bsr options */
+ pcre2_set_bsr (context, extract_bsr_options (compile_options, match_options));
- /* PCRE_NEWLINE_ANY is the default for the internal PCRE but
- * not for the system one. */
- if (!(compile_options & G_REGEX_NEWLINE_CR) &&
- !(compile_options & G_REGEX_NEWLINE_LF))
+ /* In case UTF-8 mode is used, also set PCRE2_NO_UTF_CHECK */
+ if (compile_options & PCRE2_UTF)
{
- compile_options |= PCRE_NEWLINE_ANY;
+ compile_options |= PCRE2_NO_UTF_CHECK;
+ if (match_options != NULL)
+ *match_options |= PCRE2_NO_UTF_CHECK;
}
- compile_options |= PCRE_UCP;
-
- /* PCRE_BSR_UNICODE is the default for the internal PCRE but
- * possibly not for the system one.
- */
- if (~compile_options & G_REGEX_BSR_ANYCRLF)
- compile_options |= PCRE_BSR_UNICODE;
+ compile_options |= PCRE2_UCP;
/* compile the pattern */
- re = pcre_compile2 (pattern, compile_options, &errcode,
- &errmsg, &erroffset, NULL);
+ re = pcre2_compile ((PCRE2_SPTR8) pattern,
+ PCRE2_ZERO_TERMINATED,
+ compile_options & ~G_REGEX_FLAGS_CONVERTED,
+ &errcode,
+ &erroffset,
+ context);
+ pcre2_compile_context_free (context);
/* if the compilation failed, set the error member and return
* immediately */
@@ -1440,7 +1669,7 @@ regex_compile (const gchar *pattern,
tmp_error = g_error_new (G_REGEX_ERROR, errcode,
_("Error while compiling regular "
- "expression %s at char %d: %s"),
+ "expression %s at char %" G_GSIZE_FORMAT ": %s"),
pattern, erroffset, errmsg);
g_propagate_error (error, tmp_error);
@@ -1449,22 +1678,22 @@ regex_compile (const gchar *pattern,
/* For options set at the beginning of the pattern, pcre puts them into
* compile options, e.g. "(?i)foo" will make the pcre structure store
- * PCRE_CASELESS even though it wasn't explicitly given for compilation. */
- pcre_fullinfo (re, NULL, PCRE_INFO_OPTIONS, &pcre_compile_options);
+ * PCRE2_CASELESS even though it wasn't explicitly given for compilation. */
+ pcre2_pattern_info (re, PCRE2_INFO_ALLOPTIONS, &pcre_compile_options);
compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK;
- /* Don't leak PCRE_NEWLINE_ANY, which is part of PCRE_NEWLINE_ANYCRLF */
- if ((pcre_compile_options & PCRE_NEWLINE_ANYCRLF) != PCRE_NEWLINE_ANYCRLF)
- compile_options &= ~PCRE_NEWLINE_ANY;
+ /* Don't leak PCRE2_NEWLINE_ANY, which is part of PCRE2_NEWLINE_ANYCRLF */
+ if ((pcre_compile_options & PCRE2_NEWLINE_ANYCRLF) != PCRE2_NEWLINE_ANYCRLF)
+ compile_options &= ~PCRE2_NEWLINE_ANY;
compile_options |= nonpcre_compile_options;
- if (!(compile_options & G_REGEX_DUPNAMES))
+ if (!(compile_options & PCRE2_DUPNAMES))
{
gboolean jchanged = FALSE;
- pcre_fullinfo (re, NULL, PCRE_INFO_JCHANGED, &jchanged);
+ pcre2_pattern_info (re, PCRE2_INFO_JCHANGED, &jchanged);
if (jchanged)
- compile_options |= G_REGEX_DUPNAMES;
+ compile_options |= PCRE2_DUPNAMES;
}
if (compile_options_out != 0)
@@ -1509,8 +1738,7 @@ g_regex_get_max_backref (const GRegex *regex)
{
gint value;
- pcre_fullinfo (regex->pcre_re, regex->extra,
- PCRE_INFO_BACKREFMAX, &value);
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_BACKREFMAX, &value);
return value;
}
@@ -1530,8 +1758,7 @@ g_regex_get_capture_count (const GRegex *regex)
{
gint value;
- pcre_fullinfo (regex->pcre_re, regex->extra,
- PCRE_INFO_CAPTURECOUNT, &value);
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT, &value);
return value;
}
@@ -1551,8 +1778,7 @@ g_regex_get_has_cr_or_lf (const GRegex *regex)
{
gint value;
- pcre_fullinfo (regex->pcre_re, regex->extra,
- PCRE_INFO_HASCRORLF, &value);
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_HASCRORLF, &value);
return !!value;
}
@@ -1574,8 +1800,8 @@ g_regex_get_max_lookbehind (const GRegex *regex)
{
gint max_lookbehind;
- pcre_fullinfo (regex->pcre_re, regex->extra,
- PCRE_INFO_MAXLOOKBEHIND, &max_lookbehind);
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_MAXLOOKBEHIND,
+ &max_lookbehind);
return max_lookbehind;
}
@@ -1597,9 +1823,47 @@ g_regex_get_max_lookbehind (const GRegex *regex)
GRegexCompileFlags
g_regex_get_compile_flags (const GRegex *regex)
{
+ gint extra_flags, info_value;
+
g_return_val_if_fail (regex != NULL, 0);
- return regex->compile_opts;
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
+ /* Preserve original G_REGEX_OPTIMIZE */
+ extra_flags = (regex->orig_compile_opts & G_REGEX_OPTIMIZE);
+G_GNUC_END_IGNORE_DEPRECATIONS
+
+ /* Also include the newline options */
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_NEWLINE, &info_value);
+ switch (info_value)
+ {
+ case PCRE2_NEWLINE_ANYCRLF:
+ extra_flags |= G_REGEX_NEWLINE_ANYCRLF;
+ break;
+ case PCRE2_NEWLINE_CRLF:
+ extra_flags |= G_REGEX_NEWLINE_CRLF;
+ break;
+ case PCRE2_NEWLINE_LF:
+ extra_flags |= G_REGEX_NEWLINE_LF;
+ break;
+ case PCRE2_NEWLINE_CR:
+ extra_flags |= G_REGEX_NEWLINE_CR;
+ break;
+ default:
+ break;
+ }
+
+ /* Also include the bsr options */
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_BSR, &info_value);
+ switch (info_value)
+ {
+ case PCRE2_BSR_ANYCRLF:
+ extra_flags |= G_REGEX_BSR_ANYCRLF;
+ break;
+ default:
+ break;
+ }
+
+ return map_to_pcre1_compile_flags (regex->compile_opts) | extra_flags;
}
/**
@@ -1617,7 +1881,7 @@ g_regex_get_match_flags (const GRegex *regex)
{
g_return_val_if_fail (regex != NULL, 0);
- return regex->match_opts & G_REGEX_MATCH_MASK;
+ return map_to_pcre1_match_flags (regex->match_opts & G_REGEX_MATCH_MASK);
}
/**
@@ -1651,6 +1915,9 @@ g_regex_match_simple (const gchar *pattern,
GRegex *regex;
gboolean result;
+ compile_options = map_to_pcre2_compile_flags (compile_options);
+ match_options = map_to_pcre2_match_flags (match_options);
+
regex = g_regex_new (pattern, compile_options, G_REGEX_MATCH_DEFAULT, NULL);
if (!regex)
return FALSE;
@@ -1718,6 +1985,8 @@ g_regex_match (const GRegex *regex,
GRegexMatchFlags match_options,
GMatchInfo **match_info)
{
+ match_options = map_to_pcre2_match_flags (match_options);
+
return g_regex_match_full (regex, string, -1, 0, match_options,
match_info, NULL);
}
@@ -1801,6 +2070,8 @@ g_regex_match_full (const GRegex *regex,
GMatchInfo *info;
gboolean match_ok;
+ match_options = map_to_pcre2_match_flags (match_options);
+
g_return_val_if_fail (regex != NULL, FALSE);
g_return_val_if_fail (string != NULL, FALSE);
g_return_val_if_fail (start_position >= 0, FALSE);
@@ -1851,6 +2122,8 @@ g_regex_match_all (const GRegex *regex,
GRegexMatchFlags match_options,
GMatchInfo **match_info)
{
+ match_options = map_to_pcre2_match_flags (match_options);
+
return g_regex_match_all_full (regex, string, -1, 0, match_options,
match_info, NULL);
}
@@ -1920,39 +2193,29 @@ g_regex_match_all_full (const GRegex *regex,
{
GMatchInfo *info;
gboolean done;
- pcre *pcre_re;
- pcre_extra *extra;
+ pcre2_code *pcre_re;
gboolean retval;
+ match_options = map_to_pcre2_match_flags (match_options);
+
g_return_val_if_fail (regex != NULL, FALSE);
g_return_val_if_fail (string != NULL, FALSE);
g_return_val_if_fail (start_position >= 0, FALSE);
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE);
-#ifdef PCRE_NO_AUTO_POSSESS
- /* For PCRE >= 8.34 we need to turn off PCRE_NO_AUTO_POSSESS, which
- * is an optimization for normal regex matching, but results in omitting
- * some shorter matches here, and an observable behaviour change.
+ /* For PCRE2 we need to set PCRE2_NO_AUTO_POSSESS: auto-possessification
+ * is an optimization for normal regex matching, but here it results in
+ * omitting some shorter matches, an observable behaviour change.
*
* DFA matching is rather niche, and very rarely used according to
* codesearch.debian.net, so don't bother caching the recompiled RE. */
pcre_re = regex_compile (regex->pattern,
- regex->compile_opts | PCRE_NO_AUTO_POSSESS,
+ regex->compile_opts | PCRE2_NO_AUTO_POSSESS,
NULL, NULL, error);
-
if (pcre_re == NULL)
return FALSE;
- /* Not bothering to cache the optimization data either, with similar
- * reasoning */
- extra = NULL;
-#else
- /* For PCRE < 8.33 the precompiled regex is fine. */
- pcre_re = regex->pcre_re;
- extra = regex->extra;
-#endif
-
info = match_info_new (regex, string, string_len, start_position,
match_options, TRUE);
@@ -1960,29 +2223,38 @@ g_regex_match_all_full (const GRegex *regex,
while (!done)
{
done = TRUE;
- info->matches = pcre_dfa_exec (pcre_re, extra,
- info->string, info->string_len,
- info->pos,
- regex->match_opts | match_options,
- info->offsets, info->n_offsets,
- info->workspace, info->n_workspace);
- if (info->matches == PCRE_ERROR_DFA_WSSIZE)
+ info->matches = pcre2_dfa_match (pcre_re,
+ (PCRE2_SPTR8) info->string, info->string_len,
+ info->pos,
+ (regex->match_opts | match_options | PCRE2_NO_UTF_CHECK) & ~G_REGEX_FLAGS_CONVERTED,
+ info->match_data,
+ info->match_context,
+ info->workspace, info->n_workspace);
+
+ if (!recalc_match_offsets (info, error))
+ return FALSE;
+
+ if (info->matches == PCRE2_ERROR_DFA_WSSIZE)
{
/* info->workspace is too small. */
info->n_workspace *= 2;
- info->workspace = g_realloc (info->workspace,
- info->n_workspace * sizeof (gint));
+ info->workspace = g_realloc_n (info->workspace,
+ info->n_workspace,
+ sizeof (gint));
done = FALSE;
}
else if (info->matches == 0)
{
/* info->offsets is too small. */
info->n_offsets *= 2;
- info->offsets = g_realloc (info->offsets,
- info->n_offsets * sizeof (gint));
+ info->offsets = g_realloc_n (info->offsets,
+ info->n_offsets,
+ sizeof (gint));
+ pcre2_match_data_free (info->match_data);
+ info->match_data = pcre2_match_data_create (info->n_offsets, NULL);
done = FALSE;
}
- else if (IS_PCRE_ERROR (info->matches))
+ else if (IS_PCRE2_ERROR (info->matches))
{
g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH,
_("Error while matching regular expression %s: %s"),
@@ -1990,9 +2262,7 @@ g_regex_match_all_full (const GRegex *regex,
}
}
-#ifdef PCRE_NO_AUTO_POSSESS
- pcre_free (pcre_re);
-#endif
+ pcre2_code_free (pcre_re);
/* dont assert that (info->matches <= info->n_subpatterns + 1) as that only
* holds true for a single match, rather than matching all */
@@ -2030,8 +2300,8 @@ g_regex_get_string_number (const GRegex *regex,
g_return_val_if_fail (regex != NULL, -1);
g_return_val_if_fail (name != NULL, -1);
- num = pcre_get_stringnumber (regex->pcre_re, name);
- if (num == PCRE_ERROR_NOSUBSTRING)
+ num = pcre2_substring_number_from_name (regex->pcre_re, (PCRE2_SPTR8) name);
+ if (num == PCRE2_ERROR_NOSUBSTRING)
num = -1;
return num;
@@ -2086,6 +2356,9 @@ g_regex_split_simple (const gchar *pattern,
GRegex *regex;
gchar **result;
+ compile_options = map_to_pcre2_compile_flags (compile_options);
+ match_options = map_to_pcre2_match_flags (match_options);
+
regex = g_regex_new (pattern, compile_options, 0, NULL);
if (!regex)
return NULL;
@@ -2129,6 +2402,8 @@ g_regex_split (const GRegex *regex,
const gchar *string,
GRegexMatchFlags match_options)
{
+ match_options = map_to_pcre2_match_flags (match_options);
+
return g_regex_split_full (regex, string, -1, 0,
match_options, 0, NULL);
}
@@ -2193,6 +2468,8 @@ g_regex_split_full (const GRegex *regex,
/* the returned array of char **s */
gchar **string_list;
+ match_options = map_to_pcre2_match_flags (match_options);
+
g_return_val_if_fail (regex != NULL, NULL);
g_return_val_if_fail (string != NULL, NULL);
g_return_val_if_fail (start_position >= 0, NULL);
@@ -2817,6 +3094,8 @@ g_regex_replace (const GRegex *regex,
GList *list;
GError *tmp_error = NULL;
+ match_options = map_to_pcre2_match_flags (match_options);
+
g_return_val_if_fail (regex != NULL, NULL);
g_return_val_if_fail (string != NULL, NULL);
g_return_val_if_fail (start_position >= 0, NULL);
@@ -2886,6 +3165,8 @@ g_regex_replace_literal (const GRegex *regex,
GRegexMatchFlags match_options,
GError **error)
{
+ match_options = map_to_pcre2_match_flags (match_options);
+
g_return_val_if_fail (replacement != NULL, NULL);
g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL);
@@ -2974,6 +3255,8 @@ g_regex_replace_eval (const GRegex *regex,
gboolean done = FALSE;
GError *tmp_error = NULL;
+ match_options = map_to_pcre2_match_flags (match_options);
+
g_return_val_if_fail (regex != NULL, NULL);
g_return_val_if_fail (string != NULL, NULL);
g_return_val_if_fail (start_position >= 0, NULL);
diff --git a/glib/gregex.h b/glib/gregex.h
index 817f667..11b419d 100644
--- a/glib/gregex.h
+++ b/glib/gregex.h
@@ -262,7 +262,9 @@ GQuark g_regex_error_quark (void);
* in the usual way).
* @G_REGEX_OPTIMIZE: Optimize the regular expression. If the pattern will
* be used many times, then it may be worth the effort to optimize it
- * to improve the speed of matches.
+ * to improve the speed of matches. Since GLib 2.74, which uses libpcre2,
+ * patterns no longer need a separate optimization step, so this option
+ * is a no-op. Deprecated: 2.74
* @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the
* first newline. Since: 2.34
* @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not
@@ -285,7 +287,8 @@ GQuark g_regex_error_quark (void);
* is recognised. If this option is set, then "\R" only recognizes the newline
* characters '\r', '\n' and '\r\n'. Since: 2.34
* @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
- * JavaScript rather than PCRE. Since: 2.34
+ * JavaScript rather than PCRE. Since GLib 2.74 this is no longer supported,
+ * as libpcre2 does not support it. Since: 2.34 Deprecated: 2.74
*
* Flags specifying compile-time options.
*
@@ -306,7 +309,7 @@ typedef enum
G_REGEX_UNGREEDY = 1 << 9,
G_REGEX_RAW = 1 << 11,
G_REGEX_NO_AUTO_CAPTURE = 1 << 12,
- G_REGEX_OPTIMIZE = 1 << 13,
+ G_REGEX_OPTIMIZE GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 13,
G_REGEX_FIRSTLINE = 1 << 18,
G_REGEX_DUPNAMES = 1 << 19,
G_REGEX_NEWLINE_CR = 1 << 20,
@@ -314,7 +317,7 @@ typedef enum
G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22,
G_REGEX_BSR_ANYCRLF = 1 << 23,
- G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
+ G_REGEX_JAVASCRIPT_COMPAT GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 25
} GRegexCompileFlags;
/**
diff --git a/glib/meson.build b/glib/meson.build
index 93fa504..5bf82da 100644
--- a/glib/meson.build
+++ b/glib/meson.build
@@ -357,13 +357,13 @@ else
glib_dtrace_hdr = []
endif
-pcre_static_args = []
+pcre2_static_args = []
-if use_pcre_static_flag
- pcre_static_args = ['-DPCRE_STATIC']
+if use_pcre2_static_flag
+ pcre2_static_args = ['-DPCRE2_STATIC']
endif
-glib_c_args = ['-DG_LOG_DOMAIN="GLib"', '-DGLIB_COMPILATION'] + pcre_static_args + glib_hidden_visibility_args
+glib_c_args = ['-DG_LOG_DOMAIN="GLib"', '-DGLIB_COMPILATION'] + pcre2_static_args + glib_hidden_visibility_args
libglib = library('glib-2.0',
glib_dtrace_obj, glib_dtrace_hdr,
sources : [deprecated_sources, glib_sources],
@@ -375,7 +375,7 @@ libglib = library('glib-2.0',
link_args : [noseh_link_args, glib_link_flags, win32_ldflags],
include_directories : configinc,
link_with: [charset_lib, gnulib_lib],
- dependencies : [pcre, thread_dep, librt] + libintl_deps + libiconv + platform_deps + [gnulib_libm_dependency, libm] + [libsysprof_capture_dep],
+ dependencies : [pcre2, thread_dep, librt] + libintl_deps + libiconv + platform_deps + [gnulib_libm_dependency, libm] + [libsysprof_capture_dep],
c_args : glib_c_args,
objc_args : glib_c_args,
)
diff --git a/glib/tests/meson.build b/glib/tests/meson.build
index 301158e..c1a9ceb 100644
--- a/glib/tests/meson.build
+++ b/glib/tests/meson.build
@@ -86,8 +86,8 @@ glib_tests = {
},
'refstring' : {},
'regex' : {
- 'dependencies' : [pcre],
- 'c_args' : use_pcre_static_flag ? ['-DPCRE_STATIC'] : [],
+ 'dependencies' : [pcre2],
+ 'c_args' : use_pcre2_static_flag ? ['-DPCRE2_STATIC'] : [],
},
'relation' : {},
'rwlock' : {},
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
index 50fd9c6..36982fb 100644
--- a/glib/tests/regex.c
+++ b/glib/tests/regex.c
@@ -25,7 +25,8 @@
#include <locale.h>
#include "glib.h"
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
/* U+20AC EURO SIGN (symbol, currency) */
#define EURO "\xe2\x82\xac"
@@ -1501,7 +1502,7 @@ test_properties (void)
gchar *str;
error = NULL;
- regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
res = g_regex_match (regex, "ppPP01", 0, &match);
g_assert (res);
str = g_match_info_fetch (match, 0);
@@ -1522,7 +1523,7 @@ test_class (void)
gchar *str;
error = NULL;
- regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
res = g_regex_match (regex, "a:b:\340\254\236:\333\253:\316\240", 0, &match);
g_assert (res);
str = g_match_info_fetch (match, 0);
@@ -1568,7 +1569,7 @@ test_lookahead (void)
gint start, end;
error = NULL;
- regex = g_regex_new ("\\w+(?=;)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("\\w+(?=;)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "word1 word2: word3;", 0, &match);
@@ -1582,7 +1583,7 @@ test_lookahead (void)
g_regex_unref (regex);
error = NULL;
- regex = g_regex_new ("foo(?!bar)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("foo(?!bar)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "foobar foobaz", 0, &match);
@@ -1597,7 +1598,7 @@ test_lookahead (void)
g_regex_unref (regex);
error = NULL;
- regex = g_regex_new ("(?!bar)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?!bar)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "foobar foobaz", 0, &match);
@@ -1630,7 +1631,7 @@ test_lookbehind (void)
gint start, end;
error = NULL;
- regex = g_regex_new ("(?<!foo)bar", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<!foo)bar", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "foobar boobar", 0, &match);
@@ -1645,7 +1646,7 @@ test_lookbehind (void)
g_regex_unref (regex);
error = NULL;
- regex = g_regex_new ("(?<=bullock|donkey) poo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=bullock|donkey) poo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "don poo, and bullock poo", 0, &match);
@@ -1658,17 +1659,17 @@ test_lookbehind (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?<!dogs?|cats?) x", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<!dogs?|cats?) x", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex == NULL);
g_assert_error (error, G_REGEX_ERROR, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND);
g_clear_error (&error);
- regex = g_regex_new ("(?<=ab(c|de)) foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=ab(c|de)) foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex == NULL);
g_assert_error (error, G_REGEX_ERROR, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND);
g_clear_error (&error);
- regex = g_regex_new ("(?<=abc|abde)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=abc|abde)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "abfoo, abdfoo, abcfoo", 0, &match);
@@ -1680,7 +1681,7 @@ test_lookbehind (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^.*+(?<=abcd)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^.*+(?<=abcd)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "abcabcabcabcabcabcabcabcabcd", 0, &match);
@@ -1689,7 +1690,7 @@ test_lookbehind (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?<=\\d{3})(?<!999)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=\\d{3})(?<!999)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match);
@@ -1701,7 +1702,7 @@ test_lookbehind (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?<=\\d{3}...)(?<!999)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=\\d{3}...)(?<!999)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match);
@@ -1713,7 +1714,7 @@ test_lookbehind (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?<=\\d{3}(?!999)...)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=\\d{3}(?!999)...)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match);
@@ -1725,7 +1726,7 @@ test_lookbehind (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?<=(?<!foo)bar)baz", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<=(?<!foo)bar)baz", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "foobarbaz barfoobaz barbarbaz", 0, &match);
@@ -1750,7 +1751,7 @@ test_subpattern (void)
gint start;
error = NULL;
- regex = g_regex_new ("cat(aract|erpillar|)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("cat(aract|erpillar|)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
g_assert_cmpint (g_regex_get_capture_count (regex), ==, 1);
@@ -1768,7 +1769,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("the ((red|white) (king|queen))", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("the ((red|white) (king|queen))", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
g_assert_cmpint (g_regex_get_capture_count (regex), ==, 3);
@@ -1792,7 +1793,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("the ((?:red|white) (king|queen))", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("the ((?:red|white) (king|queen))", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "the white queen", 0, &match);
@@ -1812,7 +1813,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?|(Sat)(ur)|(Sun))day (morning|afternoon)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?|(Sat)(ur)|(Sun))day (morning|afternoon)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
g_assert_cmpint (g_regex_get_capture_count (regex), ==, 3);
@@ -1832,7 +1833,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?|(abc)|(def))\\1", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?|(abc)|(def))\\1", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
g_assert_cmpint (g_regex_get_max_backref (regex), ==, 1);
@@ -1850,7 +1851,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?|(abc)|(def))(?1)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?|(abc)|(def))(?1)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "abcabc abcdef defabc defdef", 0, &match);
@@ -1867,7 +1868,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|(?<DN>Sat)(?:urday)?", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|(?<DN>Sat)(?:urday)?", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "Mon Tuesday Wed Saturday", 0, &match);
@@ -1894,7 +1895,7 @@ test_subpattern (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "aaaaaaaaaaaaaaaa", 0, &match);
@@ -1918,7 +1919,7 @@ test_condition (void)
gboolean res;
error = NULL;
- regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "a(zzzzzz)b", 0, &match);
@@ -1932,7 +1933,7 @@ test_condition (void)
g_regex_unref (regex);
error = NULL;
- regex = g_regex_new ("^(a+)(?<OPEN>\\()?[^()]+(?(<OPEN>)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^(a+)(?<OPEN>\\()?[^()]+(?(<OPEN>)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "a(zzzzzz)b", 0, &match);
@@ -1945,7 +1946,7 @@ test_condition (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "a[zzzzzz]b", 0, &match);
@@ -1960,7 +1961,7 @@ test_condition (void)
regex = g_regex_new ("(?(DEFINE) (?<byte> 2[0-4]\\d | 25[0-5] | 1\\d\\d | [1-9]?\\d) )"
"\\b (?&byte) (\\.(?&byte)){3} \\b",
- G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error);
+ G_REGEX_EXTENDED, 0, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "128.0.0.1", 0, &match);
@@ -1979,7 +1980,7 @@ test_condition (void)
regex = g_regex_new ("^(?(?=[^a-z]*[a-z])"
"\\d{2}-[a-z]{3}-\\d{2} | \\d{2}-\\d{2}-\\d{2} )$",
- G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error);
+ G_REGEX_EXTENDED, 0, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "01-abc-24", 0, &match);
@@ -2012,7 +2013,7 @@ test_recursion (void)
gint start;
error = NULL;
- regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "(middle)", 0, &match);
@@ -2029,7 +2030,7 @@ test_recursion (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "((((((((((((((((middle))))))))))))))))", 0, &match);
@@ -2042,7 +2043,7 @@ test_recursion (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^(?<pn> \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^(?<pn> \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
g_regex_match (regex, "(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()", 0, &match);
@@ -2051,7 +2052,7 @@ test_recursion (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "<ab<01<23<4>>>>", 0, &match);
@@ -2070,7 +2071,7 @@ test_recursion (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "abcdcba", 0, &match);
@@ -2083,7 +2084,7 @@ test_recursion (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "abcdcba", 0, &match);
@@ -2096,7 +2097,7 @@ test_recursion (void)
g_match_info_free (match);
g_regex_unref (regex);
- regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_OPTIMIZE|G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error);
+ regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error);
g_assert (regex);
g_assert_no_error (error);
res = g_regex_match (regex, "abcdcba", 0, &match);
@@ -2167,21 +2168,21 @@ test_max_lookbehind (void)
}
static gboolean
-pcre_ge (guint64 major, guint64 minor)
+pcre2_ge (guint64 major, guint64 minor)
{
- const char *version;
- gchar *ptr;
- guint64 pcre_major, pcre_minor;
+ gchar version[32];
+ const gchar *ptr;
+ guint64 pcre2_major, pcre2_minor;
- /* e.g. 8.35 2014-04-04 */
- version = pcre_version ();
+ /* e.g. 10.36 2020-12-04 */
+ pcre2_config (PCRE2_CONFIG_VERSION, version);
- pcre_major = g_ascii_strtoull (version, &ptr, 10);
+ pcre2_major = g_ascii_strtoull (version, (gchar **) &ptr, 10);
/* ptr points to ".MINOR (release date)" */
g_assert (ptr[0] == '.');
- pcre_minor = g_ascii_strtoull (ptr + 1, NULL, 10);
+ pcre2_minor = g_ascii_strtoull (ptr + 1, NULL, 10);
- return (pcre_major > major) || (pcre_major == major && pcre_minor >= minor);
+ return (pcre2_major > major) || (pcre2_major == major && pcre2_minor >= minor);
}
int
@@ -2203,18 +2204,26 @@ main (int argc, char *argv[])
g_test_add_func ("/regex/max-lookbehind", test_max_lookbehind);
/* TEST_NEW(pattern, compile_opts, match_opts) */
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL);
+G_GNUC_END_IGNORE_DEPRECATIONS
TEST_NEW("", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
TEST_NEW(".*", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
TEST_NEW(".*", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT);
+G_GNUC_END_IGNORE_DEPRECATIONS
TEST_NEW(".*", G_REGEX_MULTILINE, G_REGEX_MATCH_DEFAULT);
TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_DEFAULT);
TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_NOTBOL);
TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT);
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_CASELESS | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT);
+G_GNUC_END_IGNORE_DEPRECATIONS
TEST_NEW("(?P<A>x)|(?P<A>y)", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT);
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
TEST_NEW("(?P<A>x)|(?P<A>y)", G_REGEX_DUPNAMES | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT);
+G_GNUC_END_IGNORE_DEPRECATIONS
/* This gives "internal error: code overflow" with pcre 6.0 */
TEST_NEW("(?i)(?-i)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
TEST_NEW ("(?i)a", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
@@ -2225,9 +2234,10 @@ main (int argc, char *argv[])
TEST_NEW ("(?U)[a-z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
/* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0);
+G_GNUC_END_IGNORE_DEPRECATIONS
TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0);
- TEST_NEW_CHECK_FLAGS ("(?X)a", 0, 0, 0 /* not exposed by GRegex */, 0);
TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0);
TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0);
TEST_NEW_CHECK_FLAGS ("(*UCP)a", 0, 0, 0 /* this always on in GRegex */, 0);
@@ -2255,16 +2265,16 @@ main (int argc, char *argv[])
TEST_NEW_FAIL ("a{4,2}", 0, G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER);
TEST_NEW_FAIL ("a{999999,}", 0, G_REGEX_ERROR_QUANTIFIER_TOO_BIG);
TEST_NEW_FAIL ("[a-z", 0, G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS);
- TEST_NEW_FAIL ("(?X)[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS);
+ TEST_NEW_FAIL ("[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS);
TEST_NEW_FAIL ("[z-a]", 0, G_REGEX_ERROR_RANGE_OUT_OF_ORDER);
TEST_NEW_FAIL ("{2,4}", 0, G_REGEX_ERROR_NOTHING_TO_REPEAT);
TEST_NEW_FAIL ("a(?u)", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER);
- TEST_NEW_FAIL ("a(?<$foo)bar", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER);
+ TEST_NEW_FAIL ("a(?<$foo)bar", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME);
TEST_NEW_FAIL ("a[:alpha:]b", 0, G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS);
TEST_NEW_FAIL ("a(b", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
TEST_NEW_FAIL ("a)b", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
TEST_NEW_FAIL ("a(?R", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
- TEST_NEW_FAIL ("a(?-54", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
+ TEST_NEW_FAIL ("a(?-54", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE);
TEST_NEW_FAIL ("(ab\\2)", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE);
TEST_NEW_FAIL ("a(?#abc", 0, G_REGEX_ERROR_UNTERMINATED_COMMENT);
TEST_NEW_FAIL ("(?<=a+)b", 0, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND);
@@ -2274,51 +2284,31 @@ main (int argc, char *argv[])
TEST_NEW_FAIL ("a[[:fubar:]]b", 0, G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME);
TEST_NEW_FAIL ("[[.ch.]]", 0, G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED);
TEST_NEW_FAIL ("\\x{110000}", 0, G_REGEX_ERROR_HEX_CODE_TOO_LARGE);
- TEST_NEW_FAIL ("^(?(0)f|b)oo", 0, G_REGEX_ERROR_INVALID_CONDITION);
+ TEST_NEW_FAIL ("^(?(0)f|b)oo", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE);
TEST_NEW_FAIL ("(?<=\\C)X", 0, G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND);
- TEST_NEW_FAIL ("(?!\\w)(?R)", 0, G_REGEX_ERROR_INFINITE_LOOP);
- if (pcre_ge (8, 37))
- {
- /* The expected errors changed here. */
- TEST_NEW_FAIL ("(?(?<ab))", 0, G_REGEX_ERROR_ASSERTION_EXPECTED);
- }
- else
- {
- TEST_NEW_FAIL ("(?(?<ab))", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR);
- }
-
- if (pcre_ge (8, 35))
- {
- /* The expected errors changed here. */
- TEST_NEW_FAIL ("(?P<sub>foo)\\g<sub", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR);
- }
- else
- {
- TEST_NEW_FAIL ("(?P<sub>foo)\\g<sub", 0, G_REGEX_ERROR_MISSING_BACK_REFERENCE);
- }
+ TEST_NEW ("(?!\\w)(?R)", 0, 0);
+ TEST_NEW_FAIL ("(?(?<ab))", 0, G_REGEX_ERROR_ASSERTION_EXPECTED);
+ TEST_NEW_FAIL ("(?P<sub>foo)\\g<sub", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR);
TEST_NEW_FAIL ("(?P<x>eks)(?P<x>eccs)", 0, G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME);
-#if 0
- TEST_NEW_FAIL (?, 0, G_REGEX_ERROR_MALFORMED_PROPERTY);
- TEST_NEW_FAIL (?, 0, G_REGEX_ERROR_UNKNOWN_PROPERTY);
-#endif
TEST_NEW_FAIL ("\\666", G_REGEX_RAW, G_REGEX_ERROR_INVALID_OCTAL_VALUE);
TEST_NEW_FAIL ("^(?(DEFINE) abc | xyz ) ", 0, G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE);
TEST_NEW_FAIL ("a", G_REGEX_NEWLINE_CRLF | G_REGEX_NEWLINE_ANYCRLF, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS);
TEST_NEW_FAIL ("^(a)\\g{3", 0, G_REGEX_ERROR_MISSING_BACK_REFERENCE);
- TEST_NEW_FAIL ("^(a)\\g{0}", 0, G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE);
- TEST_NEW_FAIL ("abc(*FAIL:123)xyz", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN);
+ TEST_NEW_FAIL ("^(a)\\g{0}", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE);
+ TEST_NEW ("abc(*FAIL:123)xyz", 0, 0);
TEST_NEW_FAIL ("a(*FOOBAR)b", 0, G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB);
- TEST_NEW_FAIL ("(?i:A{1,}\\6666666666)", 0, G_REGEX_ERROR_NUMBER_TOO_BIG);
+ if (pcre2_ge (10, 37))
+ {
+ TEST_NEW ("(?i:A{1,}\\6666666666)", 0, 0);
+ }
TEST_NEW_FAIL ("(?<a>)(?&)", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME);
- TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_MISSING_DIGIT);
- TEST_NEW_FAIL ("TA]", G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_INVALID_DATA_CHARACTER);
+ TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE);
TEST_NEW_FAIL ("(?|(?<a>A)|(?<b>B))", 0, G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME);
TEST_NEW_FAIL ("a(*MARK)b", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED);
TEST_NEW_FAIL ("^\\c€", 0, G_REGEX_ERROR_INVALID_CONTROL_CHAR);
TEST_NEW_FAIL ("\\k", 0, G_REGEX_ERROR_MISSING_NAME);
TEST_NEW_FAIL ("a[\\NB]c", 0, G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS);
TEST_NEW_FAIL ("(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEFG)XX", 0, G_REGEX_ERROR_NAME_TOO_LONG);
- TEST_NEW_FAIL ("\\u0100", G_REGEX_RAW | G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE);
/* These errors can't really be tested easily:
* G_REGEX_ERROR_EXPRESSION_TOO_LARGE
@@ -2474,7 +2464,15 @@ main (int argc, char *argv[])
TEST_MATCH("a#\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
TEST_MATCH("a#\r\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
TEST_MATCH("a#\rb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
- TEST_MATCH("a#\nb", G_REGEX_EXTENDED, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, FALSE);
+ /* PCRE2 only supports newline settings passed to pcre2_compile() (and not
+ * to pcre2_match()), so we have to compile the pattern with the
+ * effective newline setting (combined from the compile and match options).
+ * However, this setting also affects how newlines are interpreted *inside*
+ * the pattern. With G_REGEX_EXTENDED, this changes where the comment
+ * (started with `#`) ends.
+ */
+ /* On PCRE1, this test expected no match; on PCRE2 it matches because of the above. */
+ TEST_MATCH("a#\nb", G_REGEX_EXTENDED, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, TRUE /*FALSE*/);
TEST_MATCH("a#\nb", G_REGEX_EXTENDED | G_REGEX_NEWLINE_CR, 0, "a", -1, 0, 0, TRUE);
TEST_MATCH("line\nbreak", G_REGEX_MULTILINE, 0, "this is a line\nbreak", -1, 0, 0, TRUE);
@@ -2487,21 +2485,19 @@ main (int argc, char *argv[])
* with pcre's internal tables. Bug #678273 */
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "DŽ", -1, 0, 0, TRUE);
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "dž", -1, 0, 0, TRUE);
-#if PCRE_MAJOR > 8 || (PCRE_MAJOR == 8 && PCRE_MINOR >= 32)
- /* This would incorrectly fail to match in pcre < 8.32, so only assert
- * this for known-good pcre. */
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "Dž", -1, 0, 0, TRUE);
-#endif
/* TEST_MATCH_NEXT#(pattern, string, string_len, start_position, ...) */
TEST_MATCH_NEXT0("a", "x", -1, 0);
TEST_MATCH_NEXT0("a", "ax", -1, 1);
TEST_MATCH_NEXT0("a", "xa", 1, 0);
TEST_MATCH_NEXT0("a", "axa", 1, 2);
+ TEST_MATCH_NEXT1("", "", -1, 0, "", 0, 0);
TEST_MATCH_NEXT1("a", "a", -1, 0, "a", 0, 1);
TEST_MATCH_NEXT1("a", "xax", -1, 0, "a", 1, 2);
TEST_MATCH_NEXT1(EURO, ENG EURO, -1, 0, EURO, 2, 5);
TEST_MATCH_NEXT1("a*", "", -1, 0, "", 0, 0);
+ TEST_MATCH_NEXT2("", "a", -1, 0, "", 0, 0, "", 1, 1);
TEST_MATCH_NEXT2("a*", "aa", -1, 0, "aa", 0, 2, "", 2, 2);
TEST_MATCH_NEXT2(EURO "*", EURO EURO, -1, 0, EURO EURO, 0, 6, "", 6, 6);
TEST_MATCH_NEXT2("a", "axa", -1, 0, "a", 0, 1, "a", 2, 3);
@@ -2675,11 +2671,6 @@ main (int argc, char *argv[])
TEST_EXPAND("a", "a", "\\0130", FALSE, "X");
TEST_EXPAND("a", "a", "\\\\\\0", FALSE, "\\a");
TEST_EXPAND("a(?P<G>.)c", "xabcy", "X\\g<G>X", FALSE, "XbX");
-#if !(PCRE_MAJOR > 8 || (PCRE_MAJOR == 8 && PCRE_MINOR >= 34))
- /* PCRE >= 8.34 no longer allows this usage. */
- TEST_EXPAND("(.)(?P<1>.)", "ab", "\\1", FALSE, "a");
- TEST_EXPAND("(.)(?P<1>.)", "ab", "\\g<1>", FALSE, "a");
-#endif
TEST_EXPAND(".", EURO, "\\0", FALSE, EURO);
TEST_EXPAND("(.)", EURO, "\\1", FALSE, EURO);
TEST_EXPAND("(?P<G>.)", EURO, "\\g<G>", FALSE, EURO);
@@ -2798,6 +2789,10 @@ main (int argc, char *argv[])
TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)", "A", 1);
TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)", "B", 2);
TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)", "C", -1);
+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "A", 1);
+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "B", 2);
+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "C", 3);
+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "D", -1);
TEST_GET_STRING_NUMBER("(?P<A>.)(.)(?P<B>a)", "A", 1);
TEST_GET_STRING_NUMBER("(?P<A>.)(.)(?P<B>a)", "B", 3);
TEST_GET_STRING_NUMBER("(?P<A>.)(.)(?P<B>a)", "C", -1);
diff --git a/meson.build b/meson.build
index 882049c..657e9f6 100644
--- a/meson.build
+++ b/meson.build
@@ -2024,37 +2024,38 @@ else
endif
endif
-pcre = dependency('libpcre', version: '>= 8.31', required : false) # Should check for Unicode support, too. FIXME
-if not pcre.found()
+pcre2 = dependency('libpcre2-8', version: '>= 10.32', required : false)
+if not pcre2.found()
if cc.get_id() == 'msvc' or cc.get_id() == 'clang-cl'
- # MSVC: Search for the PCRE library by the configuration, which corresponds
- # to the output of CMake builds of PCRE. Note that debugoptimized
+ # MSVC: Search for the PCRE2 library by the configuration, which corresponds
+ # to the output of CMake builds of PCRE2. Note that debugoptimized
# is really a Release build with .PDB files.
if vs_crt == 'debug'
- pcre = cc.find_library('pcred', required : false)
+ pcre2 = cc.find_library('pcre2d-8', required : false)
else
- pcre = cc.find_library('pcre', required : false)
+ pcre2 = cc.find_library('pcre2-8', required : false)
endif
endif
endif
# Try again with the fallback
-if not pcre.found()
- pcre = dependency('libpcre', required : true, fallback : ['pcre', 'pcre_dep'])
- use_pcre_static_flag = true
+if not pcre2.found()
+ pcre2 = dependency('libpcre2-8', required : true, fallback : ['pcre2', 'libpcre2_8'])
+ use_pcre2_static_flag = true
elif host_system == 'windows'
- pcre_static = cc.links('''#define PCRE_STATIC
- #include <pcre.h>
- int main() {
- void *p = NULL;
- pcre_free(p);
- return 0;
- }''',
- dependencies: pcre,
- name : 'Windows system PCRE is a static build')
- use_pcre_static_flag = pcre_static
+ pcre2_static = cc.links('''#define PCRE2_STATIC
+ #define PCRE2_CODE_UNIT_WIDTH 8
+ #include <pcre2.h>
+ int main() {
+ void *p = NULL;
+ pcre2_code_free(p);
+ return 0;
+ }''',
+ dependencies: pcre2,
+ name : 'Windows system PCRE2 is a static build')
+ use_pcre2_static_flag = pcre2_static
else
- use_pcre_static_flag = false
+ use_pcre2_static_flag = false
endif
libm = cc.find_library('m', required : false)
diff --git a/po/sk.po b/po/sk.po
index 8d6a1ce..747ad27 100644
--- a/po/sk.po
+++ b/po/sk.po
@@ -5630,7 +5630,7 @@ msgstr "zlý ofset"
msgid "short utf8"
msgstr "krátke utf8"
-# Ide o omyl programátora: case PCRE_ERROR_RECURSELOOP: return _("recursion loop");
+# Ide o omyl programátora: case PCRE2_ERROR_RECURSELOOP: return _("recursion loop");
#: glib/gregex.c:303
msgid "recursion loop"
msgstr "rekurzívna slučka"
diff --git a/subprojects/pcre.wrap b/subprojects/pcre.wrap
deleted file mode 100644
index a6b07b9..0000000
--- a/subprojects/pcre.wrap
+++ /dev/null
@@ -1,11 +0,0 @@
-[wrap-file]
-directory = pcre-8.37
-source_url = https://sourceforge.net/projects/pcre/files/pcre/8.37/pcre-8.37.tar.bz2
-source_filename = pcre-8.37.tar.bz2
-source_hash = 51679ea8006ce31379fb0860e46dd86665d864b5020fc9cd19e71260eef4789d
-patch_filename = pcre_8.37-4_patch.zip
-patch_url = https://wrapdb.mesonbuild.com/v2/pcre_8.37-4/get_patch
-patch_hash = c957f42da6f6378300eb8a18f4a5cccdb8e2aada51a703cac842982f9f785399
-
-[provide]
-libpcre = pcre_dep
--
2.33.0