2474 lines
102 KiB
Diff
2474 lines
102 KiB
Diff
From 8d5a44dc8f36cce270519bd52fcecf330ccb43b4 Mon Sep 17 00:00:00 2001
|
||
From: Aleksei Rybalkin <aleksei@rybalkin.org>
|
||
Date: Tue, 12 Jul 2022 11:46:34 +0000
|
||
Subject: [PATCH] replace pcre1 with pcre2
|
||
|
||
Conflict:NA
|
||
Reference:https://gitlab.gnome.org/GNOME/glib/-/commit/8d5a44dc8f36cce270519bd52fcecf330ccb43b4
|
||
|
||
---
|
||
docs/reference/glib/regex-syntax.xml | 46 --
|
||
glib/gregex.c | 1113 ++++++++++++++++----------
|
||
glib/gregex.h | 11 +-
|
||
glib/meson.build | 10 +-
|
||
glib/tests/meson.build | 4 +-
|
||
glib/tests/regex.c | 175 ++--
|
||
meson.build | 41 +-
|
||
po/sk.po | 2 +-
|
||
subprojects/pcre.wrap | 11 -
|
||
9 files changed, 819 insertions(+), 594 deletions(-)
|
||
delete mode 100644 subprojects/pcre.wrap
|
||
|
||
diff --git a/docs/reference/glib/regex-syntax.xml b/docs/reference/glib/regex-syntax.xml
|
||
index 5dd9291..0b413aa 100644
|
||
--- a/docs/reference/glib/regex-syntax.xml
|
||
+++ b/docs/reference/glib/regex-syntax.xml
|
||
@@ -2442,52 +2442,6 @@ processing option does not affect the called subpattern.
|
||
</para>
|
||
</refsect1>
|
||
|
||
-<!-- Callouts are not supported by GRegex
|
||
-<refsect1>
|
||
-<title>Callouts</title>
|
||
-<para>
|
||
-Perl has a feature whereby using the sequence (?{...}) causes arbitrary
|
||
-Perl code to be obeyed in the middle of matching a regular expression.
|
||
-This makes it possible, amongst other things, to extract different substrings that match the same pair of parentheses when there is a repetition.
|
||
-</para>
|
||
-
|
||
-<para>
|
||
-PCRE provides a similar feature, but of course it cannot obey arbitrary
|
||
-Perl code. The feature is called "callout". The caller of PCRE provides
|
||
-an external function by putting its entry point in the global variable
|
||
-pcre_callout. By default, this variable contains NULL, which disables
|
||
-all calling out.
|
||
-</para>
|
||
-
|
||
-<para>
|
||
-Within a regular expression, (?C) indicates the points at which the
|
||
-external function is to be called. If you want to identify different
|
||
-callout points, you can put a number less than 256 after the letter C.
|
||
-The default value is zero. For example, this pattern has two callout
|
||
-points:
|
||
-</para>
|
||
-
|
||
-<programlisting>
|
||
-(?C1)abc(?C2)def
|
||
-</programlisting>
|
||
-
|
||
-<para>
|
||
-If the PCRE_AUTO_CALLOUT flag is passed to pcre_compile(), callouts are
|
||
-automatically installed before each item in the pattern. They are all
|
||
-numbered 255.
|
||
-</para>
|
||
-
|
||
-<para>
|
||
-During matching, when PCRE reaches a callout point (and pcre_callout is
|
||
-set), the external function is called. It is provided with the number
|
||
-of the callout, the position in the pattern, and, optionally, one item
|
||
-of data originally supplied by the caller of pcre_exec(). The callout
|
||
-function may cause matching to proceed, to backtrack, or to fail altogether. A complete description of the interface to the callout function
|
||
-is given in the pcrecallout documentation.
|
||
-</para>
|
||
-</refsect1>
|
||
--->
|
||
-
|
||
<refsect1>
|
||
<title>Copyright</title>
|
||
<para>
|
||
diff --git a/glib/gregex.c b/glib/gregex.c
|
||
index 9a8229a..da37213 100644
|
||
--- a/glib/gregex.c
|
||
+++ b/glib/gregex.c
|
||
@@ -22,7 +22,8 @@
|
||
|
||
#include <string.h>
|
||
|
||
-#include <pcre.h>
|
||
+#define PCRE2_CODE_UNIT_WIDTH 8
|
||
+#include <pcre2.h>
|
||
|
||
#include "gtypes.h"
|
||
#include "gregex.h"
|
||
@@ -107,87 +108,63 @@
|
||
* library written by Philip Hazel.
|
||
*/
|
||
|
||
+/* Signifies that flags have already been converted from pcre1 to pcre2. The
|
||
+ * value 0x04000000u is also the value of PCRE2_MATCH_INVALID_UTF in pcre2.h,
|
||
+ * but it is not used in gregex, so we can reuse it for this flag.
|
||
+ */
|
||
+#define G_REGEX_FLAGS_CONVERTED 0x04000000u
|
||
/* Mask of all the possible values for GRegexCompileFlags. */
|
||
-#define G_REGEX_COMPILE_MASK (G_REGEX_CASELESS | \
|
||
- G_REGEX_MULTILINE | \
|
||
- G_REGEX_DOTALL | \
|
||
- G_REGEX_EXTENDED | \
|
||
- G_REGEX_ANCHORED | \
|
||
- G_REGEX_DOLLAR_ENDONLY | \
|
||
- G_REGEX_UNGREEDY | \
|
||
- G_REGEX_RAW | \
|
||
- G_REGEX_NO_AUTO_CAPTURE | \
|
||
- G_REGEX_OPTIMIZE | \
|
||
- G_REGEX_FIRSTLINE | \
|
||
- G_REGEX_DUPNAMES | \
|
||
- G_REGEX_NEWLINE_CR | \
|
||
- G_REGEX_NEWLINE_LF | \
|
||
- G_REGEX_NEWLINE_CRLF | \
|
||
- G_REGEX_NEWLINE_ANYCRLF | \
|
||
- G_REGEX_BSR_ANYCRLF | \
|
||
- G_REGEX_JAVASCRIPT_COMPAT)
|
||
+#define G_REGEX_COMPILE_MASK (PCRE2_CASELESS | \
|
||
+ PCRE2_MULTILINE | \
|
||
+ PCRE2_DOTALL | \
|
||
+ PCRE2_EXTENDED | \
|
||
+ PCRE2_ANCHORED | \
|
||
+ PCRE2_DOLLAR_ENDONLY | \
|
||
+ PCRE2_UNGREEDY | \
|
||
+ PCRE2_UTF | \
|
||
+ PCRE2_NO_AUTO_CAPTURE | \
|
||
+ PCRE2_FIRSTLINE | \
|
||
+ PCRE2_DUPNAMES | \
|
||
+ PCRE2_NEWLINE_CR | \
|
||
+ PCRE2_NEWLINE_LF | \
|
||
+ PCRE2_NEWLINE_CRLF | \
|
||
+ PCRE2_NEWLINE_ANYCRLF | \
|
||
+ PCRE2_BSR_ANYCRLF | \
|
||
+ G_REGEX_FLAGS_CONVERTED)
|
||
|
||
/* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */
|
||
#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
|
||
-#define G_REGEX_COMPILE_NONPCRE_MASK (G_REGEX_RAW | \
|
||
- G_REGEX_OPTIMIZE)
|
||
+#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF | \
|
||
+ G_REGEX_FLAGS_CONVERTED)
|
||
|
||
/* Mask of all the possible values for GRegexMatchFlags. */
|
||
-#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_ANCHORED | \
|
||
- G_REGEX_MATCH_NOTBOL | \
|
||
- G_REGEX_MATCH_NOTEOL | \
|
||
- G_REGEX_MATCH_NOTEMPTY | \
|
||
- G_REGEX_MATCH_PARTIAL | \
|
||
- G_REGEX_MATCH_NEWLINE_CR | \
|
||
- G_REGEX_MATCH_NEWLINE_LF | \
|
||
- G_REGEX_MATCH_NEWLINE_CRLF | \
|
||
- G_REGEX_MATCH_NEWLINE_ANY | \
|
||
- G_REGEX_MATCH_NEWLINE_ANYCRLF | \
|
||
- G_REGEX_MATCH_BSR_ANYCRLF | \
|
||
- G_REGEX_MATCH_BSR_ANY | \
|
||
- G_REGEX_MATCH_PARTIAL_SOFT | \
|
||
- G_REGEX_MATCH_PARTIAL_HARD | \
|
||
- G_REGEX_MATCH_NOTEMPTY_ATSTART)
|
||
-
|
||
-/* we rely on these flags having the same values */
|
||
-G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS);
|
||
-G_STATIC_ASSERT (G_REGEX_MULTILINE == PCRE_MULTILINE);
|
||
-G_STATIC_ASSERT (G_REGEX_DOTALL == PCRE_DOTALL);
|
||
-G_STATIC_ASSERT (G_REGEX_EXTENDED == PCRE_EXTENDED);
|
||
-G_STATIC_ASSERT (G_REGEX_ANCHORED == PCRE_ANCHORED);
|
||
-G_STATIC_ASSERT (G_REGEX_DOLLAR_ENDONLY == PCRE_DOLLAR_ENDONLY);
|
||
-G_STATIC_ASSERT (G_REGEX_UNGREEDY == PCRE_UNGREEDY);
|
||
-G_STATIC_ASSERT (G_REGEX_NO_AUTO_CAPTURE == PCRE_NO_AUTO_CAPTURE);
|
||
-G_STATIC_ASSERT (G_REGEX_FIRSTLINE == PCRE_FIRSTLINE);
|
||
-G_STATIC_ASSERT (G_REGEX_DUPNAMES == PCRE_DUPNAMES);
|
||
-G_STATIC_ASSERT (G_REGEX_NEWLINE_CR == PCRE_NEWLINE_CR);
|
||
-G_STATIC_ASSERT (G_REGEX_NEWLINE_LF == PCRE_NEWLINE_LF);
|
||
-G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
|
||
-G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
|
||
-G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
|
||
-G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT);
|
||
-
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD);
|
||
-G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART);
|
||
-
|
||
-/* These PCRE flags are unused or not exposed publicly in GRegexFlags, so
|
||
- * it should be ok to reuse them for different things.
|
||
- */
|
||
-G_STATIC_ASSERT (G_REGEX_OPTIMIZE == PCRE_NO_UTF8_CHECK);
|
||
-G_STATIC_ASSERT (G_REGEX_RAW == PCRE_UTF8);
|
||
+#define G_REGEX_MATCH_MASK (PCRE2_ANCHORED | \
|
||
+ PCRE2_NOTBOL | \
|
||
+ PCRE2_NOTEOL | \
|
||
+ PCRE2_NOTEMPTY | \
|
||
+ PCRE2_PARTIAL_SOFT | \
|
||
+ PCRE2_NEWLINE_CR | \
|
||
+ PCRE2_NEWLINE_LF | \
|
||
+ PCRE2_NEWLINE_CRLF | \
|
||
+ PCRE2_NEWLINE_ANY | \
|
||
+ PCRE2_NEWLINE_ANYCRLF | \
|
||
+ PCRE2_BSR_ANYCRLF | \
|
||
+ PCRE2_BSR_UNICODE | \
|
||
+ PCRE2_PARTIAL_SOFT | \
|
||
+ PCRE2_PARTIAL_HARD | \
|
||
+ PCRE2_NOTEMPTY_ATSTART | \
|
||
+ G_REGEX_FLAGS_CONVERTED)
|
||
+
|
||
+#define G_REGEX_NEWLINE_MASK (PCRE2_NEWLINE_CR | \
|
||
+ PCRE2_NEWLINE_LF | \
|
||
+ PCRE2_NEWLINE_CRLF | \
|
||
+ PCRE2_NEWLINE_ANYCRLF)
|
||
+
|
||
+#define G_REGEX_MATCH_NEWLINE_MASK (PCRE2_NEWLINE_CR | \
|
||
+ PCRE2_NEWLINE_LF | \
|
||
+ PCRE2_NEWLINE_CRLF | \
|
||
+ PCRE2_NEWLINE_ANYCRLF | \
|
||
+ PCRE2_NEWLINE_ANY)
|
||
|
||
/* if the string is in UTF-8 use g_utf8_ functions, else use
|
||
* use just +/- 1. */
|
||
@@ -208,24 +185,26 @@ struct _GMatchInfo
|
||
gint pos; /* position in the string where last match left off */
|
||
gint n_offsets; /* number of offsets */
|
||
gint *offsets; /* array of offsets paired 0,1 ; 2,3 ; 3,4 etc */
|
||
- gint *workspace; /* workspace for pcre_dfa_exec() */
|
||
+ gint *workspace; /* workspace for pcre2_dfa_match() */
|
||
gint n_workspace; /* number of workspace elements */
|
||
const gchar *string; /* string passed to the match function */
|
||
gssize string_len; /* length of string, in bytes */
|
||
+ pcre2_match_context *match_context;
|
||
+ pcre2_match_data *match_data;
|
||
};
|
||
|
||
struct _GRegex
|
||
{
|
||
gint ref_count; /* the ref count for the immutable part (atomic) */
|
||
gchar *pattern; /* the pattern */
|
||
- pcre *pcre_re; /* compiled form of the pattern */
|
||
- GRegexCompileFlags compile_opts; /* options used at compile time on the pattern */
|
||
+ pcre2_code *pcre_re; /* compiled form of the pattern */
|
||
+ GRegexCompileFlags compile_opts; /* options used at compile time on the pattern, pcre2 values */
|
||
+ GRegexCompileFlags orig_compile_opts; /* options used at compile time on the pattern, gregex values */
|
||
GRegexMatchFlags match_opts; /* options used at match time on the regex */
|
||
- pcre_extra *extra; /* data stored when G_REGEX_OPTIMIZE is used */
|
||
};
|
||
|
||
/* TRUE if ret is an error code, FALSE otherwise. */
|
||
-#define IS_PCRE_ERROR(ret) ((ret) < PCRE_ERROR_NOMATCH && (ret) != PCRE_ERROR_PARTIAL)
|
||
+#define IS_PCRE2_ERROR(ret) ((ret) < PCRE2_ERROR_NOMATCH && (ret) != PCRE2_ERROR_PARTIAL)
|
||
|
||
typedef struct _InterpolationData InterpolationData;
|
||
static gboolean interpolation_list_needs_match (GList *list);
|
||
@@ -236,70 +215,249 @@ static GList *split_replacement (const gchar *replacement,
|
||
GError **error);
|
||
static void free_interpolation_data (InterpolationData *data);
|
||
|
||
+static gint
|
||
+map_to_pcre2_compile_flags (gint pcre1_flags)
|
||
+{
|
||
+ /* Maps compile flags from pcre1 to pcre2 values
|
||
+ */
|
||
+ gint pcre2_flags = G_REGEX_FLAGS_CONVERTED;
|
||
+
|
||
+ if (pcre1_flags & G_REGEX_FLAGS_CONVERTED)
|
||
+ return pcre1_flags;
|
||
+
|
||
+ if (pcre1_flags & G_REGEX_CASELESS)
|
||
+ pcre2_flags |= PCRE2_CASELESS;
|
||
+ if (pcre1_flags & G_REGEX_MULTILINE)
|
||
+ pcre2_flags |= PCRE2_MULTILINE;
|
||
+ if (pcre1_flags & G_REGEX_DOTALL)
|
||
+ pcre2_flags |= PCRE2_DOTALL;
|
||
+ if (pcre1_flags & G_REGEX_EXTENDED)
|
||
+ pcre2_flags |= PCRE2_EXTENDED;
|
||
+ if (pcre1_flags & G_REGEX_ANCHORED)
|
||
+ pcre2_flags |= PCRE2_ANCHORED;
|
||
+ if (pcre1_flags & G_REGEX_DOLLAR_ENDONLY)
|
||
+ pcre2_flags |= PCRE2_DOLLAR_ENDONLY;
|
||
+ if (pcre1_flags & G_REGEX_UNGREEDY)
|
||
+ pcre2_flags |= PCRE2_UNGREEDY;
|
||
+ if (!(pcre1_flags & G_REGEX_RAW))
|
||
+ pcre2_flags |= PCRE2_UTF;
|
||
+ if (pcre1_flags & G_REGEX_NO_AUTO_CAPTURE)
|
||
+ pcre2_flags |= PCRE2_NO_AUTO_CAPTURE;
|
||
+ if (pcre1_flags & G_REGEX_FIRSTLINE)
|
||
+ pcre2_flags |= PCRE2_FIRSTLINE;
|
||
+ if (pcre1_flags & G_REGEX_DUPNAMES)
|
||
+ pcre2_flags |= PCRE2_DUPNAMES;
|
||
+ if (pcre1_flags & G_REGEX_NEWLINE_CR)
|
||
+ pcre2_flags |= PCRE2_NEWLINE_CR;
|
||
+ if (pcre1_flags & G_REGEX_NEWLINE_LF)
|
||
+ pcre2_flags |= PCRE2_NEWLINE_LF;
|
||
+ /* Check for exact match for a composite flag */
|
||
+ if ((pcre1_flags & G_REGEX_NEWLINE_CRLF) == G_REGEX_NEWLINE_CRLF)
|
||
+ pcre2_flags |= PCRE2_NEWLINE_CRLF;
|
||
+ /* Check for exact match for a composite flag */
|
||
+ if ((pcre1_flags & G_REGEX_NEWLINE_ANYCRLF) == G_REGEX_NEWLINE_ANYCRLF)
|
||
+ pcre2_flags |= PCRE2_NEWLINE_ANYCRLF;
|
||
+ if (pcre1_flags & G_REGEX_BSR_ANYCRLF)
|
||
+ pcre2_flags |= PCRE2_BSR_ANYCRLF;
|
||
+
|
||
+ /* these are not available in pcre2 */
|
||
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||
+ if (pcre1_flags & G_REGEX_OPTIMIZE)
|
||
+ pcre2_flags |= 0;
|
||
+ if (pcre1_flags & G_REGEX_JAVASCRIPT_COMPAT)
|
||
+ pcre2_flags |= 0;
|
||
+G_GNUC_END_IGNORE_DEPRECATIONS
|
||
+
|
||
+ return pcre2_flags;
|
||
+}
|
||
+
|
||
+static gint
|
||
+map_to_pcre2_match_flags (gint pcre1_flags)
|
||
+{
|
||
+ /* Maps match flags from pcre1 to pcre2 values
|
||
+ */
|
||
+ gint pcre2_flags = G_REGEX_FLAGS_CONVERTED;
|
||
+
|
||
+ if (pcre1_flags & G_REGEX_FLAGS_CONVERTED)
|
||
+ return pcre1_flags;
|
||
+
|
||
+ if (pcre1_flags & G_REGEX_MATCH_ANCHORED)
|
||
+ pcre2_flags |= PCRE2_ANCHORED;
|
||
+ if (pcre1_flags & G_REGEX_MATCH_NOTBOL)
|
||
+ pcre2_flags |= PCRE2_NOTBOL;
|
||
+ if (pcre1_flags & G_REGEX_MATCH_NOTEOL)
|
||
+ pcre2_flags |= PCRE2_NOTEOL;
|
||
+ if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY)
|
||
+ pcre2_flags |= PCRE2_NOTEMPTY;
|
||
+ if (pcre1_flags & G_REGEX_MATCH_PARTIAL)
|
||
+ pcre2_flags |= PCRE2_PARTIAL_SOFT;
|
||
+ if (pcre1_flags & G_REGEX_MATCH_NEWLINE_CR)
|
||
+ pcre2_flags |= PCRE2_NEWLINE_CR;
|
||
+ if (pcre1_flags & G_REGEX_MATCH_NEWLINE_LF)
|
||
+ pcre2_flags |= PCRE2_NEWLINE_LF;
|
||
+ /* Check for exact match for a composite flag */
|
||
+ if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_CRLF) == G_REGEX_MATCH_NEWLINE_CRLF)
|
||
+ pcre2_flags |= PCRE2_NEWLINE_CRLF;
|
||
+ if (pcre1_flags & G_REGEX_MATCH_NEWLINE_ANY)
|
||
+ pcre2_flags |= PCRE2_NEWLINE_ANY;
|
||
+ /* Check for exact match for a composite flag */
|
||
+ if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_ANYCRLF) == G_REGEX_MATCH_NEWLINE_ANYCRLF)
|
||
+ pcre2_flags |= PCRE2_NEWLINE_ANYCRLF;
|
||
+ if (pcre1_flags & G_REGEX_MATCH_BSR_ANYCRLF)
|
||
+ pcre2_flags |= PCRE2_BSR_ANYCRLF;
|
||
+ if (pcre1_flags & G_REGEX_MATCH_BSR_ANY)
|
||
+ pcre2_flags |= PCRE2_BSR_UNICODE;
|
||
+ if (pcre1_flags & G_REGEX_MATCH_PARTIAL_SOFT)
|
||
+ pcre2_flags |= PCRE2_PARTIAL_SOFT;
|
||
+ if (pcre1_flags & G_REGEX_MATCH_PARTIAL_HARD)
|
||
+ pcre2_flags |= PCRE2_PARTIAL_HARD;
|
||
+ if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART)
|
||
+ pcre2_flags |= PCRE2_NOTEMPTY_ATSTART;
|
||
+
|
||
+ return pcre2_flags;
|
||
+}
|
||
+
|
||
+static gint
|
||
+map_to_pcre1_compile_flags (gint pcre2_flags)
|
||
+{
|
||
+ /* Maps compile flags from pcre2 to pcre1 values
|
||
+ */
|
||
+ gint pcre1_flags = 0;
|
||
+
|
||
+ if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED))
|
||
+ return pcre2_flags;
|
||
+
|
||
+ if (pcre2_flags & PCRE2_CASELESS)
|
||
+ pcre1_flags |= G_REGEX_CASELESS;
|
||
+ if (pcre2_flags & PCRE2_MULTILINE)
|
||
+ pcre1_flags |= G_REGEX_MULTILINE;
|
||
+ if (pcre2_flags & PCRE2_DOTALL)
|
||
+ pcre1_flags |= G_REGEX_DOTALL;
|
||
+ if (pcre2_flags & PCRE2_EXTENDED)
|
||
+ pcre1_flags |= G_REGEX_EXTENDED;
|
||
+ if (pcre2_flags & PCRE2_ANCHORED)
|
||
+ pcre1_flags |= G_REGEX_ANCHORED;
|
||
+ if (pcre2_flags & PCRE2_DOLLAR_ENDONLY)
|
||
+ pcre1_flags |= G_REGEX_DOLLAR_ENDONLY;
|
||
+ if (pcre2_flags & PCRE2_UNGREEDY)
|
||
+ pcre1_flags |= G_REGEX_UNGREEDY;
|
||
+ if (!(pcre2_flags & PCRE2_UTF))
|
||
+ pcre1_flags |= G_REGEX_RAW;
|
||
+ if (pcre2_flags & PCRE2_NO_AUTO_CAPTURE)
|
||
+ pcre1_flags |= G_REGEX_NO_AUTO_CAPTURE;
|
||
+ if (pcre2_flags & PCRE2_FIRSTLINE)
|
||
+ pcre1_flags |= G_REGEX_FIRSTLINE;
|
||
+ if (pcre2_flags & PCRE2_DUPNAMES)
|
||
+ pcre1_flags |= G_REGEX_DUPNAMES;
|
||
+ if (pcre2_flags & PCRE2_NEWLINE_CR)
|
||
+ pcre1_flags |= G_REGEX_NEWLINE_CR;
|
||
+ if (pcre2_flags & PCRE2_NEWLINE_LF)
|
||
+ pcre1_flags |= G_REGEX_NEWLINE_LF;
|
||
+ /* Check for exact match for a composite flag */
|
||
+ if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF)
|
||
+ pcre1_flags |= G_REGEX_NEWLINE_CRLF;
|
||
+ /* Check for exact match for a composite flag */
|
||
+ if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF)
|
||
+ pcre1_flags |= G_REGEX_NEWLINE_ANYCRLF;
|
||
+ if (pcre2_flags & PCRE2_BSR_ANYCRLF)
|
||
+ pcre1_flags |= G_REGEX_BSR_ANYCRLF;
|
||
+
|
||
+ return pcre1_flags;
|
||
+}
|
||
+
|
||
+static gint
|
||
+map_to_pcre1_match_flags (gint pcre2_flags)
|
||
+{
|
||
+ /* Maps match flags from pcre2 to pcre1 values
|
||
+ */
|
||
+ gint pcre1_flags = 0;
|
||
+
|
||
+ if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED))
|
||
+ return pcre2_flags;
|
||
+
|
||
+ if (pcre2_flags & PCRE2_ANCHORED)
|
||
+ pcre1_flags |= G_REGEX_MATCH_ANCHORED;
|
||
+ if (pcre2_flags & PCRE2_NOTBOL)
|
||
+ pcre1_flags |= G_REGEX_MATCH_NOTBOL;
|
||
+ if (pcre2_flags & PCRE2_NOTEOL)
|
||
+ pcre1_flags |= G_REGEX_MATCH_NOTEOL;
|
||
+ if (pcre2_flags & PCRE2_NOTEMPTY)
|
||
+ pcre1_flags |= G_REGEX_MATCH_NOTEMPTY;
|
||
+ if (pcre2_flags & PCRE2_PARTIAL_SOFT)
|
||
+ pcre1_flags |= G_REGEX_MATCH_PARTIAL;
|
||
+ if (pcre2_flags & PCRE2_NEWLINE_CR)
|
||
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_CR;
|
||
+ if (pcre2_flags & PCRE2_NEWLINE_LF)
|
||
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_LF;
|
||
+ /* Check for exact match for a composite flag */
|
||
+ if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF)
|
||
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_CRLF;
|
||
+ if (pcre2_flags & PCRE2_NEWLINE_ANY)
|
||
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANY;
|
||
+ /* Check for exact match for a composite flag */
|
||
+ if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF)
|
||
+ pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANYCRLF;
|
||
+ if (pcre2_flags & PCRE2_BSR_ANYCRLF)
|
||
+ pcre1_flags |= G_REGEX_MATCH_BSR_ANYCRLF;
|
||
+ if (pcre2_flags & PCRE2_BSR_UNICODE)
|
||
+ pcre1_flags |= G_REGEX_MATCH_BSR_ANY;
|
||
+ if (pcre2_flags & PCRE2_PARTIAL_SOFT)
|
||
+ pcre1_flags |= G_REGEX_MATCH_PARTIAL_SOFT;
|
||
+ if (pcre2_flags & PCRE2_PARTIAL_HARD)
|
||
+ pcre1_flags |= G_REGEX_MATCH_PARTIAL_HARD;
|
||
+ if (pcre2_flags & PCRE2_NOTEMPTY_ATSTART)
|
||
+ pcre1_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART;
|
||
+
|
||
+ return pcre1_flags;
|
||
+}
|
||
|
||
static const gchar *
|
||
match_error (gint errcode)
|
||
{
|
||
switch (errcode)
|
||
{
|
||
- case PCRE_ERROR_NOMATCH:
|
||
+ case PCRE2_ERROR_NOMATCH:
|
||
/* not an error */
|
||
break;
|
||
- case PCRE_ERROR_NULL:
|
||
+ case PCRE2_ERROR_NULL:
|
||
/* NULL argument, this should not happen in GRegex */
|
||
g_warning ("A NULL argument was passed to PCRE");
|
||
break;
|
||
- case PCRE_ERROR_BADOPTION:
|
||
+ case PCRE2_ERROR_BADOPTION:
|
||
return "bad options";
|
||
- case PCRE_ERROR_BADMAGIC:
|
||
+ case PCRE2_ERROR_BADMAGIC:
|
||
return _("corrupted object");
|
||
- case PCRE_ERROR_UNKNOWN_OPCODE:
|
||
- return N_("internal error or corrupted object");
|
||
- case PCRE_ERROR_NOMEMORY:
|
||
+ case PCRE2_ERROR_NOMEMORY:
|
||
return _("out of memory");
|
||
- case PCRE_ERROR_NOSUBSTRING:
|
||
- /* not used by pcre_exec() */
|
||
+ case PCRE2_ERROR_NOSUBSTRING:
|
||
+ /* not used by pcre2_match() */
|
||
break;
|
||
- case PCRE_ERROR_MATCHLIMIT:
|
||
+ case PCRE2_ERROR_MATCHLIMIT:
|
||
return _("backtracking limit reached");
|
||
- case PCRE_ERROR_CALLOUT:
|
||
+ case PCRE2_ERROR_CALLOUT:
|
||
/* callouts are not implemented */
|
||
break;
|
||
- case PCRE_ERROR_BADUTF8:
|
||
- case PCRE_ERROR_BADUTF8_OFFSET:
|
||
+ case PCRE2_ERROR_BADUTFOFFSET:
|
||
/* we do not check if strings are valid */
|
||
break;
|
||
- case PCRE_ERROR_PARTIAL:
|
||
+ case PCRE2_ERROR_PARTIAL:
|
||
/* not an error */
|
||
break;
|
||
- case PCRE_ERROR_BADPARTIAL:
|
||
- return _("the pattern contains items not supported for partial matching");
|
||
- case PCRE_ERROR_INTERNAL:
|
||
+ case PCRE2_ERROR_INTERNAL:
|
||
return _("internal error");
|
||
- case PCRE_ERROR_BADCOUNT:
|
||
- /* negative ovecsize, this should not happen in GRegex */
|
||
- g_warning ("A negative ovecsize was passed to PCRE");
|
||
- break;
|
||
- case PCRE_ERROR_DFA_UITEM:
|
||
+ case PCRE2_ERROR_DFA_UITEM:
|
||
return _("the pattern contains items not supported for partial matching");
|
||
- case PCRE_ERROR_DFA_UCOND:
|
||
+ case PCRE2_ERROR_DFA_UCOND:
|
||
return _("back references as conditions are not supported for partial matching");
|
||
- case PCRE_ERROR_DFA_UMLIMIT:
|
||
- /* the match_field field is not used in GRegex */
|
||
- break;
|
||
- case PCRE_ERROR_DFA_WSSIZE:
|
||
+ case PCRE2_ERROR_DFA_WSSIZE:
|
||
/* handled expanding the workspace */
|
||
break;
|
||
- case PCRE_ERROR_DFA_RECURSE:
|
||
- case PCRE_ERROR_RECURSIONLIMIT:
|
||
+ case PCRE2_ERROR_DFA_RECURSE:
|
||
+ case PCRE2_ERROR_RECURSIONLIMIT:
|
||
return _("recursion limit reached");
|
||
- case PCRE_ERROR_BADNEWLINE:
|
||
- return _("invalid combination of newline flags");
|
||
- case PCRE_ERROR_BADOFFSET:
|
||
+ case PCRE2_ERROR_BADOFFSET:
|
||
return _("bad offset");
|
||
- case PCRE_ERROR_SHORTUTF8:
|
||
- return _("short utf8");
|
||
- case PCRE_ERROR_RECURSELOOP:
|
||
+ case PCRE2_ERROR_RECURSELOOP:
|
||
return _("recursion loop");
|
||
default:
|
||
break;
|
||
@@ -310,242 +468,263 @@ match_error (gint errcode)
|
||
static void
|
||
translate_compile_error (gint *errcode, const gchar **errmsg)
|
||
{
|
||
- /* Compile errors are created adding 100 to the error code returned
|
||
- * by PCRE.
|
||
- * If errcode is known we put the translatable error message in
|
||
- * erromsg. If errcode is unknown we put the generic
|
||
- * G_REGEX_ERROR_COMPILE error code in errcode and keep the
|
||
- * untranslated error message returned by PCRE.
|
||
+ /* If errcode is known we put the translatable error message in
|
||
+ * errmsg. If errcode is unknown we put the generic
|
||
+ * G_REGEX_ERROR_COMPILE error code in errcode.
|
||
* Note that there can be more PCRE errors with the same GRegexError
|
||
* and that some PCRE errors are useless for us.
|
||
*/
|
||
- *errcode += 100;
|
||
|
||
switch (*errcode)
|
||
{
|
||
- case G_REGEX_ERROR_STRAY_BACKSLASH:
|
||
+ case PCRE2_ERROR_END_BACKSLASH:
|
||
+ *errcode = G_REGEX_ERROR_STRAY_BACKSLASH;
|
||
*errmsg = _("\\ at end of pattern");
|
||
break;
|
||
- case G_REGEX_ERROR_MISSING_CONTROL_CHAR:
|
||
+ case PCRE2_ERROR_END_BACKSLASH_C:
|
||
+ *errcode = G_REGEX_ERROR_MISSING_CONTROL_CHAR;
|
||
*errmsg = _("\\c at end of pattern");
|
||
break;
|
||
- case G_REGEX_ERROR_UNRECOGNIZED_ESCAPE:
|
||
+ case PCRE2_ERROR_UNKNOWN_ESCAPE:
|
||
+ case PCRE2_ERROR_UNSUPPORTED_ESCAPE_SEQUENCE:
|
||
+ *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE;
|
||
*errmsg = _("unrecognized character following \\");
|
||
break;
|
||
- case G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER:
|
||
+ case PCRE2_ERROR_QUANTIFIER_OUT_OF_ORDER:
|
||
+ *errcode = G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER;
|
||
*errmsg = _("numbers out of order in {} quantifier");
|
||
break;
|
||
- case G_REGEX_ERROR_QUANTIFIER_TOO_BIG:
|
||
+ case PCRE2_ERROR_QUANTIFIER_TOO_BIG:
|
||
+ *errcode = G_REGEX_ERROR_QUANTIFIER_TOO_BIG;
|
||
*errmsg = _("number too big in {} quantifier");
|
||
break;
|
||
- case G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS:
|
||
+ case PCRE2_ERROR_MISSING_SQUARE_BRACKET:
|
||
+ *errcode = G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS;
|
||
*errmsg = _("missing terminating ] for character class");
|
||
break;
|
||
- case G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS:
|
||
+ case PCRE2_ERROR_ESCAPE_INVALID_IN_CLASS:
|
||
+ *errcode = G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS;
|
||
*errmsg = _("invalid escape sequence in character class");
|
||
break;
|
||
- case G_REGEX_ERROR_RANGE_OUT_OF_ORDER:
|
||
+ case PCRE2_ERROR_CLASS_RANGE_ORDER:
|
||
+ *errcode = G_REGEX_ERROR_RANGE_OUT_OF_ORDER;
|
||
*errmsg = _("range out of order in character class");
|
||
break;
|
||
- case G_REGEX_ERROR_NOTHING_TO_REPEAT:
|
||
+ case PCRE2_ERROR_QUANTIFIER_INVALID:
|
||
+ case PCRE2_ERROR_INTERNAL_UNEXPECTED_REPEAT:
|
||
+ *errcode = G_REGEX_ERROR_NOTHING_TO_REPEAT;
|
||
*errmsg = _("nothing to repeat");
|
||
break;
|
||
- case 111: /* internal error: unexpected repeat */
|
||
- *errcode = G_REGEX_ERROR_INTERNAL;
|
||
- *errmsg = _("unexpected repeat");
|
||
- break;
|
||
- case G_REGEX_ERROR_UNRECOGNIZED_CHARACTER:
|
||
+ case PCRE2_ERROR_INVALID_AFTER_PARENS_QUERY:
|
||
+ *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
|
||
*errmsg = _("unrecognized character after (? or (?-");
|
||
break;
|
||
- case G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS:
|
||
+ case PCRE2_ERROR_POSIX_CLASS_NOT_IN_CLASS:
|
||
+ *errcode = G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS;
|
||
*errmsg = _("POSIX named classes are supported only within a class");
|
||
break;
|
||
- case G_REGEX_ERROR_UNMATCHED_PARENTHESIS:
|
||
+ case PCRE2_ERROR_POSIX_NO_SUPPORT_COLLATING:
|
||
+ *errcode = G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED;
|
||
+ *errmsg = _("POSIX collating elements are not supported");
|
||
+ break;
|
||
+ case PCRE2_ERROR_MISSING_CLOSING_PARENTHESIS:
|
||
+ case PCRE2_ERROR_UNMATCHED_CLOSING_PARENTHESIS:
|
||
+ case PCRE2_ERROR_PARENS_QUERY_R_MISSING_CLOSING:
|
||
+ *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
|
||
*errmsg = _("missing terminating )");
|
||
break;
|
||
- case G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE:
|
||
+ case PCRE2_ERROR_BAD_SUBPATTERN_REFERENCE:
|
||
+ *errcode = G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE;
|
||
*errmsg = _("reference to non-existent subpattern");
|
||
break;
|
||
- case G_REGEX_ERROR_UNTERMINATED_COMMENT:
|
||
+ case PCRE2_ERROR_MISSING_COMMENT_CLOSING:
|
||
+ *errcode = G_REGEX_ERROR_UNTERMINATED_COMMENT;
|
||
*errmsg = _("missing ) after comment");
|
||
break;
|
||
- case G_REGEX_ERROR_EXPRESSION_TOO_LARGE:
|
||
+ case PCRE2_ERROR_PATTERN_TOO_LARGE:
|
||
+ *errcode = G_REGEX_ERROR_EXPRESSION_TOO_LARGE;
|
||
*errmsg = _("regular expression is too large");
|
||
break;
|
||
- case G_REGEX_ERROR_MEMORY_ERROR:
|
||
- *errmsg = _("failed to get memory");
|
||
- break;
|
||
- case 122: /* unmatched parentheses */
|
||
- *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
|
||
- *errmsg = _(") without opening (");
|
||
- break;
|
||
- case 123: /* internal error: code overflow */
|
||
- *errcode = G_REGEX_ERROR_INTERNAL;
|
||
- *errmsg = _("code overflow");
|
||
- break;
|
||
- case 124: /* "unrecognized character after (?<\0 */
|
||
- *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
|
||
- *errmsg = _("unrecognized character after (?<");
|
||
+ case PCRE2_ERROR_MISSING_CONDITION_CLOSING:
|
||
+ *errcode = G_REGEX_ERROR_MALFORMED_CONDITION;
|
||
+ *errmsg = _("malformed number or name after (?(");
|
||
break;
|
||
- case G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND:
|
||
+ case PCRE2_ERROR_LOOKBEHIND_NOT_FIXED_LENGTH:
|
||
+ *errcode = G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND;
|
||
*errmsg = _("lookbehind assertion is not fixed length");
|
||
break;
|
||
- case G_REGEX_ERROR_MALFORMED_CONDITION:
|
||
- *errmsg = _("malformed number or name after (?(");
|
||
- break;
|
||
- case G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES:
|
||
+ case PCRE2_ERROR_TOO_MANY_CONDITION_BRANCHES:
|
||
+ *errcode = G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES;
|
||
*errmsg = _("conditional group contains more than two branches");
|
||
break;
|
||
- case G_REGEX_ERROR_ASSERTION_EXPECTED:
|
||
+ case PCRE2_ERROR_CONDITION_ASSERTION_EXPECTED:
|
||
+ *errcode = G_REGEX_ERROR_ASSERTION_EXPECTED;
|
||
*errmsg = _("assertion expected after (?(");
|
||
break;
|
||
- case 129:
|
||
- *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS;
|
||
- /* translators: '(?R' and '(?[+-]digits' are both meant as (groups of)
|
||
- * sequences here, '(?-54' would be an example for the second group.
|
||
- */
|
||
- *errmsg = _("(?R or (?[+-]digits must be followed by )");
|
||
+ case PCRE2_ERROR_BAD_RELATIVE_REFERENCE:
|
||
+ *errcode = G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE;
|
||
+ *errmsg = _("a numbered reference must not be zero");
|
||
break;
|
||
- case G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME:
|
||
+ case PCRE2_ERROR_UNKNOWN_POSIX_CLASS:
|
||
+ *errcode = G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME;
|
||
*errmsg = _("unknown POSIX class name");
|
||
break;
|
||
- case G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED:
|
||
- *errmsg = _("POSIX collating elements are not supported");
|
||
- break;
|
||
- case G_REGEX_ERROR_HEX_CODE_TOO_LARGE:
|
||
+ case PCRE2_ERROR_CODE_POINT_TOO_BIG:
|
||
+ case PCRE2_ERROR_INVALID_HEXADECIMAL:
|
||
+ *errcode = G_REGEX_ERROR_HEX_CODE_TOO_LARGE;
|
||
*errmsg = _("character value in \\x{...} sequence is too large");
|
||
break;
|
||
- case G_REGEX_ERROR_INVALID_CONDITION:
|
||
- *errmsg = _("invalid condition (?(0)");
|
||
- break;
|
||
- case G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND:
|
||
+ case PCRE2_ERROR_LOOKBEHIND_INVALID_BACKSLASH_C:
|
||
+ *errcode = G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND;
|
||
*errmsg = _("\\C not allowed in lookbehind assertion");
|
||
break;
|
||
- case 137: /* PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0 */
|
||
- /* A number of Perl escapes are not handled by PCRE.
|
||
- * Therefore it explicitly raises ERR37.
|
||
- */
|
||
- *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE;
|
||
- *errmsg = _("escapes \\L, \\l, \\N{name}, \\U, and \\u are not supported");
|
||
- break;
|
||
- case G_REGEX_ERROR_INFINITE_LOOP:
|
||
- *errmsg = _("recursive call could loop indefinitely");
|
||
- break;
|
||
- case 141: /* unrecognized character after (?P\0 */
|
||
- *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
|
||
- *errmsg = _("unrecognized character after (?P");
|
||
- break;
|
||
- case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR:
|
||
+ case PCRE2_ERROR_MISSING_NAME_TERMINATOR:
|
||
+ *errcode = G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR;
|
||
*errmsg = _("missing terminator in subpattern name");
|
||
break;
|
||
- case G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME:
|
||
+ case PCRE2_ERROR_DUPLICATE_SUBPATTERN_NAME:
|
||
+ *errcode = G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME;
|
||
*errmsg = _("two named subpatterns have the same name");
|
||
break;
|
||
- case G_REGEX_ERROR_MALFORMED_PROPERTY:
|
||
+ case PCRE2_ERROR_MALFORMED_UNICODE_PROPERTY:
|
||
+ *errcode = G_REGEX_ERROR_MALFORMED_PROPERTY;
|
||
*errmsg = _("malformed \\P or \\p sequence");
|
||
break;
|
||
- case G_REGEX_ERROR_UNKNOWN_PROPERTY:
|
||
+ case PCRE2_ERROR_UNKNOWN_UNICODE_PROPERTY:
|
||
+ *errcode = G_REGEX_ERROR_UNKNOWN_PROPERTY;
|
||
*errmsg = _("unknown property name after \\P or \\p");
|
||
break;
|
||
- case G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG:
|
||
+ case PCRE2_ERROR_SUBPATTERN_NAME_TOO_LONG:
|
||
+ *errcode = G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG;
|
||
*errmsg = _("subpattern name is too long (maximum 32 characters)");
|
||
break;
|
||
- case G_REGEX_ERROR_TOO_MANY_SUBPATTERNS:
|
||
+ case PCRE2_ERROR_TOO_MANY_NAMED_SUBPATTERNS:
|
||
+ *errcode = G_REGEX_ERROR_TOO_MANY_SUBPATTERNS;
|
||
*errmsg = _("too many named subpatterns (maximum 10,000)");
|
||
break;
|
||
- case G_REGEX_ERROR_INVALID_OCTAL_VALUE:
|
||
+ case PCRE2_ERROR_OCTAL_BYTE_TOO_BIG:
|
||
+ *errcode = G_REGEX_ERROR_INVALID_OCTAL_VALUE;
|
||
*errmsg = _("octal value is greater than \\377");
|
||
break;
|
||
- case 152: /* internal error: overran compiling workspace */
|
||
- *errcode = G_REGEX_ERROR_INTERNAL;
|
||
- *errmsg = _("overran compiling workspace");
|
||
- break;
|
||
- case 153: /* internal error: previously-checked referenced subpattern not found */
|
||
- *errcode = G_REGEX_ERROR_INTERNAL;
|
||
- *errmsg = _("previously-checked referenced subpattern not found");
|
||
- break;
|
||
- case G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE:
|
||
+ case PCRE2_ERROR_DEFINE_TOO_MANY_BRANCHES:
|
||
+ *errcode = G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE;
|
||
*errmsg = _("DEFINE group contains more than one branch");
|
||
break;
|
||
- case G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS:
|
||
+ case PCRE2_ERROR_INTERNAL_UNKNOWN_NEWLINE:
|
||
+ *errcode = G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS;
|
||
*errmsg = _("inconsistent NEWLINE options");
|
||
break;
|
||
- case G_REGEX_ERROR_MISSING_BACK_REFERENCE:
|
||
+ case PCRE2_ERROR_BACKSLASH_G_SYNTAX:
|
||
+ *errcode = G_REGEX_ERROR_MISSING_BACK_REFERENCE;
|
||
*errmsg = _("\\g is not followed by a braced, angle-bracketed, or quoted name or "
|
||
"number, or by a plain number");
|
||
break;
|
||
- case G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE:
|
||
- *errmsg = _("a numbered reference must not be zero");
|
||
- break;
|
||
- case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN:
|
||
+ case PCRE2_ERROR_VERB_ARGUMENT_NOT_ALLOWED:
|
||
+ *errcode = G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN;
|
||
*errmsg = _("an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)");
|
||
break;
|
||
- case G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB:
|
||
+ case PCRE2_ERROR_VERB_UNKNOWN:
|
||
+ *errcode = G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB;
|
||
*errmsg = _("(*VERB) not recognized");
|
||
break;
|
||
- case G_REGEX_ERROR_NUMBER_TOO_BIG:
|
||
+ case PCRE2_ERROR_SUBPATTERN_NUMBER_TOO_BIG:
|
||
+ *errcode = G_REGEX_ERROR_NUMBER_TOO_BIG;
|
||
*errmsg = _("number is too big");
|
||
break;
|
||
- case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME:
|
||
+ case PCRE2_ERROR_SUBPATTERN_NAME_EXPECTED:
|
||
+ *errcode = G_REGEX_ERROR_MISSING_SUBPATTERN_NAME;
|
||
*errmsg = _("missing subpattern name after (?&");
|
||
break;
|
||
- case G_REGEX_ERROR_MISSING_DIGIT:
|
||
- *errmsg = _("digit expected after (?+");
|
||
- break;
|
||
- case G_REGEX_ERROR_INVALID_DATA_CHARACTER:
|
||
- *errmsg = _("] is an invalid data character in JavaScript compatibility mode");
|
||
- break;
|
||
- case G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME:
|
||
+ case PCRE2_ERROR_SUBPATTERN_NAMES_MISMATCH:
|
||
+ *errcode = G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME;
|
||
*errmsg = _("different names for subpatterns of the same number are not allowed");
|
||
break;
|
||
- case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED:
|
||
+ case PCRE2_ERROR_MARK_MISSING_ARGUMENT:
|
||
+ *errcode = G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED;
|
||
*errmsg = _("(*MARK) must have an argument");
|
||
break;
|
||
- case G_REGEX_ERROR_INVALID_CONTROL_CHAR:
|
||
+ case PCRE2_ERROR_BACKSLASH_C_SYNTAX:
|
||
+ *errcode = G_REGEX_ERROR_INVALID_CONTROL_CHAR;
|
||
*errmsg = _( "\\c must be followed by an ASCII character");
|
||
break;
|
||
- case G_REGEX_ERROR_MISSING_NAME:
|
||
+ case PCRE2_ERROR_BACKSLASH_K_SYNTAX:
|
||
+ *errcode = G_REGEX_ERROR_MISSING_NAME;
|
||
*errmsg = _("\\k is not followed by a braced, angle-bracketed, or quoted name");
|
||
break;
|
||
- case G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS:
|
||
+ case PCRE2_ERROR_BACKSLASH_N_IN_CLASS:
|
||
+ *errcode = G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS;
|
||
*errmsg = _("\\N is not supported in a class");
|
||
break;
|
||
- case G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES:
|
||
- *errmsg = _("too many forward references");
|
||
- break;
|
||
- case G_REGEX_ERROR_NAME_TOO_LONG:
|
||
+ case PCRE2_ERROR_VERB_NAME_TOO_LONG:
|
||
+ *errcode = G_REGEX_ERROR_NAME_TOO_LONG;
|
||
*errmsg = _("name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)");
|
||
break;
|
||
- case G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE:
|
||
- *errmsg = _("character value in \\u.... sequence is too large");
|
||
+ case PCRE2_ERROR_INTERNAL_CODE_OVERFLOW:
|
||
+ *errcode = G_REGEX_ERROR_INTERNAL;
|
||
+ *errmsg = _("code overflow");
|
||
break;
|
||
-
|
||
- case 116: /* erroffset passed as NULL */
|
||
- /* This should not happen as we never pass a NULL erroffset */
|
||
- g_warning ("erroffset passed as NULL");
|
||
- *errcode = G_REGEX_ERROR_COMPILE;
|
||
+ case PCRE2_ERROR_UNRECOGNIZED_AFTER_QUERY_P:
|
||
+ *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER;
|
||
+ *errmsg = _("unrecognized character after (?P");
|
||
break;
|
||
- case 117: /* unknown option bit(s) set */
|
||
- /* This should not happen as we check options before passing them
|
||
- * to pcre_compile2() */
|
||
- g_warning ("unknown option bit(s) set");
|
||
- *errcode = G_REGEX_ERROR_COMPILE;
|
||
+ case PCRE2_ERROR_INTERNAL_OVERRAN_WORKSPACE:
|
||
+ *errcode = G_REGEX_ERROR_INTERNAL;
|
||
+ *errmsg = _("overran compiling workspace");
|
||
break;
|
||
- case 132: /* this version of PCRE is compiled without UTF support */
|
||
- case 144: /* invalid UTF-8 string */
|
||
- case 145: /* support for \\P, \\p, and \\X has not been compiled */
|
||
- case 167: /* this version of PCRE is not compiled with Unicode property support */
|
||
- case 173: /* disallowed Unicode code point (>= 0xd800 && <= 0xdfff) */
|
||
- case 174: /* invalid UTF-16 string */
|
||
- /* These errors should not happen as we are using an UTF-8 and UCP-enabled PCRE
|
||
- * and we do not check if strings are valid */
|
||
- case 170: /* internal error: unknown opcode in find_fixedlength() */
|
||
+ case PCRE2_ERROR_INTERNAL_MISSING_SUBPATTERN:
|
||
*errcode = G_REGEX_ERROR_INTERNAL;
|
||
+ *errmsg = _("previously-checked referenced subpattern not found");
|
||
break;
|
||
-
|
||
+ case PCRE2_ERROR_HEAP_FAILED:
|
||
+ case PCRE2_ERROR_INTERNAL_PARSED_OVERFLOW:
|
||
+ case PCRE2_ERROR_UNICODE_NOT_SUPPORTED:
|
||
+ case PCRE2_ERROR_UNICODE_DISALLOWED_CODE_POINT:
|
||
+ case PCRE2_ERROR_NO_SURROGATES_IN_UTF16:
|
||
+ case PCRE2_ERROR_INTERNAL_BAD_CODE_LOOKBEHINDS:
|
||
+ case PCRE2_ERROR_UNICODE_PROPERTIES_UNAVAILABLE:
|
||
+ case PCRE2_ERROR_INTERNAL_STUDY_ERROR:
|
||
+ case PCRE2_ERROR_UTF_IS_DISABLED:
|
||
+ case PCRE2_ERROR_UCP_IS_DISABLED:
|
||
+ case PCRE2_ERROR_INTERNAL_BAD_CODE_AUTO_POSSESS:
|
||
+ case PCRE2_ERROR_BACKSLASH_C_LIBRARY_DISABLED:
|
||
+ case PCRE2_ERROR_INTERNAL_BAD_CODE:
|
||
+ case PCRE2_ERROR_INTERNAL_BAD_CODE_IN_SKIP:
|
||
+ *errcode = G_REGEX_ERROR_INTERNAL;
|
||
+ *errmsg = _("internal error");
|
||
+ break;
|
||
+ case PCRE2_ERROR_INVALID_SUBPATTERN_NAME:
|
||
+ case PCRE2_ERROR_CLASS_INVALID_RANGE:
|
||
+ case PCRE2_ERROR_ZERO_RELATIVE_REFERENCE:
|
||
+ case PCRE2_ERROR_PARENTHESES_STACK_CHECK:
|
||
+ case PCRE2_ERROR_LOOKBEHIND_TOO_COMPLICATED:
|
||
+ case PCRE2_ERROR_CALLOUT_NUMBER_TOO_BIG:
|
||
+ case PCRE2_ERROR_MISSING_CALLOUT_CLOSING:
|
||
+ case PCRE2_ERROR_ESCAPE_INVALID_IN_VERB:
|
||
+ case PCRE2_ERROR_NULL_PATTERN:
|
||
+ case PCRE2_ERROR_BAD_OPTIONS:
|
||
+ case PCRE2_ERROR_PARENTHESES_NEST_TOO_DEEP:
|
||
+ case PCRE2_ERROR_BACKSLASH_O_MISSING_BRACE:
|
||
+ case PCRE2_ERROR_INVALID_OCTAL:
|
||
+ case PCRE2_ERROR_CALLOUT_STRING_TOO_LONG:
|
||
+ case PCRE2_ERROR_BACKSLASH_U_CODE_POINT_TOO_BIG:
|
||
+ case PCRE2_ERROR_MISSING_OCTAL_OR_HEX_DIGITS:
|
||
+ case PCRE2_ERROR_VERSION_CONDITION_SYNTAX:
|
||
+ case PCRE2_ERROR_CALLOUT_NO_STRING_DELIMITER:
|
||
+ case PCRE2_ERROR_CALLOUT_BAD_STRING_DELIMITER:
|
||
+ case PCRE2_ERROR_BACKSLASH_C_CALLER_DISABLED:
|
||
+ case PCRE2_ERROR_QUERY_BARJX_NEST_TOO_DEEP:
|
||
+ case PCRE2_ERROR_PATTERN_TOO_COMPLICATED:
|
||
+ case PCRE2_ERROR_LOOKBEHIND_TOO_LONG:
|
||
+ case PCRE2_ERROR_PATTERN_STRING_TOO_LONG:
|
||
+ case PCRE2_ERROR_BAD_LITERAL_OPTIONS:
|
||
default:
|
||
*errcode = G_REGEX_ERROR_COMPILE;
|
||
+ *errmsg = _("internal error");
|
||
+ break;
|
||
}
|
||
+
|
||
+ g_assert (*errcode != 0);
|
||
+ g_assert (*errmsg != NULL);
|
||
}
|
||
|
||
/* GMatchInfo */
|
||
@@ -568,12 +747,16 @@ match_info_new (const GRegex *regex,
|
||
match_info->regex = g_regex_ref ((GRegex *)regex);
|
||
match_info->string = string;
|
||
match_info->string_len = string_len;
|
||
- match_info->matches = PCRE_ERROR_NOMATCH;
|
||
+ match_info->matches = PCRE2_ERROR_NOMATCH;
|
||
match_info->pos = start_position;
|
||
match_info->match_opts = match_options;
|
||
|
||
- pcre_fullinfo (regex->pcre_re, regex->extra,
|
||
- PCRE_INFO_CAPTURECOUNT, &match_info->n_subpatterns);
|
||
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT,
|
||
+ &match_info->n_subpatterns);
|
||
+
|
||
+ match_info->match_context = pcre2_match_context_create (NULL);
|
||
+ pcre2_set_match_limit (match_info->match_context, 65536); /* should be plenty */
|
||
+ pcre2_set_recursion_limit (match_info->match_context, 64); /* should be plenty */
|
||
|
||
if (is_dfa)
|
||
{
|
||
@@ -593,9 +776,41 @@ match_info_new (const GRegex *regex,
|
||
match_info->offsets[0] = -1;
|
||
match_info->offsets[1] = -1;
|
||
|
||
+ match_info->match_data = pcre2_match_data_create_from_pattern (
|
||
+ match_info->regex->pcre_re,
|
||
+ NULL);
|
||
+
|
||
return match_info;
|
||
}
|
||
|
||
+static gboolean
|
||
+recalc_match_offsets (GMatchInfo *match_info,
|
||
+ GError **error)
|
||
+{
|
||
+ PCRE2_SIZE *ovector;
|
||
+ gint i;
|
||
+
|
||
+ if (pcre2_get_ovector_count (match_info->match_data) > G_MAXINT / 2)
|
||
+ {
|
||
+ g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH,
|
||
+ _("Error while matching regular expression %s: %s"),
|
||
+ match_info->regex->pattern, _("code overflow"));
|
||
+ return FALSE;
|
||
+ }
|
||
+
|
||
+ match_info->n_offsets = pcre2_get_ovector_count (match_info->match_data) * 2;
|
||
+ ovector = pcre2_get_ovector_pointer (match_info->match_data);
|
||
+ match_info->offsets = g_realloc_n (match_info->offsets,
|
||
+ match_info->n_offsets,
|
||
+ sizeof (gint));
|
||
+ for (i = 0; i < match_info->n_offsets; i++)
|
||
+ {
|
||
+ match_info->offsets[i] = (int) ovector[i];
|
||
+ }
|
||
+
|
||
+ return TRUE;
|
||
+}
|
||
+
|
||
/**
|
||
* g_match_info_get_regex:
|
||
* @match_info: a #GMatchInfo
|
||
@@ -667,6 +882,10 @@ g_match_info_unref (GMatchInfo *match_info)
|
||
if (g_atomic_int_dec_and_test (&match_info->ref_count))
|
||
{
|
||
g_regex_unref (match_info->regex);
|
||
+ if (match_info->match_context)
|
||
+ pcre2_match_context_free (match_info->match_context);
|
||
+ if (match_info->match_data)
|
||
+ pcre2_match_data_free (match_info->match_data);
|
||
g_free (match_info->offsets);
|
||
g_free (match_info->workspace);
|
||
g_free (match_info);
|
||
@@ -713,6 +932,7 @@ g_match_info_next (GMatchInfo *match_info,
|
||
{
|
||
gint prev_match_start;
|
||
gint prev_match_end;
|
||
+ gint opts;
|
||
|
||
g_return_val_if_fail (match_info != NULL, FALSE);
|
||
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
|
||
@@ -725,25 +945,29 @@ g_match_info_next (GMatchInfo *match_info,
|
||
{
|
||
/* we have reached the end of the string */
|
||
match_info->pos = -1;
|
||
- match_info->matches = PCRE_ERROR_NOMATCH;
|
||
+ match_info->matches = PCRE2_ERROR_NOMATCH;
|
||
return FALSE;
|
||
}
|
||
|
||
- match_info->matches = pcre_exec (match_info->regex->pcre_re,
|
||
- match_info->regex->extra,
|
||
- match_info->string,
|
||
- match_info->string_len,
|
||
- match_info->pos,
|
||
- match_info->regex->match_opts | match_info->match_opts,
|
||
- match_info->offsets,
|
||
- match_info->n_offsets);
|
||
- if (IS_PCRE_ERROR (match_info->matches))
|
||
+ opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts);
|
||
+ match_info->matches = pcre2_match (match_info->regex->pcre_re,
|
||
+ (PCRE2_SPTR8) match_info->string,
|
||
+ match_info->string_len,
|
||
+ match_info->pos,
|
||
+ opts & ~G_REGEX_FLAGS_CONVERTED,
|
||
+ match_info->match_data,
|
||
+ match_info->match_context);
|
||
+
|
||
+ if (IS_PCRE2_ERROR (match_info->matches))
|
||
{
|
||
g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH,
|
||
_("Error while matching regular expression %s: %s"),
|
||
match_info->regex->pattern, match_error (match_info->matches));
|
||
return FALSE;
|
||
}
|
||
+ else
|
||
+ if (!recalc_match_offsets (match_info, error))
|
||
+ return FALSE;
|
||
|
||
/* avoid infinite loops if the pattern is an empty string or something
|
||
* equivalent */
|
||
@@ -753,7 +977,7 @@ g_match_info_next (GMatchInfo *match_info,
|
||
{
|
||
/* we have reached the end of the string */
|
||
match_info->pos = -1;
|
||
- match_info->matches = PCRE_ERROR_NOMATCH;
|
||
+ match_info->matches = PCRE2_ERROR_NOMATCH;
|
||
return FALSE;
|
||
}
|
||
|
||
@@ -831,10 +1055,10 @@ g_match_info_get_match_count (const GMatchInfo *match_info)
|
||
{
|
||
g_return_val_if_fail (match_info, -1);
|
||
|
||
- if (match_info->matches == PCRE_ERROR_NOMATCH)
|
||
+ if (match_info->matches == PCRE2_ERROR_NOMATCH)
|
||
/* no match */
|
||
return 0;
|
||
- else if (match_info->matches < PCRE_ERROR_NOMATCH)
|
||
+ else if (match_info->matches < PCRE2_ERROR_NOMATCH)
|
||
/* error */
|
||
return -1;
|
||
else
|
||
@@ -889,7 +1113,7 @@ g_match_info_is_partial_match (const GMatchInfo *match_info)
|
||
{
|
||
g_return_val_if_fail (match_info != NULL, FALSE);
|
||
|
||
- return match_info->matches == PCRE_ERROR_PARTIAL;
|
||
+ return match_info->matches == PCRE2_ERROR_PARTIAL;
|
||
}
|
||
|
||
/**
|
||
@@ -986,8 +1210,6 @@ gchar *
|
||
g_match_info_fetch (const GMatchInfo *match_info,
|
||
gint match_num)
|
||
{
|
||
- /* we cannot use pcre_get_substring() because it allocates the
|
||
- * string using pcre_malloc(). */
|
||
gchar *match = NULL;
|
||
gint start, end;
|
||
|
||
@@ -1067,24 +1289,25 @@ g_match_info_fetch_pos (const GMatchInfo *match_info,
|
||
* Returns number of first matched subpattern with name @name.
|
||
* There may be more than one in case when DUPNAMES is used,
|
||
* and not all subpatterns with that name match;
|
||
- * pcre_get_stringnumber() does not work in that case.
|
||
+ * pcre2_substring_number_from_name() does not work in that case.
|
||
*/
|
||
static gint
|
||
get_matched_substring_number (const GMatchInfo *match_info,
|
||
const gchar *name)
|
||
{
|
||
gint entrysize;
|
||
- gchar *first, *last;
|
||
+ PCRE2_SPTR first, last;
|
||
guchar *entry;
|
||
|
||
- if (!(match_info->regex->compile_opts & G_REGEX_DUPNAMES))
|
||
- return pcre_get_stringnumber (match_info->regex->pcre_re, name);
|
||
+ if (!(match_info->regex->compile_opts & PCRE2_DUPNAMES))
|
||
+ return pcre2_substring_number_from_name (match_info->regex->pcre_re, (PCRE2_SPTR8) name);
|
||
|
||
- /* This code is copied from pcre_get.c: get_first_set() */
|
||
- entrysize = pcre_get_stringtable_entries (match_info->regex->pcre_re,
|
||
- name,
|
||
- &first,
|
||
- &last);
|
||
+ /* This code is analogous to code from pcre2_substring.c:
|
||
+ * pcre2_substring_get_byname() */
|
||
+ entrysize = pcre2_substring_nametable_scan (match_info->regex->pcre_re,
|
||
+ (PCRE2_SPTR8) name,
|
||
+ &first,
|
||
+ &last);
|
||
|
||
if (entrysize <= 0)
|
||
return entrysize;
|
||
@@ -1122,8 +1345,6 @@ gchar *
|
||
g_match_info_fetch_named (const GMatchInfo *match_info,
|
||
const gchar *name)
|
||
{
|
||
- /* we cannot use pcre_get_named_substring() because it allocates the
|
||
- * string using pcre_malloc(). */
|
||
gint num;
|
||
|
||
g_return_val_if_fail (match_info != NULL, NULL);
|
||
@@ -1205,8 +1426,6 @@ g_match_info_fetch_named_pos (const GMatchInfo *match_info,
|
||
gchar **
|
||
g_match_info_fetch_all (const GMatchInfo *match_info)
|
||
{
|
||
- /* we cannot use pcre_get_substring_list() because the returned value
|
||
- * isn't suitable for g_strfreev(). */
|
||
gchar **result;
|
||
gint i;
|
||
|
||
@@ -1264,9 +1483,7 @@ g_regex_unref (GRegex *regex)
|
||
{
|
||
g_free (regex->pattern);
|
||
if (regex->pcre_re != NULL)
|
||
- pcre_free (regex->pcre_re);
|
||
- if (regex->extra != NULL)
|
||
- pcre_free (regex->extra);
|
||
+ pcre2_code_free (regex->pcre_re);
|
||
g_free (regex);
|
||
}
|
||
}
|
||
@@ -1274,11 +1491,11 @@ g_regex_unref (GRegex *regex)
|
||
/*
|
||
* @match_options: (inout) (optional):
|
||
*/
|
||
-static pcre *regex_compile (const gchar *pattern,
|
||
- GRegexCompileFlags compile_options,
|
||
- GRegexCompileFlags *compile_options_out,
|
||
- GRegexMatchFlags *match_options,
|
||
- GError **error);
|
||
+static pcre2_code *regex_compile (const gchar *pattern,
|
||
+ GRegexCompileFlags compile_options,
|
||
+ GRegexCompileFlags *compile_options_out,
|
||
+ GRegexMatchFlags *match_options,
|
||
+ GError **error);
|
||
|
||
/**
|
||
* g_regex_new:
|
||
@@ -1302,10 +1519,13 @@ g_regex_new (const gchar *pattern,
|
||
GError **error)
|
||
{
|
||
GRegex *regex;
|
||
- pcre *re;
|
||
- const gchar *errmsg;
|
||
- gboolean optimize = FALSE;
|
||
+ pcre2_code *re;
|
||
static gsize initialised = 0;
|
||
+ GRegexCompileFlags orig_compile_opts;
|
||
+
|
||
+ orig_compile_opts = compile_options;
|
||
+ compile_options = map_to_pcre2_compile_flags (compile_options);
|
||
+ match_options = map_to_pcre2_match_flags (match_options);
|
||
|
||
g_return_val_if_fail (pattern != NULL, NULL);
|
||
g_return_val_if_fail (error == NULL || *error == NULL, NULL);
|
||
@@ -1314,17 +1534,13 @@ g_regex_new (const gchar *pattern,
|
||
|
||
if (g_once_init_enter (&initialised))
|
||
{
|
||
- int supports_utf8, supports_ucp;
|
||
+ int supports_utf8;
|
||
|
||
- pcre_config (PCRE_CONFIG_UTF8, &supports_utf8);
|
||
+ pcre2_config (PCRE2_CONFIG_UNICODE, &supports_utf8);
|
||
if (!supports_utf8)
|
||
g_critical (_("PCRE library is compiled without UTF8 support"));
|
||
|
||
- pcre_config (PCRE_CONFIG_UNICODE_PROPERTIES, &supports_ucp);
|
||
- if (!supports_ucp)
|
||
- g_critical (_("PCRE library is compiled without UTF8 properties support"));
|
||
-
|
||
- g_once_init_leave (&initialised, supports_utf8 && supports_ucp ? 1 : 2);
|
||
+ g_once_init_leave (&initialised, supports_utf8 ? 1 : 2);
|
||
}
|
||
|
||
if (G_UNLIKELY (initialised != 1))
|
||
@@ -1334,14 +1550,22 @@ g_regex_new (const gchar *pattern,
|
||
return NULL;
|
||
}
|
||
|
||
- /* G_REGEX_OPTIMIZE has the same numeric value of PCRE_NO_UTF8_CHECK,
|
||
- * as we do not need to wrap PCRE_NO_UTF8_CHECK. */
|
||
- if (compile_options & G_REGEX_OPTIMIZE)
|
||
- optimize = TRUE;
|
||
+ switch (compile_options & G_REGEX_NEWLINE_MASK)
|
||
+ {
|
||
+ case 0: /* PCRE2_NEWLINE_ANY */
|
||
+ case PCRE2_NEWLINE_CR:
|
||
+ case PCRE2_NEWLINE_LF:
|
||
+ case PCRE2_NEWLINE_CRLF:
|
||
+ case PCRE2_NEWLINE_ANYCRLF:
|
||
+ break;
|
||
+ default:
|
||
+ g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS,
|
||
+ "Invalid newline flags");
|
||
+ return NULL;
|
||
+ }
|
||
|
||
re = regex_compile (pattern, compile_options, &compile_options,
|
||
&match_options, error);
|
||
-
|
||
if (re == NULL)
|
||
return NULL;
|
||
|
||
@@ -1350,80 +1574,85 @@ g_regex_new (const gchar *pattern,
|
||
regex->pattern = g_strdup (pattern);
|
||
regex->pcre_re = re;
|
||
regex->compile_opts = compile_options;
|
||
+ regex->orig_compile_opts = orig_compile_opts;
|
||
regex->match_opts = match_options;
|
||
|
||
- if (optimize)
|
||
- {
|
||
- regex->extra = pcre_study (regex->pcre_re, 0, &errmsg);
|
||
- if (errmsg != NULL)
|
||
- {
|
||
- GError *tmp_error = g_error_new (G_REGEX_ERROR,
|
||
- G_REGEX_ERROR_OPTIMIZE,
|
||
- _("Error while optimizing "
|
||
- "regular expression %s: %s"),
|
||
- regex->pattern,
|
||
- errmsg);
|
||
- g_propagate_error (error, tmp_error);
|
||
-
|
||
- g_regex_unref (regex);
|
||
- return NULL;
|
||
- }
|
||
- }
|
||
-
|
||
return regex;
|
||
}
|
||
|
||
-static pcre *
|
||
-regex_compile (const gchar *pattern,
|
||
- GRegexCompileFlags compile_options,
|
||
- GRegexCompileFlags *compile_options_out,
|
||
- GRegexMatchFlags *match_options,
|
||
- GError **error)
|
||
+static gint
|
||
+extract_newline_options (const GRegexCompileFlags compile_options,
|
||
+ const GRegexMatchFlags *match_options)
|
||
+{
|
||
+ gint newline_options = PCRE2_NEWLINE_ANY;
|
||
+
|
||
+ if (compile_options & G_REGEX_NEWLINE_MASK)
|
||
+ newline_options = compile_options & G_REGEX_NEWLINE_MASK;
|
||
+ if (match_options && *match_options & G_REGEX_MATCH_NEWLINE_MASK)
|
||
+ newline_options = *match_options & G_REGEX_MATCH_NEWLINE_MASK;
|
||
+
|
||
+ return newline_options;
|
||
+}
|
||
+
|
||
+static gint
|
||
+extract_bsr_options (const GRegexCompileFlags compile_options,
|
||
+ const GRegexMatchFlags *match_options)
|
||
+{
|
||
+ gint bsr_options = PCRE2_BSR_UNICODE;
|
||
+
|
||
+ if (compile_options & PCRE2_BSR_ANYCRLF)
|
||
+ bsr_options = PCRE2_BSR_ANYCRLF;
|
||
+ if (match_options && *match_options & PCRE2_BSR_ANYCRLF)
|
||
+ bsr_options = PCRE2_BSR_ANYCRLF;
|
||
+ if (match_options && *match_options & PCRE2_BSR_UNICODE)
|
||
+ bsr_options = PCRE2_BSR_UNICODE;
|
||
+
|
||
+ return bsr_options;
|
||
+}
|
||
+
|
||
+static pcre2_code *
|
||
+regex_compile (const gchar *pattern,
|
||
+ GRegexCompileFlags compile_options,
|
||
+ GRegexCompileFlags *compile_options_out,
|
||
+ GRegexMatchFlags *match_options,
|
||
+ GError **error)
|
||
{
|
||
- pcre *re;
|
||
+ pcre2_code *re;
|
||
+ pcre2_compile_context *context;
|
||
const gchar *errmsg;
|
||
- gint erroffset;
|
||
+ PCRE2_SIZE erroffset;
|
||
gint errcode;
|
||
GRegexCompileFlags nonpcre_compile_options;
|
||
unsigned long int pcre_compile_options;
|
||
|
||
nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK;
|
||
|
||
- /* In GRegex the string are, by default, UTF-8 encoded. PCRE
|
||
- * instead uses UTF-8 only if required with PCRE_UTF8. */
|
||
- if (compile_options & G_REGEX_RAW)
|
||
- {
|
||
- /* disable utf-8 */
|
||
- compile_options &= ~G_REGEX_RAW;
|
||
- }
|
||
- else
|
||
- {
|
||
- /* enable utf-8 */
|
||
- compile_options |= PCRE_UTF8 | PCRE_NO_UTF8_CHECK;
|
||
+ context = pcre2_compile_context_create (NULL);
|
||
|
||
- if (match_options != NULL)
|
||
- *match_options |= PCRE_NO_UTF8_CHECK;
|
||
- }
|
||
+ /* set newline options */
|
||
+ pcre2_set_newline (context, extract_newline_options (compile_options, match_options));
|
||
+
|
||
+ /* set bsr options */
|
||
+ pcre2_set_bsr (context, extract_bsr_options (compile_options, match_options));
|
||
|
||
- /* PCRE_NEWLINE_ANY is the default for the internal PCRE but
|
||
- * not for the system one. */
|
||
- if (!(compile_options & G_REGEX_NEWLINE_CR) &&
|
||
- !(compile_options & G_REGEX_NEWLINE_LF))
|
||
+ /* In case UTF-8 mode is used, also set PCRE2_NO_UTF_CHECK */
|
||
+ if (compile_options & PCRE2_UTF)
|
||
{
|
||
- compile_options |= PCRE_NEWLINE_ANY;
|
||
+ compile_options |= PCRE2_NO_UTF_CHECK;
|
||
+ if (match_options != NULL)
|
||
+ *match_options |= PCRE2_NO_UTF_CHECK;
|
||
}
|
||
|
||
- compile_options |= PCRE_UCP;
|
||
-
|
||
- /* PCRE_BSR_UNICODE is the default for the internal PCRE but
|
||
- * possibly not for the system one.
|
||
- */
|
||
- if (~compile_options & G_REGEX_BSR_ANYCRLF)
|
||
- compile_options |= PCRE_BSR_UNICODE;
|
||
+ compile_options |= PCRE2_UCP;
|
||
|
||
/* compile the pattern */
|
||
- re = pcre_compile2 (pattern, compile_options, &errcode,
|
||
- &errmsg, &erroffset, NULL);
|
||
+ re = pcre2_compile ((PCRE2_SPTR8) pattern,
|
||
+ PCRE2_ZERO_TERMINATED,
|
||
+ compile_options & ~G_REGEX_FLAGS_CONVERTED,
|
||
+ &errcode,
|
||
+ &erroffset,
|
||
+ context);
|
||
+ pcre2_compile_context_free (context);
|
||
|
||
/* if the compilation failed, set the error member and return
|
||
* immediately */
|
||
@@ -1440,7 +1669,7 @@ regex_compile (const gchar *pattern,
|
||
|
||
tmp_error = g_error_new (G_REGEX_ERROR, errcode,
|
||
_("Error while compiling regular "
|
||
- "expression %s at char %d: %s"),
|
||
+ "expression %s at char %" G_GSIZE_FORMAT ": %s"),
|
||
pattern, erroffset, errmsg);
|
||
g_propagate_error (error, tmp_error);
|
||
|
||
@@ -1449,22 +1678,22 @@ regex_compile (const gchar *pattern,
|
||
|
||
/* For options set at the beginning of the pattern, pcre puts them into
|
||
* compile options, e.g. "(?i)foo" will make the pcre structure store
|
||
- * PCRE_CASELESS even though it wasn't explicitly given for compilation. */
|
||
- pcre_fullinfo (re, NULL, PCRE_INFO_OPTIONS, &pcre_compile_options);
|
||
+ * PCRE2_CASELESS even though it wasn't explicitly given for compilation. */
|
||
+ pcre2_pattern_info (re, PCRE2_INFO_ALLOPTIONS, &pcre_compile_options);
|
||
compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK;
|
||
|
||
- /* Don't leak PCRE_NEWLINE_ANY, which is part of PCRE_NEWLINE_ANYCRLF */
|
||
- if ((pcre_compile_options & PCRE_NEWLINE_ANYCRLF) != PCRE_NEWLINE_ANYCRLF)
|
||
- compile_options &= ~PCRE_NEWLINE_ANY;
|
||
+ /* Don't leak PCRE2_NEWLINE_ANY, which is part of PCRE2_NEWLINE_ANYCRLF */
|
||
+ if ((pcre_compile_options & PCRE2_NEWLINE_ANYCRLF) != PCRE2_NEWLINE_ANYCRLF)
|
||
+ compile_options &= ~PCRE2_NEWLINE_ANY;
|
||
|
||
compile_options |= nonpcre_compile_options;
|
||
|
||
- if (!(compile_options & G_REGEX_DUPNAMES))
|
||
+ if (!(compile_options & PCRE2_DUPNAMES))
|
||
{
|
||
gboolean jchanged = FALSE;
|
||
- pcre_fullinfo (re, NULL, PCRE_INFO_JCHANGED, &jchanged);
|
||
+ pcre2_pattern_info (re, PCRE2_INFO_JCHANGED, &jchanged);
|
||
if (jchanged)
|
||
- compile_options |= G_REGEX_DUPNAMES;
|
||
+ compile_options |= PCRE2_DUPNAMES;
|
||
}
|
||
|
||
if (compile_options_out != 0)
|
||
@@ -1509,8 +1738,7 @@ g_regex_get_max_backref (const GRegex *regex)
|
||
{
|
||
gint value;
|
||
|
||
- pcre_fullinfo (regex->pcre_re, regex->extra,
|
||
- PCRE_INFO_BACKREFMAX, &value);
|
||
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_BACKREFMAX, &value);
|
||
|
||
return value;
|
||
}
|
||
@@ -1530,8 +1758,7 @@ g_regex_get_capture_count (const GRegex *regex)
|
||
{
|
||
gint value;
|
||
|
||
- pcre_fullinfo (regex->pcre_re, regex->extra,
|
||
- PCRE_INFO_CAPTURECOUNT, &value);
|
||
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT, &value);
|
||
|
||
return value;
|
||
}
|
||
@@ -1551,8 +1778,7 @@ g_regex_get_has_cr_or_lf (const GRegex *regex)
|
||
{
|
||
gint value;
|
||
|
||
- pcre_fullinfo (regex->pcre_re, regex->extra,
|
||
- PCRE_INFO_HASCRORLF, &value);
|
||
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_HASCRORLF, &value);
|
||
|
||
return !!value;
|
||
}
|
||
@@ -1574,8 +1800,8 @@ g_regex_get_max_lookbehind (const GRegex *regex)
|
||
{
|
||
gint max_lookbehind;
|
||
|
||
- pcre_fullinfo (regex->pcre_re, regex->extra,
|
||
- PCRE_INFO_MAXLOOKBEHIND, &max_lookbehind);
|
||
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_MAXLOOKBEHIND,
|
||
+ &max_lookbehind);
|
||
|
||
return max_lookbehind;
|
||
}
|
||
@@ -1597,9 +1823,47 @@ g_regex_get_max_lookbehind (const GRegex *regex)
|
||
GRegexCompileFlags
|
||
g_regex_get_compile_flags (const GRegex *regex)
|
||
{
|
||
+ gint extra_flags, info_value;
|
||
+
|
||
g_return_val_if_fail (regex != NULL, 0);
|
||
|
||
- return regex->compile_opts;
|
||
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||
+ /* Preserve original G_REGEX_OPTIMIZE */
|
||
+ extra_flags = (regex->orig_compile_opts & G_REGEX_OPTIMIZE);
|
||
+G_GNUC_END_IGNORE_DEPRECATIONS
|
||
+
|
||
+ /* Also include the newline options */
|
||
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_NEWLINE, &info_value);
|
||
+ switch (info_value)
|
||
+ {
|
||
+ case PCRE2_NEWLINE_ANYCRLF:
|
||
+ extra_flags |= G_REGEX_NEWLINE_ANYCRLF;
|
||
+ break;
|
||
+ case PCRE2_NEWLINE_CRLF:
|
||
+ extra_flags |= G_REGEX_NEWLINE_CRLF;
|
||
+ break;
|
||
+ case PCRE2_NEWLINE_LF:
|
||
+ extra_flags |= G_REGEX_NEWLINE_LF;
|
||
+ break;
|
||
+ case PCRE2_NEWLINE_CR:
|
||
+ extra_flags |= G_REGEX_NEWLINE_CR;
|
||
+ break;
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ /* Also include the bsr options */
|
||
+ pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_BSR, &info_value);
|
||
+ switch (info_value)
|
||
+ {
|
||
+ case PCRE2_BSR_ANYCRLF:
|
||
+ extra_flags |= G_REGEX_BSR_ANYCRLF;
|
||
+ break;
|
||
+ default:
|
||
+ break;
|
||
+ }
|
||
+
|
||
+ return map_to_pcre1_compile_flags (regex->compile_opts) | extra_flags;
|
||
}
|
||
|
||
/**
|
||
@@ -1617,7 +1881,7 @@ g_regex_get_match_flags (const GRegex *regex)
|
||
{
|
||
g_return_val_if_fail (regex != NULL, 0);
|
||
|
||
- return regex->match_opts & G_REGEX_MATCH_MASK;
|
||
+ return map_to_pcre1_match_flags (regex->match_opts & G_REGEX_MATCH_MASK);
|
||
}
|
||
|
||
/**
|
||
@@ -1651,6 +1915,9 @@ g_regex_match_simple (const gchar *pattern,
|
||
GRegex *regex;
|
||
gboolean result;
|
||
|
||
+ compile_options = map_to_pcre2_compile_flags (compile_options);
|
||
+ match_options = map_to_pcre2_match_flags (match_options);
|
||
+
|
||
regex = g_regex_new (pattern, compile_options, G_REGEX_MATCH_DEFAULT, NULL);
|
||
if (!regex)
|
||
return FALSE;
|
||
@@ -1718,6 +1985,8 @@ g_regex_match (const GRegex *regex,
|
||
GRegexMatchFlags match_options,
|
||
GMatchInfo **match_info)
|
||
{
|
||
+ match_options = map_to_pcre2_match_flags (match_options);
|
||
+
|
||
return g_regex_match_full (regex, string, -1, 0, match_options,
|
||
match_info, NULL);
|
||
}
|
||
@@ -1801,6 +2070,8 @@ g_regex_match_full (const GRegex *regex,
|
||
GMatchInfo *info;
|
||
gboolean match_ok;
|
||
|
||
+ match_options = map_to_pcre2_match_flags (match_options);
|
||
+
|
||
g_return_val_if_fail (regex != NULL, FALSE);
|
||
g_return_val_if_fail (string != NULL, FALSE);
|
||
g_return_val_if_fail (start_position >= 0, FALSE);
|
||
@@ -1851,6 +2122,8 @@ g_regex_match_all (const GRegex *regex,
|
||
GRegexMatchFlags match_options,
|
||
GMatchInfo **match_info)
|
||
{
|
||
+ match_options = map_to_pcre2_match_flags (match_options);
|
||
+
|
||
return g_regex_match_all_full (regex, string, -1, 0, match_options,
|
||
match_info, NULL);
|
||
}
|
||
@@ -1920,39 +2193,29 @@ g_regex_match_all_full (const GRegex *regex,
|
||
{
|
||
GMatchInfo *info;
|
||
gboolean done;
|
||
- pcre *pcre_re;
|
||
- pcre_extra *extra;
|
||
+ pcre2_code *pcre_re;
|
||
gboolean retval;
|
||
|
||
+ match_options = map_to_pcre2_match_flags (match_options);
|
||
+
|
||
g_return_val_if_fail (regex != NULL, FALSE);
|
||
g_return_val_if_fail (string != NULL, FALSE);
|
||
g_return_val_if_fail (start_position >= 0, FALSE);
|
||
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
|
||
g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE);
|
||
|
||
-#ifdef PCRE_NO_AUTO_POSSESS
|
||
- /* For PCRE >= 8.34 we need to turn off PCRE_NO_AUTO_POSSESS, which
|
||
- * is an optimization for normal regex matching, but results in omitting
|
||
- * some shorter matches here, and an observable behaviour change.
|
||
+ /* For PCRE2 we need to turn off PCRE2_NO_AUTO_POSSESS, which is an
|
||
+ * optimization for normal regex matching, but results in omitting some
|
||
+ * shorter matches here, and an observable behaviour change.
|
||
*
|
||
* DFA matching is rather niche, and very rarely used according to
|
||
* codesearch.debian.net, so don't bother caching the recompiled RE. */
|
||
pcre_re = regex_compile (regex->pattern,
|
||
- regex->compile_opts | PCRE_NO_AUTO_POSSESS,
|
||
+ regex->compile_opts | PCRE2_NO_AUTO_POSSESS,
|
||
NULL, NULL, error);
|
||
-
|
||
if (pcre_re == NULL)
|
||
return FALSE;
|
||
|
||
- /* Not bothering to cache the optimization data either, with similar
|
||
- * reasoning */
|
||
- extra = NULL;
|
||
-#else
|
||
- /* For PCRE < 8.33 the precompiled regex is fine. */
|
||
- pcre_re = regex->pcre_re;
|
||
- extra = regex->extra;
|
||
-#endif
|
||
-
|
||
info = match_info_new (regex, string, string_len, start_position,
|
||
match_options, TRUE);
|
||
|
||
@@ -1960,29 +2223,38 @@ g_regex_match_all_full (const GRegex *regex,
|
||
while (!done)
|
||
{
|
||
done = TRUE;
|
||
- info->matches = pcre_dfa_exec (pcre_re, extra,
|
||
- info->string, info->string_len,
|
||
- info->pos,
|
||
- regex->match_opts | match_options,
|
||
- info->offsets, info->n_offsets,
|
||
- info->workspace, info->n_workspace);
|
||
- if (info->matches == PCRE_ERROR_DFA_WSSIZE)
|
||
+ info->matches = pcre2_dfa_match (pcre_re,
|
||
+ (PCRE2_SPTR8) info->string, info->string_len,
|
||
+ info->pos,
|
||
+ (regex->match_opts | match_options | PCRE2_NO_UTF_CHECK) & ~G_REGEX_FLAGS_CONVERTED,
|
||
+ info->match_data,
|
||
+ info->match_context,
|
||
+ info->workspace, info->n_workspace);
|
||
+
|
||
+ if (!recalc_match_offsets (info, error))
|
||
+ return FALSE;
|
||
+
|
||
+ if (info->matches == PCRE2_ERROR_DFA_WSSIZE)
|
||
{
|
||
/* info->workspace is too small. */
|
||
info->n_workspace *= 2;
|
||
- info->workspace = g_realloc (info->workspace,
|
||
- info->n_workspace * sizeof (gint));
|
||
+ info->workspace = g_realloc_n (info->workspace,
|
||
+ info->n_workspace,
|
||
+ sizeof (gint));
|
||
done = FALSE;
|
||
}
|
||
else if (info->matches == 0)
|
||
{
|
||
/* info->offsets is too small. */
|
||
info->n_offsets *= 2;
|
||
- info->offsets = g_realloc (info->offsets,
|
||
- info->n_offsets * sizeof (gint));
|
||
+ info->offsets = g_realloc_n (info->offsets,
|
||
+ info->n_offsets,
|
||
+ sizeof (gint));
|
||
+ pcre2_match_data_free (info->match_data);
|
||
+ info->match_data = pcre2_match_data_create (info->n_offsets, NULL);
|
||
done = FALSE;
|
||
}
|
||
- else if (IS_PCRE_ERROR (info->matches))
|
||
+ else if (IS_PCRE2_ERROR (info->matches))
|
||
{
|
||
g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH,
|
||
_("Error while matching regular expression %s: %s"),
|
||
@@ -1990,9 +2262,7 @@ g_regex_match_all_full (const GRegex *regex,
|
||
}
|
||
}
|
||
|
||
-#ifdef PCRE_NO_AUTO_POSSESS
|
||
- pcre_free (pcre_re);
|
||
-#endif
|
||
+ pcre2_code_free (pcre_re);
|
||
|
||
/* don’t assert that (info->matches <= info->n_subpatterns + 1) as that only
|
||
* holds true for a single match, rather than matching all */
|
||
@@ -2030,8 +2300,8 @@ g_regex_get_string_number (const GRegex *regex,
|
||
g_return_val_if_fail (regex != NULL, -1);
|
||
g_return_val_if_fail (name != NULL, -1);
|
||
|
||
- num = pcre_get_stringnumber (regex->pcre_re, name);
|
||
- if (num == PCRE_ERROR_NOSUBSTRING)
|
||
+ num = pcre2_substring_number_from_name (regex->pcre_re, (PCRE2_SPTR8) name);
|
||
+ if (num == PCRE2_ERROR_NOSUBSTRING)
|
||
num = -1;
|
||
|
||
return num;
|
||
@@ -2086,6 +2356,9 @@ g_regex_split_simple (const gchar *pattern,
|
||
GRegex *regex;
|
||
gchar **result;
|
||
|
||
+ compile_options = map_to_pcre2_compile_flags (compile_options);
|
||
+ match_options = map_to_pcre2_match_flags (match_options);
|
||
+
|
||
regex = g_regex_new (pattern, compile_options, 0, NULL);
|
||
if (!regex)
|
||
return NULL;
|
||
@@ -2129,6 +2402,8 @@ g_regex_split (const GRegex *regex,
|
||
const gchar *string,
|
||
GRegexMatchFlags match_options)
|
||
{
|
||
+ match_options = map_to_pcre2_match_flags (match_options);
|
||
+
|
||
return g_regex_split_full (regex, string, -1, 0,
|
||
match_options, 0, NULL);
|
||
}
|
||
@@ -2193,6 +2468,8 @@ g_regex_split_full (const GRegex *regex,
|
||
/* the returned array of char **s */
|
||
gchar **string_list;
|
||
|
||
+ match_options = map_to_pcre2_match_flags (match_options);
|
||
+
|
||
g_return_val_if_fail (regex != NULL, NULL);
|
||
g_return_val_if_fail (string != NULL, NULL);
|
||
g_return_val_if_fail (start_position >= 0, NULL);
|
||
@@ -2817,6 +3094,8 @@ g_regex_replace (const GRegex *regex,
|
||
GList *list;
|
||
GError *tmp_error = NULL;
|
||
|
||
+ match_options = map_to_pcre2_match_flags (match_options);
|
||
+
|
||
g_return_val_if_fail (regex != NULL, NULL);
|
||
g_return_val_if_fail (string != NULL, NULL);
|
||
g_return_val_if_fail (start_position >= 0, NULL);
|
||
@@ -2886,6 +3165,8 @@ g_regex_replace_literal (const GRegex *regex,
|
||
GRegexMatchFlags match_options,
|
||
GError **error)
|
||
{
|
||
+ match_options = map_to_pcre2_match_flags (match_options);
|
||
+
|
||
g_return_val_if_fail (replacement != NULL, NULL);
|
||
g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL);
|
||
|
||
@@ -2974,6 +3255,8 @@ g_regex_replace_eval (const GRegex *regex,
|
||
gboolean done = FALSE;
|
||
GError *tmp_error = NULL;
|
||
|
||
+ match_options = map_to_pcre2_match_flags (match_options);
|
||
+
|
||
g_return_val_if_fail (regex != NULL, NULL);
|
||
g_return_val_if_fail (string != NULL, NULL);
|
||
g_return_val_if_fail (start_position >= 0, NULL);
|
||
diff --git a/glib/gregex.h b/glib/gregex.h
|
||
index 817f667..11b419d 100644
|
||
--- a/glib/gregex.h
|
||
+++ b/glib/gregex.h
|
||
@@ -262,7 +262,9 @@ GQuark g_regex_error_quark (void);
|
||
* in the usual way).
|
||
* @G_REGEX_OPTIMIZE: Optimize the regular expression. If the pattern will
|
||
* be used many times, then it may be worth the effort to optimize it
|
||
- * to improve the speed of matches.
|
||
+ * to improve the speed of matches. Deprecated in GLib 2.74 which now uses
|
||
+ * libpcre2, which doesn’t require separate optimization of queries. This
|
||
+ * option is now a no-op. Deprecated: 2.74
|
||
* @G_REGEX_FIRSTLINE: Limits an unanchored pattern to match before (or at) the
|
||
* first newline. Since: 2.34
|
||
* @G_REGEX_DUPNAMES: Names used to identify capturing subpatterns need not
|
||
@@ -285,7 +287,8 @@ GQuark g_regex_error_quark (void);
|
||
* is recognised. If this option is set, then "\R" only recognizes the newline
|
||
* characters '\r', '\n' and '\r\n'. Since: 2.34
|
||
* @G_REGEX_JAVASCRIPT_COMPAT: Changes behaviour so that it is compatible with
|
||
- * JavaScript rather than PCRE. Since: 2.34
|
||
+ * JavaScript rather than PCRE. Since GLib 2.74 this is no longer supported,
|
||
+ * as libpcre2 does not support it. Since: 2.34 Deprecated: 2.74
|
||
*
|
||
* Flags specifying compile-time options.
|
||
*
|
||
@@ -306,7 +309,7 @@ typedef enum
|
||
G_REGEX_UNGREEDY = 1 << 9,
|
||
G_REGEX_RAW = 1 << 11,
|
||
G_REGEX_NO_AUTO_CAPTURE = 1 << 12,
|
||
- G_REGEX_OPTIMIZE = 1 << 13,
|
||
+ G_REGEX_OPTIMIZE GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 13,
|
||
G_REGEX_FIRSTLINE = 1 << 18,
|
||
G_REGEX_DUPNAMES = 1 << 19,
|
||
G_REGEX_NEWLINE_CR = 1 << 20,
|
||
@@ -314,7 +317,7 @@ typedef enum
|
||
G_REGEX_NEWLINE_CRLF = G_REGEX_NEWLINE_CR | G_REGEX_NEWLINE_LF,
|
||
G_REGEX_NEWLINE_ANYCRLF = G_REGEX_NEWLINE_CR | 1 << 22,
|
||
G_REGEX_BSR_ANYCRLF = 1 << 23,
|
||
- G_REGEX_JAVASCRIPT_COMPAT = 1 << 25
|
||
+ G_REGEX_JAVASCRIPT_COMPAT GLIB_DEPRECATED_ENUMERATOR_IN_2_74 = 1 << 25
|
||
} GRegexCompileFlags;
|
||
|
||
/**
|
||
diff --git a/glib/meson.build b/glib/meson.build
|
||
index 93fa504..5bf82da 100644
|
||
--- a/glib/meson.build
|
||
+++ b/glib/meson.build
|
||
@@ -357,13 +357,13 @@ else
|
||
glib_dtrace_hdr = []
|
||
endif
|
||
|
||
-pcre_static_args = []
|
||
+pcre2_static_args = []
|
||
|
||
-if use_pcre_static_flag
|
||
- pcre_static_args = ['-DPCRE_STATIC']
|
||
+if use_pcre2_static_flag
|
||
+ pcre2_static_args = ['-DPCRE2_STATIC']
|
||
endif
|
||
|
||
-glib_c_args = ['-DG_LOG_DOMAIN="GLib"', '-DGLIB_COMPILATION'] + pcre_static_args + glib_hidden_visibility_args
|
||
+glib_c_args = ['-DG_LOG_DOMAIN="GLib"', '-DGLIB_COMPILATION'] + pcre2_static_args + glib_hidden_visibility_args
|
||
libglib = library('glib-2.0',
|
||
glib_dtrace_obj, glib_dtrace_hdr,
|
||
sources : [deprecated_sources, glib_sources],
|
||
@@ -375,7 +375,7 @@ libglib = library('glib-2.0',
|
||
link_args : [noseh_link_args, glib_link_flags, win32_ldflags],
|
||
include_directories : configinc,
|
||
link_with: [charset_lib, gnulib_lib],
|
||
- dependencies : [pcre, thread_dep, librt] + libintl_deps + libiconv + platform_deps + [gnulib_libm_dependency, libm] + [libsysprof_capture_dep],
|
||
+ dependencies : [pcre2, thread_dep, librt] + libintl_deps + libiconv + platform_deps + [gnulib_libm_dependency, libm] + [libsysprof_capture_dep],
|
||
c_args : glib_c_args,
|
||
objc_args : glib_c_args,
|
||
)
|
||
diff --git a/glib/tests/meson.build b/glib/tests/meson.build
|
||
index 301158e..c1a9ceb 100644
|
||
--- a/glib/tests/meson.build
|
||
+++ b/glib/tests/meson.build
|
||
@@ -86,8 +86,8 @@ glib_tests = {
|
||
},
|
||
'refstring' : {},
|
||
'regex' : {
|
||
- 'dependencies' : [pcre],
|
||
- 'c_args' : use_pcre_static_flag ? ['-DPCRE_STATIC'] : [],
|
||
+ 'dependencies' : [pcre2],
|
||
+ 'c_args' : use_pcre2_static_flag ? ['-DPCRE2_STATIC'] : [],
|
||
},
|
||
'relation' : {},
|
||
'rwlock' : {},
|
||
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
|
||
index 50fd9c6..36982fb 100644
|
||
--- a/glib/tests/regex.c
|
||
+++ b/glib/tests/regex.c
|
||
@@ -25,7 +25,8 @@
|
||
#include <locale.h>
|
||
#include "glib.h"
|
||
|
||
-#include <pcre.h>
|
||
+#define PCRE2_CODE_UNIT_WIDTH 8
|
||
+#include <pcre2.h>
|
||
|
||
/* U+20AC EURO SIGN (symbol, currency) */
|
||
#define EURO "\xe2\x82\xac"
|
||
@@ -1501,7 +1502,7 @@ test_properties (void)
|
||
gchar *str;
|
||
|
||
error = NULL;
|
||
- regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("\\p{L}\\p{Ll}\\p{Lu}\\p{L&}\\p{N}\\p{Nd}", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
res = g_regex_match (regex, "ppPP01", 0, &match);
|
||
g_assert (res);
|
||
str = g_match_info_fetch (match, 0);
|
||
@@ -1522,7 +1523,7 @@ test_class (void)
|
||
gchar *str;
|
||
|
||
error = NULL;
|
||
- regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("[abc\\x{0B1E}\\p{Mn}\\x{0391}-\\x{03A9}]", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
res = g_regex_match (regex, "a:b:\340\254\236:\333\253:\316\240", 0, &match);
|
||
g_assert (res);
|
||
str = g_match_info_fetch (match, 0);
|
||
@@ -1568,7 +1569,7 @@ test_lookahead (void)
|
||
gint start, end;
|
||
|
||
error = NULL;
|
||
- regex = g_regex_new ("\\w+(?=;)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("\\w+(?=;)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "word1 word2: word3;", 0, &match);
|
||
@@ -1582,7 +1583,7 @@ test_lookahead (void)
|
||
g_regex_unref (regex);
|
||
|
||
error = NULL;
|
||
- regex = g_regex_new ("foo(?!bar)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("foo(?!bar)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "foobar foobaz", 0, &match);
|
||
@@ -1597,7 +1598,7 @@ test_lookahead (void)
|
||
g_regex_unref (regex);
|
||
|
||
error = NULL;
|
||
- regex = g_regex_new ("(?!bar)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?!bar)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "foobar foobaz", 0, &match);
|
||
@@ -1630,7 +1631,7 @@ test_lookbehind (void)
|
||
gint start, end;
|
||
|
||
error = NULL;
|
||
- regex = g_regex_new ("(?<!foo)bar", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?<!foo)bar", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "foobar boobar", 0, &match);
|
||
@@ -1645,7 +1646,7 @@ test_lookbehind (void)
|
||
g_regex_unref (regex);
|
||
|
||
error = NULL;
|
||
- regex = g_regex_new ("(?<=bullock|donkey) poo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?<=bullock|donkey) poo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "don poo, and bullock poo", 0, &match);
|
||
@@ -1658,17 +1659,17 @@ test_lookbehind (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("(?<!dogs?|cats?) x", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?<!dogs?|cats?) x", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex == NULL);
|
||
g_assert_error (error, G_REGEX_ERROR, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND);
|
||
g_clear_error (&error);
|
||
|
||
- regex = g_regex_new ("(?<=ab(c|de)) foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?<=ab(c|de)) foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex == NULL);
|
||
g_assert_error (error, G_REGEX_ERROR, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND);
|
||
g_clear_error (&error);
|
||
|
||
- regex = g_regex_new ("(?<=abc|abde)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?<=abc|abde)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "abfoo, abdfoo, abcfoo", 0, &match);
|
||
@@ -1680,7 +1681,7 @@ test_lookbehind (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("^.*+(?<=abcd)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("^.*+(?<=abcd)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "abcabcabcabcabcabcabcabcabcd", 0, &match);
|
||
@@ -1689,7 +1690,7 @@ test_lookbehind (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("(?<=\\d{3})(?<!999)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?<=\\d{3})(?<!999)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match);
|
||
@@ -1701,7 +1702,7 @@ test_lookbehind (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("(?<=\\d{3}...)(?<!999)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?<=\\d{3}...)(?<!999)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match);
|
||
@@ -1713,7 +1714,7 @@ test_lookbehind (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("(?<=\\d{3}(?!999)...)foo", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?<=\\d{3}(?!999)...)foo", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "999foo 123abcfoo 123foo", 0, &match);
|
||
@@ -1725,7 +1726,7 @@ test_lookbehind (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("(?<=(?<!foo)bar)baz", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?<=(?<!foo)bar)baz", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "foobarbaz barfoobaz barbarbaz", 0, &match);
|
||
@@ -1750,7 +1751,7 @@ test_subpattern (void)
|
||
gint start;
|
||
|
||
error = NULL;
|
||
- regex = g_regex_new ("cat(aract|erpillar|)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("cat(aract|erpillar|)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
g_assert_cmpint (g_regex_get_capture_count (regex), ==, 1);
|
||
@@ -1768,7 +1769,7 @@ test_subpattern (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("the ((red|white) (king|queen))", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("the ((red|white) (king|queen))", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
g_assert_cmpint (g_regex_get_capture_count (regex), ==, 3);
|
||
@@ -1792,7 +1793,7 @@ test_subpattern (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("the ((?:red|white) (king|queen))", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("the ((?:red|white) (king|queen))", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "the white queen", 0, &match);
|
||
@@ -1812,7 +1813,7 @@ test_subpattern (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("(?|(Sat)(ur)|(Sun))day (morning|afternoon)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?|(Sat)(ur)|(Sun))day (morning|afternoon)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
g_assert_cmpint (g_regex_get_capture_count (regex), ==, 3);
|
||
@@ -1832,7 +1833,7 @@ test_subpattern (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("(?|(abc)|(def))\\1", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?|(abc)|(def))\\1", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
g_assert_cmpint (g_regex_get_max_backref (regex), ==, 1);
|
||
@@ -1850,7 +1851,7 @@ test_subpattern (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("(?|(abc)|(def))(?1)", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?|(abc)|(def))(?1)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "abcabc abcdef defabc defdef", 0, &match);
|
||
@@ -1867,7 +1868,7 @@ test_subpattern (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|(?<DN>Sat)(?:urday)?", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("(?<DN>Mon|Fri|Sun)(?:day)?|(?<DN>Tue)(?:sday)?|(?<DN>Wed)(?:nesday)?|(?<DN>Thu)(?:rsday)?|(?<DN>Sat)(?:urday)?", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "Mon Tuesday Wed Saturday", 0, &match);
|
||
@@ -1894,7 +1895,7 @@ test_subpattern (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_OPTIMIZE|G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("^(a|b\\1)+$", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "aaaaaaaaaaaaaaaa", 0, &match);
|
||
@@ -1918,7 +1919,7 @@ test_condition (void)
|
||
gboolean res;
|
||
|
||
error = NULL;
|
||
- regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("^(a+)(\\()?[^()]+(?(-1)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "a(zzzzzz)b", 0, &match);
|
||
@@ -1932,7 +1933,7 @@ test_condition (void)
|
||
g_regex_unref (regex);
|
||
|
||
error = NULL;
|
||
- regex = g_regex_new ("^(a+)(?<OPEN>\\()?[^()]+(?(<OPEN>)\\))(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("^(a+)(?<OPEN>\\()?[^()]+(?(<OPEN>)\\))(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "a(zzzzzz)b", 0, &match);
|
||
@@ -1945,7 +1946,7 @@ test_condition (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("^(a+)(?(+1)\\[|\\<)?[^()]+(\\])?(b+)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "a[zzzzzz]b", 0, &match);
|
||
@@ -1960,7 +1961,7 @@ test_condition (void)
|
||
|
||
regex = g_regex_new ("(?(DEFINE) (?<byte> 2[0-4]\\d | 25[0-5] | 1\\d\\d | [1-9]?\\d) )"
|
||
"\\b (?&byte) (\\.(?&byte)){3} \\b",
|
||
- G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error);
|
||
+ G_REGEX_EXTENDED, 0, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "128.0.0.1", 0, &match);
|
||
@@ -1979,7 +1980,7 @@ test_condition (void)
|
||
|
||
regex = g_regex_new ("^(?(?=[^a-z]*[a-z])"
|
||
"\\d{2}-[a-z]{3}-\\d{2} | \\d{2}-\\d{2}-\\d{2} )$",
|
||
- G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, 0, &error);
|
||
+ G_REGEX_EXTENDED, 0, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "01-abc-24", 0, &match);
|
||
@@ -2012,7 +2013,7 @@ test_recursion (void)
|
||
gint start;
|
||
|
||
error = NULL;
|
||
- regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("\\( ( [^()]++ | (?R) )* \\)", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "(middle)", 0, &match);
|
||
@@ -2029,7 +2030,7 @@ test_recursion (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("^( \\( ( [^()]++ | (?1) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "((((((((((((((((middle))))))))))))))))", 0, &match);
|
||
@@ -2042,7 +2043,7 @@ test_recursion (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("^(?<pn> \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("^(?<pn> \\( ( [^()]++ | (?&pn) )* \\) )$", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
g_regex_match (regex, "(aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa()", 0, &match);
|
||
@@ -2051,7 +2052,7 @@ test_recursion (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_OPTIMIZE|G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("< (?: (?(R) \\d++ | [^<>]*+) | (?R)) * >", G_REGEX_EXTENDED, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "<ab<01<23<4>>>>", 0, &match);
|
||
@@ -2070,7 +2071,7 @@ test_recursion (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("^((.)(?1)\\2|.)$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "abcdcba", 0, &match);
|
||
@@ -2083,7 +2084,7 @@ test_recursion (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("^(?:((.)(?1)\\2|)|((.)(?3)\\4|.))$", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "abcdcba", 0, &match);
|
||
@@ -2096,7 +2097,7 @@ test_recursion (void)
|
||
g_match_info_free (match);
|
||
g_regex_unref (regex);
|
||
|
||
- regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_OPTIMIZE|G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error);
|
||
+ regex = g_regex_new ("^\\W*+(?:((.)\\W*+(?1)\\W*+\\2|)|((.)\\W*+(?3)\\W*+\\4|\\W*+.\\W*+))\\W*+$", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT, &error);
|
||
g_assert (regex);
|
||
g_assert_no_error (error);
|
||
res = g_regex_match (regex, "abcdcba", 0, &match);
|
||
@@ -2167,21 +2168,21 @@ test_max_lookbehind (void)
|
||
}
|
||
|
||
static gboolean
|
||
-pcre_ge (guint64 major, guint64 minor)
|
||
+pcre2_ge (guint64 major, guint64 minor)
|
||
{
|
||
- const char *version;
|
||
- gchar *ptr;
|
||
- guint64 pcre_major, pcre_minor;
|
||
+ gchar version[32];
|
||
+ const gchar *ptr;
|
||
+ guint64 pcre2_major, pcre2_minor;
|
||
|
||
- /* e.g. 8.35 2014-04-04 */
|
||
- version = pcre_version ();
|
||
+ /* e.g. 10.36 2020-12-04 */
|
||
+ pcre2_config (PCRE2_CONFIG_VERSION, version);
|
||
|
||
- pcre_major = g_ascii_strtoull (version, &ptr, 10);
|
||
+ pcre2_major = g_ascii_strtoull (version, (gchar **) &ptr, 10);
|
||
/* ptr points to ".MINOR (release date)" */
|
||
g_assert (ptr[0] == '.');
|
||
- pcre_minor = g_ascii_strtoull (ptr + 1, NULL, 10);
|
||
+ pcre2_minor = g_ascii_strtoull (ptr + 1, NULL, 10);
|
||
|
||
- return (pcre_major > major) || (pcre_major == major && pcre_minor >= minor);
|
||
+ return (pcre2_major > major) || (pcre2_major == major && pcre2_minor >= minor);
|
||
}
|
||
|
||
int
|
||
@@ -2203,18 +2204,26 @@ main (int argc, char *argv[])
|
||
g_test_add_func ("/regex/max-lookbehind", test_max_lookbehind);
|
||
|
||
/* TEST_NEW(pattern, compile_opts, match_opts) */
|
||
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||
TEST_NEW("[A-Z]+", G_REGEX_CASELESS | G_REGEX_EXTENDED | G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTBOL | G_REGEX_MATCH_PARTIAL);
|
||
+G_GNUC_END_IGNORE_DEPRECATIONS
|
||
TEST_NEW("", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
|
||
TEST_NEW(".*", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
|
||
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||
TEST_NEW(".*", G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT);
|
||
+G_GNUC_END_IGNORE_DEPRECATIONS
|
||
TEST_NEW(".*", G_REGEX_MULTILINE, G_REGEX_MATCH_DEFAULT);
|
||
TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_DEFAULT);
|
||
TEST_NEW(".*", G_REGEX_DOTALL, G_REGEX_MATCH_NOTBOL);
|
||
TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
|
||
TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_CASELESS, G_REGEX_MATCH_DEFAULT);
|
||
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||
TEST_NEW("(123\\d*)[a-zA-Z]+(?P<hello>.*)", G_REGEX_CASELESS | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT);
|
||
+G_GNUC_END_IGNORE_DEPRECATIONS
|
||
TEST_NEW("(?P<A>x)|(?P<A>y)", G_REGEX_DUPNAMES, G_REGEX_MATCH_DEFAULT);
|
||
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||
TEST_NEW("(?P<A>x)|(?P<A>y)", G_REGEX_DUPNAMES | G_REGEX_OPTIMIZE, G_REGEX_MATCH_DEFAULT);
|
||
+G_GNUC_END_IGNORE_DEPRECATIONS
|
||
/* This gives "internal error: code overflow" with pcre 6.0 */
|
||
TEST_NEW("(?i)(?-i)", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
|
||
TEST_NEW ("(?i)a", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
|
||
@@ -2225,9 +2234,10 @@ main (int argc, char *argv[])
|
||
TEST_NEW ("(?U)[a-z]+", G_REGEX_DEFAULT, G_REGEX_MATCH_DEFAULT);
|
||
|
||
/* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */
|
||
+G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
||
TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0);
|
||
+G_GNUC_END_IGNORE_DEPRECATIONS
|
||
TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0);
|
||
- TEST_NEW_CHECK_FLAGS ("(?X)a", 0, 0, 0 /* not exposed by GRegex */, 0);
|
||
TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0);
|
||
TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0);
|
||
TEST_NEW_CHECK_FLAGS ("(*UCP)a", 0, 0, 0 /* this always on in GRegex */, 0);
|
||
@@ -2255,16 +2265,16 @@ main (int argc, char *argv[])
|
||
TEST_NEW_FAIL ("a{4,2}", 0, G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER);
|
||
TEST_NEW_FAIL ("a{999999,}", 0, G_REGEX_ERROR_QUANTIFIER_TOO_BIG);
|
||
TEST_NEW_FAIL ("[a-z", 0, G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS);
|
||
- TEST_NEW_FAIL ("(?X)[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS);
|
||
+ TEST_NEW_FAIL ("[\\B]", 0, G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS);
|
||
TEST_NEW_FAIL ("[z-a]", 0, G_REGEX_ERROR_RANGE_OUT_OF_ORDER);
|
||
TEST_NEW_FAIL ("{2,4}", 0, G_REGEX_ERROR_NOTHING_TO_REPEAT);
|
||
TEST_NEW_FAIL ("a(?u)", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER);
|
||
- TEST_NEW_FAIL ("a(?<$foo)bar", 0, G_REGEX_ERROR_UNRECOGNIZED_CHARACTER);
|
||
+ TEST_NEW_FAIL ("a(?<$foo)bar", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME);
|
||
TEST_NEW_FAIL ("a[:alpha:]b", 0, G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS);
|
||
TEST_NEW_FAIL ("a(b", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
|
||
TEST_NEW_FAIL ("a)b", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
|
||
TEST_NEW_FAIL ("a(?R", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
|
||
- TEST_NEW_FAIL ("a(?-54", 0, G_REGEX_ERROR_UNMATCHED_PARENTHESIS);
|
||
+ TEST_NEW_FAIL ("a(?-54", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE);
|
||
TEST_NEW_FAIL ("(ab\\2)", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE);
|
||
TEST_NEW_FAIL ("a(?#abc", 0, G_REGEX_ERROR_UNTERMINATED_COMMENT);
|
||
TEST_NEW_FAIL ("(?<=a+)b", 0, G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND);
|
||
@@ -2274,51 +2284,31 @@ main (int argc, char *argv[])
|
||
TEST_NEW_FAIL ("a[[:fubar:]]b", 0, G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME);
|
||
TEST_NEW_FAIL ("[[.ch.]]", 0, G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED);
|
||
TEST_NEW_FAIL ("\\x{110000}", 0, G_REGEX_ERROR_HEX_CODE_TOO_LARGE);
|
||
- TEST_NEW_FAIL ("^(?(0)f|b)oo", 0, G_REGEX_ERROR_INVALID_CONDITION);
|
||
+ TEST_NEW_FAIL ("^(?(0)f|b)oo", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE);
|
||
TEST_NEW_FAIL ("(?<=\\C)X", 0, G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND);
|
||
- TEST_NEW_FAIL ("(?!\\w)(?R)", 0, G_REGEX_ERROR_INFINITE_LOOP);
|
||
- if (pcre_ge (8, 37))
|
||
- {
|
||
- /* The expected errors changed here. */
|
||
- TEST_NEW_FAIL ("(?(?<ab))", 0, G_REGEX_ERROR_ASSERTION_EXPECTED);
|
||
- }
|
||
- else
|
||
- {
|
||
- TEST_NEW_FAIL ("(?(?<ab))", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR);
|
||
- }
|
||
-
|
||
- if (pcre_ge (8, 35))
|
||
- {
|
||
- /* The expected errors changed here. */
|
||
- TEST_NEW_FAIL ("(?P<sub>foo)\\g<sub", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR);
|
||
- }
|
||
- else
|
||
- {
|
||
- TEST_NEW_FAIL ("(?P<sub>foo)\\g<sub", 0, G_REGEX_ERROR_MISSING_BACK_REFERENCE);
|
||
- }
|
||
+ TEST_NEW ("(?!\\w)(?R)", 0, 0);
|
||
+ TEST_NEW_FAIL ("(?(?<ab))", 0, G_REGEX_ERROR_ASSERTION_EXPECTED);
|
||
+ TEST_NEW_FAIL ("(?P<sub>foo)\\g<sub", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR);
|
||
TEST_NEW_FAIL ("(?P<x>eks)(?P<x>eccs)", 0, G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME);
|
||
-#if 0
|
||
- TEST_NEW_FAIL (?, 0, G_REGEX_ERROR_MALFORMED_PROPERTY);
|
||
- TEST_NEW_FAIL (?, 0, G_REGEX_ERROR_UNKNOWN_PROPERTY);
|
||
-#endif
|
||
TEST_NEW_FAIL ("\\666", G_REGEX_RAW, G_REGEX_ERROR_INVALID_OCTAL_VALUE);
|
||
TEST_NEW_FAIL ("^(?(DEFINE) abc | xyz ) ", 0, G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE);
|
||
TEST_NEW_FAIL ("a", G_REGEX_NEWLINE_CRLF | G_REGEX_NEWLINE_ANYCRLF, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS);
|
||
TEST_NEW_FAIL ("^(a)\\g{3", 0, G_REGEX_ERROR_MISSING_BACK_REFERENCE);
|
||
- TEST_NEW_FAIL ("^(a)\\g{0}", 0, G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE);
|
||
- TEST_NEW_FAIL ("abc(*FAIL:123)xyz", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN);
|
||
+ TEST_NEW_FAIL ("^(a)\\g{0}", 0, G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE);
|
||
+ TEST_NEW ("abc(*FAIL:123)xyz", 0, 0);
|
||
TEST_NEW_FAIL ("a(*FOOBAR)b", 0, G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB);
|
||
- TEST_NEW_FAIL ("(?i:A{1,}\\6666666666)", 0, G_REGEX_ERROR_NUMBER_TOO_BIG);
|
||
+ if (pcre2_ge (10, 37))
|
||
+ {
|
||
+ TEST_NEW ("(?i:A{1,}\\6666666666)", 0, 0);
|
||
+ }
|
||
TEST_NEW_FAIL ("(?<a>)(?&)", 0, G_REGEX_ERROR_MISSING_SUBPATTERN_NAME);
|
||
- TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_MISSING_DIGIT);
|
||
- TEST_NEW_FAIL ("TA]", G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_INVALID_DATA_CHARACTER);
|
||
+ TEST_NEW_FAIL ("(?+-a)", 0, G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE);
|
||
TEST_NEW_FAIL ("(?|(?<a>A)|(?<b>B))", 0, G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME);
|
||
TEST_NEW_FAIL ("a(*MARK)b", 0, G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED);
|
||
TEST_NEW_FAIL ("^\\c€", 0, G_REGEX_ERROR_INVALID_CONTROL_CHAR);
|
||
TEST_NEW_FAIL ("\\k", 0, G_REGEX_ERROR_MISSING_NAME);
|
||
TEST_NEW_FAIL ("a[\\NB]c", 0, G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS);
|
||
TEST_NEW_FAIL ("(*:0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEFG)XX", 0, G_REGEX_ERROR_NAME_TOO_LONG);
|
||
- TEST_NEW_FAIL ("\\u0100", G_REGEX_RAW | G_REGEX_JAVASCRIPT_COMPAT, G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE);
|
||
|
||
/* These errors can't really be tested easily:
|
||
* G_REGEX_ERROR_EXPRESSION_TOO_LARGE
|
||
@@ -2474,7 +2464,15 @@ main (int argc, char *argv[])
|
||
TEST_MATCH("a#\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
|
||
TEST_MATCH("a#\r\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
|
||
TEST_MATCH("a#\rb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
|
||
- TEST_MATCH("a#\nb", G_REGEX_EXTENDED, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, FALSE);
|
||
+ /* Due to PCRE2 only supporting newline settings passed to pcre2_compile (and
|
||
+ * not also to pcre2_match), we have to compile the pattern with the
|
||
+ * effective (combined from compile and match options) newline setting.
|
||
+ * However, this setting also affects how newlines are interpreted *inside*
|
||
+ * the pattern. With G_REGEX_EXTENDED, this changes where the comment
|
||
+ * (started with `#`) ends.
|
||
+ */
|
||
+ /* On PCRE1, this test expected no match; on PCRE2 it matches because of the above. */
|
||
+ TEST_MATCH("a#\nb", G_REGEX_EXTENDED, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, TRUE /*FALSE*/);
|
||
TEST_MATCH("a#\nb", G_REGEX_EXTENDED | G_REGEX_NEWLINE_CR, 0, "a", -1, 0, 0, TRUE);
|
||
|
||
TEST_MATCH("line\nbreak", G_REGEX_MULTILINE, 0, "this is a line\nbreak", -1, 0, 0, TRUE);
|
||
@@ -2487,21 +2485,19 @@ main (int argc, char *argv[])
|
||
* with pcre's internal tables. Bug #678273 */
|
||
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "DŽ", -1, 0, 0, TRUE);
|
||
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "dž", -1, 0, 0, TRUE);
|
||
-#if PCRE_MAJOR > 8 || (PCRE_MAJOR == 8 && PCRE_MINOR >= 32)
|
||
- /* This would incorrectly fail to match in pcre < 8.32, so only assert
|
||
- * this for known-good pcre. */
|
||
TEST_MATCH("[DŽ]", G_REGEX_CASELESS, 0, "Dž", -1, 0, 0, TRUE);
|
||
-#endif
|
||
|
||
/* TEST_MATCH_NEXT#(pattern, string, string_len, start_position, ...) */
|
||
TEST_MATCH_NEXT0("a", "x", -1, 0);
|
||
TEST_MATCH_NEXT0("a", "ax", -1, 1);
|
||
TEST_MATCH_NEXT0("a", "xa", 1, 0);
|
||
TEST_MATCH_NEXT0("a", "axa", 1, 2);
|
||
+ TEST_MATCH_NEXT1("", "", -1, 0, "", 0, 0);
|
||
TEST_MATCH_NEXT1("a", "a", -1, 0, "a", 0, 1);
|
||
TEST_MATCH_NEXT1("a", "xax", -1, 0, "a", 1, 2);
|
||
TEST_MATCH_NEXT1(EURO, ENG EURO, -1, 0, EURO, 2, 5);
|
||
TEST_MATCH_NEXT1("a*", "", -1, 0, "", 0, 0);
|
||
+ TEST_MATCH_NEXT2("", "a", -1, 0, "", 0, 0, "", 1, 1);
|
||
TEST_MATCH_NEXT2("a*", "aa", -1, 0, "aa", 0, 2, "", 2, 2);
|
||
TEST_MATCH_NEXT2(EURO "*", EURO EURO, -1, 0, EURO EURO, 0, 6, "", 6, 6);
|
||
TEST_MATCH_NEXT2("a", "axa", -1, 0, "a", 0, 1, "a", 2, 3);
|
||
@@ -2675,11 +2671,6 @@ main (int argc, char *argv[])
|
||
TEST_EXPAND("a", "a", "\\0130", FALSE, "X");
|
||
TEST_EXPAND("a", "a", "\\\\\\0", FALSE, "\\a");
|
||
TEST_EXPAND("a(?P<G>.)c", "xabcy", "X\\g<G>X", FALSE, "XbX");
|
||
-#if !(PCRE_MAJOR > 8 || (PCRE_MAJOR == 8 && PCRE_MINOR >= 34))
|
||
- /* PCRE >= 8.34 no longer allows this usage. */
|
||
- TEST_EXPAND("(.)(?P<1>.)", "ab", "\\1", FALSE, "a");
|
||
- TEST_EXPAND("(.)(?P<1>.)", "ab", "\\g<1>", FALSE, "a");
|
||
-#endif
|
||
TEST_EXPAND(".", EURO, "\\0", FALSE, EURO);
|
||
TEST_EXPAND("(.)", EURO, "\\1", FALSE, EURO);
|
||
TEST_EXPAND("(?P<G>.)", EURO, "\\g<G>", FALSE, EURO);
|
||
@@ -2798,6 +2789,10 @@ main (int argc, char *argv[])
|
||
TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)", "A", 1);
|
||
TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)", "B", 2);
|
||
TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)", "C", -1);
|
||
+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "A", 1);
|
||
+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "B", 2);
|
||
+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "C", 3);
|
||
+ TEST_GET_STRING_NUMBER("(?P<A>.)(?P<B>a)(?P<C>b)", "D", -1);
|
||
TEST_GET_STRING_NUMBER("(?P<A>.)(.)(?P<B>a)", "A", 1);
|
||
TEST_GET_STRING_NUMBER("(?P<A>.)(.)(?P<B>a)", "B", 3);
|
||
TEST_GET_STRING_NUMBER("(?P<A>.)(.)(?P<B>a)", "C", -1);
|
||
diff --git a/meson.build b/meson.build
|
||
index 882049c..657e9f6 100644
|
||
--- a/meson.build
|
||
+++ b/meson.build
|
||
@@ -2024,37 +2024,38 @@ else
|
||
endif
|
||
endif
|
||
|
||
-pcre = dependency('libpcre', version: '>= 8.31', required : false) # Should check for Unicode support, too. FIXME
|
||
-if not pcre.found()
|
||
+pcre2 = dependency('libpcre2-8', version: '>= 10.32', required : false)
|
||
+if not pcre2.found()
|
||
if cc.get_id() == 'msvc' or cc.get_id() == 'clang-cl'
|
||
- # MSVC: Search for the PCRE library by the configuration, which corresponds
|
||
- # to the output of CMake builds of PCRE. Note that debugoptimized
|
||
+ # MSVC: Search for the PCRE2 library by the configuration, which corresponds
|
||
+ # to the output of CMake builds of PCRE2. Note that debugoptimized
|
||
# is really a Release build with .PDB files.
|
||
if vs_crt == 'debug'
|
||
- pcre = cc.find_library('pcred', required : false)
|
||
+ pcre2 = cc.find_library('pcre2d-8', required : false)
|
||
else
|
||
- pcre = cc.find_library('pcre', required : false)
|
||
+ pcre2 = cc.find_library('pcre2-8', required : false)
|
||
endif
|
||
endif
|
||
endif
|
||
|
||
# Try again with the fallback
|
||
-if not pcre.found()
|
||
- pcre = dependency('libpcre', required : true, fallback : ['pcre', 'pcre_dep'])
|
||
- use_pcre_static_flag = true
|
||
+if not pcre2.found()
|
||
+ pcre2 = dependency('libpcre2-8', required : true, fallback : ['pcre2', 'libpcre2_8'])
|
||
+ use_pcre2_static_flag = true
|
||
elif host_system == 'windows'
|
||
- pcre_static = cc.links('''#define PCRE_STATIC
|
||
- #include <pcre.h>
|
||
- int main() {
|
||
- void *p = NULL;
|
||
- pcre_free(p);
|
||
- return 0;
|
||
- }''',
|
||
- dependencies: pcre,
|
||
- name : 'Windows system PCRE is a static build')
|
||
- use_pcre_static_flag = pcre_static
|
||
+ pcre2_static = cc.links('''#define PCRE2_STATIC
|
||
+ #define PCRE2_CODE_UNIT_WIDTH 8
|
||
+ #include <pcre2.h>
|
||
+ int main() {
|
||
+ void *p = NULL;
|
||
+ pcre2_code_free(p);
|
||
+ return 0;
|
||
+ }''',
|
||
+ dependencies: pcre2,
|
||
+ name : 'Windows system PCRE2 is a static build')
|
||
+ use_pcre2_static_flag = pcre2_static
|
||
else
|
||
- use_pcre_static_flag = false
|
||
+ use_pcre2_static_flag = false
|
||
endif
|
||
|
||
libm = cc.find_library('m', required : false)
|
||
diff --git a/po/sk.po b/po/sk.po
|
||
index 8d6a1ce..747ad27 100644
|
||
--- a/po/sk.po
|
||
+++ b/po/sk.po
|
||
@@ -5630,7 +5630,7 @@ msgstr "zlý ofset"
|
||
msgid "short utf8"
|
||
msgstr "krátke utf8"
|
||
|
||
-# Ide o omyl programátora: case PCRE_ERROR_RECURSELOOP: return _("recursion loop");
|
||
+# Ide o omyl programátora: case PCRE2_ERROR_RECURSELOOP: return _("recursion loop");
|
||
#: glib/gregex.c:303
|
||
msgid "recursion loop"
|
||
msgstr "rekurzívna slučka"
|
||
diff --git a/subprojects/pcre.wrap b/subprojects/pcre.wrap
|
||
deleted file mode 100644
|
||
index a6b07b9..0000000
|
||
--- a/subprojects/pcre.wrap
|
||
+++ /dev/null
|
||
@@ -1,11 +0,0 @@
|
||
-[wrap-file]
|
||
-directory = pcre-8.37
|
||
-source_url = https://sourceforge.net/projects/pcre/files/pcre/8.37/pcre-8.37.tar.bz2
|
||
-source_filename = pcre-8.37.tar.bz2
|
||
-source_hash = 51679ea8006ce31379fb0860e46dd86665d864b5020fc9cd19e71260eef4789d
|
||
-patch_filename = pcre_8.37-4_patch.zip
|
||
-patch_url = https://wrapdb.mesonbuild.com/v2/pcre_8.37-4/get_patch
|
||
-patch_hash = c957f42da6f6378300eb8a18f4a5cccdb8e2aada51a703cac842982f9f785399
|
||
-
|
||
-[provide]
|
||
-libpcre = pcre_dep
|
||
--
|
||
2.33.0
|
||
|