1063 lines
44 KiB
Diff
1063 lines
44 KiB
Diff
From d639c4ec009537b743dcd2209184638d9f5d68b9 Mon Sep 17 00:00:00 2001
|
|
From: =?UTF-8?q?Marco=20Trevisan=20=28Trevi=C3=B1o=29?= <mail@3v1n0.net>
|
|
Date: Tue, 6 Sep 2022 14:49:10 +0200
|
|
Subject: [PATCH] regex: Do not mix PCRE2 Compile, Match, Newline and BSR flags
|
|
|
|
As per the PCRE2 port we still used to try to map the old GRegex flags
|
|
(PCRE1 based) with the new PCRE2 ones, but doing that we were also
|
|
mixing flags with enums, leading to unexpected behaviors when trying to
|
|
get newline and BSR options out of bigger flags arrays.
|
|
|
|
So, avoid doing any mapping and store the values as native PCRE2 flags
|
|
internally, converting them back only when requested.
|
|
|
|
This fixes some regressions on newline handling.
|
|
|
|
Fixes: #2729
|
|
Fixes: #2688
|
|
Fixes: GNOME/gtksourceview#278
|
|
---
|
|
glib/gregex.c | 637 +++++++++++++++++++++++----------------------
|
|
glib/tests/regex.c | 18 ++
|
|
2 files changed, 341 insertions(+), 314 deletions(-)
|
|
|
|
diff --git a/glib/gregex.c b/glib/gregex.c
|
|
index a16ea98..95695f7 100644
|
|
--- a/glib/gregex.c
|
|
+++ b/glib/gregex.c
|
|
@@ -3,6 +3,7 @@
|
|
* Copyright (C) 1999, 2000 Scott Wimer
|
|
* Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com>
|
|
* Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org>
|
|
+ * Copyright (C) 2022, Marco Trevisan <marco.trevisan@canonical.com>
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
@@ -108,62 +109,105 @@
|
|
* library written by Philip Hazel.
|
|
*/
|
|
|
|
-/* Signifies that flags have already been converted from pcre1 to pcre2. The
|
|
- * value 0x04000000u is also the value of PCRE2_MATCH_INVALID_UTF in pcre2.h,
|
|
- * but it is not used in gregex, so we can reuse it for this flag.
|
|
- */
|
|
-#define G_REGEX_FLAGS_CONVERTED 0x04000000u
|
|
+#define G_REGEX_PCRE_GENERIC_MASK (PCRE2_ANCHORED | \
|
|
+ PCRE2_NO_UTF_CHECK | \
|
|
+ PCRE2_ENDANCHORED)
|
|
+
|
|
/* Mask of all the possible values for GRegexCompileFlags. */
|
|
-#define G_REGEX_COMPILE_MASK (PCRE2_CASELESS | \
|
|
- PCRE2_MULTILINE | \
|
|
- PCRE2_DOTALL | \
|
|
- PCRE2_EXTENDED | \
|
|
- PCRE2_ANCHORED | \
|
|
- PCRE2_DOLLAR_ENDONLY | \
|
|
- PCRE2_UNGREEDY | \
|
|
- PCRE2_UTF | \
|
|
- PCRE2_NO_AUTO_CAPTURE | \
|
|
- PCRE2_FIRSTLINE | \
|
|
- PCRE2_DUPNAMES | \
|
|
- PCRE2_NEWLINE_CR | \
|
|
- PCRE2_NEWLINE_LF | \
|
|
- PCRE2_NEWLINE_CRLF | \
|
|
- PCRE2_NEWLINE_ANYCRLF | \
|
|
- PCRE2_BSR_ANYCRLF | \
|
|
- G_REGEX_FLAGS_CONVERTED)
|
|
-
|
|
-/* Mask of all GRegexCompileFlags values that are (not) passed trough to PCRE */
|
|
-#define G_REGEX_COMPILE_PCRE_MASK (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
|
|
-#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF | \
|
|
- G_REGEX_FLAGS_CONVERTED)
|
|
+#define G_REGEX_COMPILE_MASK (G_REGEX_DEFAULT | \
|
|
+ G_REGEX_CASELESS | \
|
|
+ G_REGEX_MULTILINE | \
|
|
+ G_REGEX_DOTALL | \
|
|
+ G_REGEX_EXTENDED | \
|
|
+ G_REGEX_ANCHORED | \
|
|
+ G_REGEX_DOLLAR_ENDONLY | \
|
|
+ G_REGEX_UNGREEDY | \
|
|
+ G_REGEX_RAW | \
|
|
+ G_REGEX_NO_AUTO_CAPTURE | \
|
|
+ G_REGEX_OPTIMIZE | \
|
|
+ G_REGEX_FIRSTLINE | \
|
|
+ G_REGEX_DUPNAMES | \
|
|
+ G_REGEX_NEWLINE_CR | \
|
|
+ G_REGEX_NEWLINE_LF | \
|
|
+ G_REGEX_NEWLINE_CRLF | \
|
|
+ G_REGEX_NEWLINE_ANYCRLF | \
|
|
+ G_REGEX_BSR_ANYCRLF)
|
|
+
|
|
+#define G_REGEX_PCRE2_COMPILE_MASK (PCRE2_ALLOW_EMPTY_CLASS | \
|
|
+ PCRE2_ALT_BSUX | \
|
|
+ PCRE2_AUTO_CALLOUT | \
|
|
+ PCRE2_CASELESS | \
|
|
+ PCRE2_DOLLAR_ENDONLY | \
|
|
+ PCRE2_DOTALL | \
|
|
+ PCRE2_DUPNAMES | \
|
|
+ PCRE2_EXTENDED | \
|
|
+ PCRE2_FIRSTLINE | \
|
|
+ PCRE2_MATCH_UNSET_BACKREF | \
|
|
+ PCRE2_MULTILINE | \
|
|
+ PCRE2_NEVER_UCP | \
|
|
+ PCRE2_NEVER_UTF | \
|
|
+ PCRE2_NO_AUTO_CAPTURE | \
|
|
+ PCRE2_NO_AUTO_POSSESS | \
|
|
+ PCRE2_NO_DOTSTAR_ANCHOR | \
|
|
+ PCRE2_NO_START_OPTIMIZE | \
|
|
+ PCRE2_UCP | \
|
|
+ PCRE2_UNGREEDY | \
|
|
+ PCRE2_UTF | \
|
|
+ PCRE2_NEVER_BACKSLASH_C | \
|
|
+ PCRE2_ALT_CIRCUMFLEX | \
|
|
+ PCRE2_ALT_VERBNAMES | \
|
|
+ PCRE2_USE_OFFSET_LIMIT | \
|
|
+ PCRE2_EXTENDED_MORE | \
|
|
+ PCRE2_LITERAL | \
|
|
+ PCRE2_MATCH_INVALID_UTF | \
|
|
+ G_REGEX_PCRE_GENERIC_MASK)
|
|
+
|
|
+#define G_REGEX_COMPILE_NONPCRE_MASK (PCRE2_UTF)
|
|
|
|
/* Mask of all the possible values for GRegexMatchFlags. */
|
|
-#define G_REGEX_MATCH_MASK (PCRE2_ANCHORED | \
|
|
- PCRE2_NOTBOL | \
|
|
- PCRE2_NOTEOL | \
|
|
- PCRE2_NOTEMPTY | \
|
|
- PCRE2_NEWLINE_CR | \
|
|
- PCRE2_NEWLINE_LF | \
|
|
- PCRE2_NEWLINE_CRLF | \
|
|
- PCRE2_NEWLINE_ANY | \
|
|
- PCRE2_NEWLINE_ANYCRLF | \
|
|
- PCRE2_BSR_ANYCRLF | \
|
|
- PCRE2_BSR_UNICODE | \
|
|
- PCRE2_PARTIAL_SOFT | \
|
|
- PCRE2_PARTIAL_HARD | \
|
|
- PCRE2_NOTEMPTY_ATSTART | \
|
|
- G_REGEX_FLAGS_CONVERTED)
|
|
-
|
|
+#define G_REGEX_MATCH_MASK (G_REGEX_MATCH_DEFAULT | \
|
|
+ G_REGEX_MATCH_ANCHORED | \
|
|
+ G_REGEX_MATCH_NOTBOL | \
|
|
+ G_REGEX_MATCH_NOTEOL | \
|
|
+ G_REGEX_MATCH_NOTEMPTY | \
|
|
+ G_REGEX_MATCH_PARTIAL | \
|
|
+ G_REGEX_MATCH_NEWLINE_CR | \
|
|
+ G_REGEX_MATCH_NEWLINE_LF | \
|
|
+ G_REGEX_MATCH_NEWLINE_CRLF | \
|
|
+ G_REGEX_MATCH_NEWLINE_ANY | \
|
|
+ G_REGEX_MATCH_NEWLINE_ANYCRLF | \
|
|
+ G_REGEX_MATCH_BSR_ANYCRLF | \
|
|
+ G_REGEX_MATCH_BSR_ANY | \
|
|
+ G_REGEX_MATCH_PARTIAL_SOFT | \
|
|
+ G_REGEX_MATCH_PARTIAL_HARD | \
|
|
+ G_REGEX_MATCH_NOTEMPTY_ATSTART)
|
|
+
|
|
+#define G_REGEX_PCRE2_MATCH_MASK (PCRE2_NOTBOL |\
|
|
+ PCRE2_NOTEOL |\
|
|
+ PCRE2_NOTEMPTY |\
|
|
+ PCRE2_NOTEMPTY_ATSTART |\
|
|
+ PCRE2_PARTIAL_SOFT |\
|
|
+ PCRE2_PARTIAL_HARD |\
|
|
+ PCRE2_NO_JIT |\
|
|
+ PCRE2_COPY_MATCHED_SUBJECT |\
|
|
+ G_REGEX_PCRE_GENERIC_MASK)
|
|
+
|
|
+/* TODO: Support PCRE2_NEWLINE_NUL */
|
|
#define G_REGEX_NEWLINE_MASK (PCRE2_NEWLINE_CR | \
|
|
PCRE2_NEWLINE_LF | \
|
|
PCRE2_NEWLINE_CRLF | \
|
|
PCRE2_NEWLINE_ANYCRLF)
|
|
|
|
-#define G_REGEX_MATCH_NEWLINE_MASK (PCRE2_NEWLINE_CR | \
|
|
- PCRE2_NEWLINE_LF | \
|
|
- PCRE2_NEWLINE_CRLF | \
|
|
- PCRE2_NEWLINE_ANYCRLF | \
|
|
- PCRE2_NEWLINE_ANY)
|
|
+#define G_REGEX_COMPILE_NEWLINE_MASK (G_REGEX_NEWLINE_CR | \
|
|
+ G_REGEX_NEWLINE_LF | \
|
|
+ G_REGEX_NEWLINE_CRLF | \
|
|
+ G_REGEX_NEWLINE_ANYCRLF)
|
|
+
|
|
+#define G_REGEX_MATCH_NEWLINE_MASK (G_REGEX_MATCH_NEWLINE_CR | \
|
|
+ G_REGEX_MATCH_NEWLINE_LF | \
|
|
+ G_REGEX_MATCH_NEWLINE_CRLF | \
|
|
+ G_REGEX_MATCH_NEWLINE_ANY | \
|
|
+ G_REGEX_MATCH_NEWLINE_ANYCRLF)
|
|
|
|
/* if the string is in UTF-8 use g_utf8_ functions, else use
|
|
* use just +/- 1. */
|
|
@@ -178,7 +222,7 @@ struct _GMatchInfo
|
|
{
|
|
gint ref_count; /* the ref count (atomic) */
|
|
GRegex *regex; /* the regex */
|
|
- GRegexMatchFlags match_opts; /* options used at match time on the regex */
|
|
+ uint32_t match_opts; /* pcre match options used at match time on the regex */
|
|
gint matches; /* number of matching sub patterns, guaranteed to be <= (n_subpatterns + 1) if doing a single match (rather than matching all) */
|
|
gint n_subpatterns; /* total number of sub patterns in the regex */
|
|
gint pos; /* position in the string where last match left off */
|
|
@@ -204,9 +248,10 @@ struct _GRegex
|
|
gint ref_count; /* the ref count for the immutable part (atomic) */
|
|
gchar *pattern; /* the pattern */
|
|
pcre2_code *pcre_re; /* compiled form of the pattern */
|
|
- GRegexCompileFlags compile_opts; /* options used at compile time on the pattern, pcre2 values */
|
|
+ uint32_t compile_opts; /* options used at compile time on the pattern, pcre2 values */
|
|
GRegexCompileFlags orig_compile_opts; /* options used at compile time on the pattern, gregex values */
|
|
- GRegexMatchFlags match_opts; /* options used at match time on the regex */
|
|
+ uint32_t match_opts; /* pcre2 options used at match time on the regex */
|
|
+ GRegexMatchFlags orig_match_opts; /* options used as default match options, gregex values */
|
|
gint jit_options; /* options which were enabled for jit compiler */
|
|
JITStatus jit_status; /* indicates the status of jit compiler for this compiled regex */
|
|
};
|
|
@@ -223,197 +268,182 @@ static GList *split_replacement (const gchar *replacement,
|
|
GError **error);
|
|
static void free_interpolation_data (InterpolationData *data);
|
|
|
|
-static gint
|
|
-map_to_pcre2_compile_flags (gint pcre1_flags)
|
|
+static uint32_t
|
|
+get_pcre2_compile_options (GRegexCompileFlags compile_flags)
|
|
{
|
|
- /* Maps compile flags from pcre1 to pcre2 values
|
|
- */
|
|
- gint pcre2_flags = G_REGEX_FLAGS_CONVERTED;
|
|
-
|
|
- if (pcre1_flags & G_REGEX_FLAGS_CONVERTED)
|
|
- return pcre1_flags;
|
|
+ /* Maps compile flags to pcre2 values */
|
|
+ uint32_t pcre2_flags = 0;
|
|
|
|
- if (pcre1_flags & G_REGEX_CASELESS)
|
|
+ if (compile_flags & G_REGEX_CASELESS)
|
|
pcre2_flags |= PCRE2_CASELESS;
|
|
- if (pcre1_flags & G_REGEX_MULTILINE)
|
|
+ if (compile_flags & G_REGEX_MULTILINE)
|
|
pcre2_flags |= PCRE2_MULTILINE;
|
|
- if (pcre1_flags & G_REGEX_DOTALL)
|
|
+ if (compile_flags & G_REGEX_DOTALL)
|
|
pcre2_flags |= PCRE2_DOTALL;
|
|
- if (pcre1_flags & G_REGEX_EXTENDED)
|
|
+ if (compile_flags & G_REGEX_EXTENDED)
|
|
pcre2_flags |= PCRE2_EXTENDED;
|
|
- if (pcre1_flags & G_REGEX_ANCHORED)
|
|
+ if (compile_flags & G_REGEX_ANCHORED)
|
|
pcre2_flags |= PCRE2_ANCHORED;
|
|
- if (pcre1_flags & G_REGEX_DOLLAR_ENDONLY)
|
|
+ if (compile_flags & G_REGEX_DOLLAR_ENDONLY)
|
|
pcre2_flags |= PCRE2_DOLLAR_ENDONLY;
|
|
- if (pcre1_flags & G_REGEX_UNGREEDY)
|
|
+ if (compile_flags & G_REGEX_UNGREEDY)
|
|
pcre2_flags |= PCRE2_UNGREEDY;
|
|
- if (!(pcre1_flags & G_REGEX_RAW))
|
|
+ if (!(compile_flags & G_REGEX_RAW))
|
|
pcre2_flags |= PCRE2_UTF;
|
|
- if (pcre1_flags & G_REGEX_NO_AUTO_CAPTURE)
|
|
+ if (compile_flags & G_REGEX_NO_AUTO_CAPTURE)
|
|
pcre2_flags |= PCRE2_NO_AUTO_CAPTURE;
|
|
- if (pcre1_flags & G_REGEX_FIRSTLINE)
|
|
+ if (compile_flags & G_REGEX_FIRSTLINE)
|
|
pcre2_flags |= PCRE2_FIRSTLINE;
|
|
- if (pcre1_flags & G_REGEX_DUPNAMES)
|
|
+ if (compile_flags & G_REGEX_DUPNAMES)
|
|
pcre2_flags |= PCRE2_DUPNAMES;
|
|
- if (pcre1_flags & G_REGEX_NEWLINE_CR)
|
|
- pcre2_flags |= PCRE2_NEWLINE_CR;
|
|
- if (pcre1_flags & G_REGEX_NEWLINE_LF)
|
|
- pcre2_flags |= PCRE2_NEWLINE_LF;
|
|
- /* Check for exact match for a composite flag */
|
|
- if ((pcre1_flags & G_REGEX_NEWLINE_CRLF) == G_REGEX_NEWLINE_CRLF)
|
|
- pcre2_flags |= PCRE2_NEWLINE_CRLF;
|
|
- /* Check for exact match for a composite flag */
|
|
- if ((pcre1_flags & G_REGEX_NEWLINE_ANYCRLF) == G_REGEX_NEWLINE_ANYCRLF)
|
|
- pcre2_flags |= PCRE2_NEWLINE_ANYCRLF;
|
|
- if (pcre1_flags & G_REGEX_BSR_ANYCRLF)
|
|
- pcre2_flags |= PCRE2_BSR_ANYCRLF;
|
|
-
|
|
- /* these are not available in pcre2, but we use G_REGEX_OPTIMIZE as a special
|
|
- * case to request JIT compilation */
|
|
- if (pcre1_flags & G_REGEX_OPTIMIZE)
|
|
- pcre2_flags |= 0;
|
|
-G_GNUC_BEGIN_IGNORE_DEPRECATIONS
|
|
- if (pcre1_flags & G_REGEX_JAVASCRIPT_COMPAT)
|
|
- pcre2_flags |= 0;
|
|
-G_GNUC_END_IGNORE_DEPRECATIONS
|
|
-
|
|
- return pcre2_flags;
|
|
+
|
|
+ return pcre2_flags & G_REGEX_PCRE2_COMPILE_MASK;
|
|
}
|
|
|
|
-static gint
|
|
-map_to_pcre2_match_flags (gint pcre1_flags)
|
|
+static uint32_t
|
|
+get_pcre2_match_options (GRegexMatchFlags match_flags,
|
|
+ GRegexCompileFlags compile_flags)
|
|
{
|
|
- /* Maps match flags from pcre1 to pcre2 values
|
|
- */
|
|
- gint pcre2_flags = G_REGEX_FLAGS_CONVERTED;
|
|
-
|
|
- if (pcre1_flags & G_REGEX_FLAGS_CONVERTED)
|
|
- return pcre1_flags;
|
|
+ /* Maps match flags to pcre2 values */
|
|
+ uint32_t pcre2_flags = 0;
|
|
|
|
- if (pcre1_flags & G_REGEX_MATCH_ANCHORED)
|
|
+ if (match_flags & G_REGEX_MATCH_ANCHORED)
|
|
pcre2_flags |= PCRE2_ANCHORED;
|
|
- if (pcre1_flags & G_REGEX_MATCH_NOTBOL)
|
|
+ if (match_flags & G_REGEX_MATCH_NOTBOL)
|
|
pcre2_flags |= PCRE2_NOTBOL;
|
|
- if (pcre1_flags & G_REGEX_MATCH_NOTEOL)
|
|
+ if (match_flags & G_REGEX_MATCH_NOTEOL)
|
|
pcre2_flags |= PCRE2_NOTEOL;
|
|
- if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY)
|
|
+ if (match_flags & G_REGEX_MATCH_NOTEMPTY)
|
|
pcre2_flags |= PCRE2_NOTEMPTY;
|
|
- if (pcre1_flags & G_REGEX_MATCH_NEWLINE_CR)
|
|
- pcre2_flags |= PCRE2_NEWLINE_CR;
|
|
- if (pcre1_flags & G_REGEX_MATCH_NEWLINE_LF)
|
|
- pcre2_flags |= PCRE2_NEWLINE_LF;
|
|
- /* Check for exact match for a composite flag */
|
|
- if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_CRLF) == G_REGEX_MATCH_NEWLINE_CRLF)
|
|
- pcre2_flags |= PCRE2_NEWLINE_CRLF;
|
|
- if (pcre1_flags & G_REGEX_MATCH_NEWLINE_ANY)
|
|
- pcre2_flags |= PCRE2_NEWLINE_ANY;
|
|
- /* Check for exact match for a composite flag */
|
|
- if ((pcre1_flags & G_REGEX_MATCH_NEWLINE_ANYCRLF) == G_REGEX_MATCH_NEWLINE_ANYCRLF)
|
|
- pcre2_flags |= PCRE2_NEWLINE_ANYCRLF;
|
|
- if (pcre1_flags & G_REGEX_MATCH_BSR_ANYCRLF)
|
|
- pcre2_flags |= PCRE2_BSR_ANYCRLF;
|
|
- if (pcre1_flags & G_REGEX_MATCH_BSR_ANY)
|
|
- pcre2_flags |= PCRE2_BSR_UNICODE;
|
|
- if (pcre1_flags & G_REGEX_MATCH_PARTIAL_SOFT)
|
|
+ if (match_flags & G_REGEX_MATCH_PARTIAL_SOFT)
|
|
pcre2_flags |= PCRE2_PARTIAL_SOFT;
|
|
- if (pcre1_flags & G_REGEX_MATCH_PARTIAL_HARD)
|
|
+ if (match_flags & G_REGEX_MATCH_PARTIAL_HARD)
|
|
pcre2_flags |= PCRE2_PARTIAL_HARD;
|
|
- if (pcre1_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART)
|
|
+ if (match_flags & G_REGEX_MATCH_NOTEMPTY_ATSTART)
|
|
pcre2_flags |= PCRE2_NOTEMPTY_ATSTART;
|
|
|
|
- return pcre2_flags;
|
|
+ if (compile_flags & G_REGEX_RAW)
|
|
+ pcre2_flags |= PCRE2_NO_UTF_CHECK;
|
|
+
|
|
+ return pcre2_flags & G_REGEX_PCRE2_MATCH_MASK;
|
|
}
|
|
|
|
-static gint
|
|
-map_to_pcre1_compile_flags (gint pcre2_flags)
|
|
+static GRegexCompileFlags
|
|
+g_regex_compile_flags_from_pcre2 (uint32_t pcre2_flags)
|
|
{
|
|
- /* Maps compile flags from pcre2 to pcre1 values
|
|
- */
|
|
- gint pcre1_flags = 0;
|
|
-
|
|
- if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED))
|
|
- return pcre2_flags;
|
|
+ GRegexCompileFlags compile_flags = G_REGEX_DEFAULT;
|
|
|
|
if (pcre2_flags & PCRE2_CASELESS)
|
|
- pcre1_flags |= G_REGEX_CASELESS;
|
|
+ compile_flags |= G_REGEX_CASELESS;
|
|
if (pcre2_flags & PCRE2_MULTILINE)
|
|
- pcre1_flags |= G_REGEX_MULTILINE;
|
|
+ compile_flags |= G_REGEX_MULTILINE;
|
|
if (pcre2_flags & PCRE2_DOTALL)
|
|
- pcre1_flags |= G_REGEX_DOTALL;
|
|
+ compile_flags |= G_REGEX_DOTALL;
|
|
if (pcre2_flags & PCRE2_EXTENDED)
|
|
- pcre1_flags |= G_REGEX_EXTENDED;
|
|
+ compile_flags |= G_REGEX_EXTENDED;
|
|
if (pcre2_flags & PCRE2_ANCHORED)
|
|
- pcre1_flags |= G_REGEX_ANCHORED;
|
|
+ compile_flags |= G_REGEX_ANCHORED;
|
|
if (pcre2_flags & PCRE2_DOLLAR_ENDONLY)
|
|
- pcre1_flags |= G_REGEX_DOLLAR_ENDONLY;
|
|
+ compile_flags |= G_REGEX_DOLLAR_ENDONLY;
|
|
if (pcre2_flags & PCRE2_UNGREEDY)
|
|
- pcre1_flags |= G_REGEX_UNGREEDY;
|
|
+ compile_flags |= G_REGEX_UNGREEDY;
|
|
if (!(pcre2_flags & PCRE2_UTF))
|
|
- pcre1_flags |= G_REGEX_RAW;
|
|
+ compile_flags |= G_REGEX_RAW;
|
|
if (pcre2_flags & PCRE2_NO_AUTO_CAPTURE)
|
|
- pcre1_flags |= G_REGEX_NO_AUTO_CAPTURE;
|
|
+ compile_flags |= G_REGEX_NO_AUTO_CAPTURE;
|
|
if (pcre2_flags & PCRE2_FIRSTLINE)
|
|
- pcre1_flags |= G_REGEX_FIRSTLINE;
|
|
+ compile_flags |= G_REGEX_FIRSTLINE;
|
|
if (pcre2_flags & PCRE2_DUPNAMES)
|
|
- pcre1_flags |= G_REGEX_DUPNAMES;
|
|
- if (pcre2_flags & PCRE2_NEWLINE_CR)
|
|
- pcre1_flags |= G_REGEX_NEWLINE_CR;
|
|
- if (pcre2_flags & PCRE2_NEWLINE_LF)
|
|
- pcre1_flags |= G_REGEX_NEWLINE_LF;
|
|
- /* Check for exact match for a composite flag */
|
|
- if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF)
|
|
- pcre1_flags |= G_REGEX_NEWLINE_CRLF;
|
|
- /* Check for exact match for a composite flag */
|
|
- if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF)
|
|
- pcre1_flags |= G_REGEX_NEWLINE_ANYCRLF;
|
|
- if (pcre2_flags & PCRE2_BSR_ANYCRLF)
|
|
- pcre1_flags |= G_REGEX_BSR_ANYCRLF;
|
|
-
|
|
- return pcre1_flags;
|
|
+ compile_flags |= G_REGEX_DUPNAMES;
|
|
+
|
|
+ return compile_flags & G_REGEX_COMPILE_MASK;
|
|
}
|
|
|
|
-static gint
|
|
-map_to_pcre1_match_flags (gint pcre2_flags)
|
|
+static GRegexMatchFlags
|
|
+g_regex_match_flags_from_pcre2 (uint32_t pcre2_flags)
|
|
{
|
|
- /* Maps match flags from pcre2 to pcre1 values
|
|
- */
|
|
- gint pcre1_flags = 0;
|
|
-
|
|
- if (!(pcre2_flags & G_REGEX_FLAGS_CONVERTED))
|
|
- return pcre2_flags;
|
|
+ GRegexMatchFlags match_flags = G_REGEX_MATCH_DEFAULT;
|
|
|
|
if (pcre2_flags & PCRE2_ANCHORED)
|
|
- pcre1_flags |= G_REGEX_MATCH_ANCHORED;
|
|
+ match_flags |= G_REGEX_MATCH_ANCHORED;
|
|
if (pcre2_flags & PCRE2_NOTBOL)
|
|
- pcre1_flags |= G_REGEX_MATCH_NOTBOL;
|
|
+ match_flags |= G_REGEX_MATCH_NOTBOL;
|
|
if (pcre2_flags & PCRE2_NOTEOL)
|
|
- pcre1_flags |= G_REGEX_MATCH_NOTEOL;
|
|
+ match_flags |= G_REGEX_MATCH_NOTEOL;
|
|
if (pcre2_flags & PCRE2_NOTEMPTY)
|
|
- pcre1_flags |= G_REGEX_MATCH_NOTEMPTY;
|
|
- if (pcre2_flags & PCRE2_NEWLINE_CR)
|
|
- pcre1_flags |= G_REGEX_MATCH_NEWLINE_CR;
|
|
- if (pcre2_flags & PCRE2_NEWLINE_LF)
|
|
- pcre1_flags |= G_REGEX_MATCH_NEWLINE_LF;
|
|
- /* Check for exact match for a composite flag */
|
|
- if ((pcre2_flags & PCRE2_NEWLINE_CRLF) == PCRE2_NEWLINE_CRLF)
|
|
- pcre1_flags |= G_REGEX_MATCH_NEWLINE_CRLF;
|
|
- if (pcre2_flags & PCRE2_NEWLINE_ANY)
|
|
- pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANY;
|
|
- /* Check for exact match for a composite flag */
|
|
- if ((pcre2_flags & PCRE2_NEWLINE_ANYCRLF) == PCRE2_NEWLINE_ANYCRLF)
|
|
- pcre1_flags |= G_REGEX_MATCH_NEWLINE_ANYCRLF;
|
|
- if (pcre2_flags & PCRE2_BSR_ANYCRLF)
|
|
- pcre1_flags |= G_REGEX_MATCH_BSR_ANYCRLF;
|
|
- if (pcre2_flags & PCRE2_BSR_UNICODE)
|
|
- pcre1_flags |= G_REGEX_MATCH_BSR_ANY;
|
|
+ match_flags |= G_REGEX_MATCH_NOTEMPTY;
|
|
if (pcre2_flags & PCRE2_PARTIAL_SOFT)
|
|
- pcre1_flags |= G_REGEX_MATCH_PARTIAL_SOFT;
|
|
+ match_flags |= G_REGEX_MATCH_PARTIAL_SOFT;
|
|
if (pcre2_flags & PCRE2_PARTIAL_HARD)
|
|
- pcre1_flags |= G_REGEX_MATCH_PARTIAL_HARD;
|
|
+ match_flags |= G_REGEX_MATCH_PARTIAL_HARD;
|
|
if (pcre2_flags & PCRE2_NOTEMPTY_ATSTART)
|
|
- pcre1_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART;
|
|
+ match_flags |= G_REGEX_MATCH_NOTEMPTY_ATSTART;
|
|
+
|
|
+ return (match_flags & G_REGEX_MATCH_MASK);
|
|
+}
|
|
+
|
|
+static uint32_t
|
|
+get_pcre2_newline_compile_options (GRegexCompileFlags compile_flags)
|
|
+{
|
|
+ compile_flags &= G_REGEX_COMPILE_NEWLINE_MASK;
|
|
+
|
|
+ switch (compile_flags)
|
|
+ {
|
|
+ case G_REGEX_NEWLINE_CR:
|
|
+ return PCRE2_NEWLINE_CR;
|
|
+ case G_REGEX_NEWLINE_LF:
|
|
+ return PCRE2_NEWLINE_LF;
|
|
+ case G_REGEX_NEWLINE_CRLF:
|
|
+ return PCRE2_NEWLINE_CRLF;
|
|
+ case G_REGEX_NEWLINE_ANYCRLF:
|
|
+ return PCRE2_NEWLINE_ANYCRLF;
|
|
+ default:
|
|
+ if (compile_flags != 0)
|
|
+ return 0;
|
|
+
|
|
+ return PCRE2_NEWLINE_ANY;
|
|
+ }
|
|
+}
|
|
+
|
|
+static uint32_t
|
|
+get_pcre2_newline_match_options (GRegexMatchFlags match_flags)
|
|
+{
|
|
+ switch (match_flags & G_REGEX_MATCH_NEWLINE_MASK)
|
|
+ {
|
|
+ case G_REGEX_MATCH_NEWLINE_CR:
|
|
+ return PCRE2_NEWLINE_CR;
|
|
+ case G_REGEX_MATCH_NEWLINE_LF:
|
|
+ return PCRE2_NEWLINE_LF;
|
|
+ case G_REGEX_MATCH_NEWLINE_CRLF:
|
|
+ return PCRE2_NEWLINE_CRLF;
|
|
+ case G_REGEX_MATCH_NEWLINE_ANY:
|
|
+ return PCRE2_NEWLINE_ANY;
|
|
+ case G_REGEX_MATCH_NEWLINE_ANYCRLF:
|
|
+ return PCRE2_NEWLINE_ANYCRLF;
|
|
+ default:
|
|
+ return 0;
|
|
+ }
|
|
+}
|
|
+
|
|
+static uint32_t
|
|
+get_pcre2_bsr_compile_options (GRegexCompileFlags compile_flags)
|
|
+{
|
|
+ if (compile_flags & G_REGEX_BSR_ANYCRLF)
|
|
+ return PCRE2_BSR_ANYCRLF;
|
|
|
|
- return pcre1_flags;
|
|
+ return PCRE2_BSR_UNICODE;
|
|
+}
|
|
+
|
|
+static uint32_t
|
|
+get_pcre2_bsr_match_options (GRegexMatchFlags match_flags)
|
|
+{
|
|
+ if (match_flags & G_REGEX_MATCH_BSR_ANYCRLF)
|
|
+ return PCRE2_BSR_ANYCRLF;
|
|
+
|
|
+ if (match_flags & G_REGEX_MATCH_BSR_ANY)
|
|
+ return PCRE2_BSR_UNICODE;
|
|
+
|
|
+ return 0;
|
|
}
|
|
|
|
static const gchar *
|
|
@@ -742,12 +772,12 @@ translate_compile_error (gint *errcode, const gchar **errmsg)
|
|
/* GMatchInfo */
|
|
|
|
static GMatchInfo *
|
|
-match_info_new (const GRegex *regex,
|
|
- const gchar *string,
|
|
- gint string_len,
|
|
- gint start_position,
|
|
- gint match_options,
|
|
- gboolean is_dfa)
|
|
+match_info_new (const GRegex *regex,
|
|
+ const gchar *string,
|
|
+ gint string_len,
|
|
+ gint start_position,
|
|
+ GRegexMatchFlags match_options,
|
|
+ gboolean is_dfa)
|
|
{
|
|
GMatchInfo *match_info;
|
|
|
|
@@ -761,7 +791,8 @@ match_info_new (const GRegex *regex,
|
|
match_info->string_len = string_len;
|
|
match_info->matches = PCRE2_ERROR_NOMATCH;
|
|
match_info->pos = start_position;
|
|
- match_info->match_opts = match_options;
|
|
+ match_info->match_opts =
|
|
+ get_pcre2_match_options (match_options, regex->orig_compile_opts);
|
|
|
|
pcre2_pattern_info (regex->pcre_re, PCRE2_INFO_CAPTURECOUNT,
|
|
&match_info->n_subpatterns);
|
|
@@ -822,8 +853,8 @@ recalc_match_offsets (GMatchInfo *match_info,
|
|
}
|
|
|
|
static void
|
|
-enable_jit_with_match_options (GRegex *regex,
|
|
- GRegexMatchFlags match_options)
|
|
+enable_jit_with_match_options (GRegex *regex,
|
|
+ uint32_t match_options)
|
|
{
|
|
gint old_jit_options, new_jit_options, retval;
|
|
|
|
@@ -1009,7 +1040,7 @@ g_match_info_next (GMatchInfo *match_info,
|
|
return FALSE;
|
|
}
|
|
|
|
- opts = map_to_pcre2_match_flags (match_info->regex->match_opts | match_info->match_opts);
|
|
+ opts = match_info->regex->match_opts | match_info->match_opts;
|
|
|
|
enable_jit_with_match_options (match_info->regex, opts);
|
|
if (match_info->regex->jit_status == JIT_STATUS_ENABLED)
|
|
@@ -1018,7 +1049,7 @@ g_match_info_next (GMatchInfo *match_info,
|
|
(PCRE2_SPTR8) match_info->string,
|
|
match_info->string_len,
|
|
match_info->pos,
|
|
- opts & ~G_REGEX_FLAGS_CONVERTED,
|
|
+ opts,
|
|
match_info->match_data,
|
|
match_info->match_context);
|
|
}
|
|
@@ -1028,7 +1059,7 @@ g_match_info_next (GMatchInfo *match_info,
|
|
(PCRE2_SPTR8) match_info->string,
|
|
match_info->string_len,
|
|
match_info->pos,
|
|
- opts & ~G_REGEX_FLAGS_CONVERTED,
|
|
+ opts,
|
|
match_info->match_data,
|
|
match_info->match_context);
|
|
}
|
|
@@ -1563,14 +1594,14 @@ g_regex_unref (GRegex *regex)
|
|
}
|
|
}
|
|
|
|
-/*
|
|
- * @match_options: (inout) (optional):
|
|
- */
|
|
-static pcre2_code *regex_compile (const gchar *pattern,
|
|
- GRegexCompileFlags compile_options,
|
|
- GRegexCompileFlags *compile_options_out,
|
|
- GRegexMatchFlags *match_options,
|
|
- GError **error);
|
|
+static pcre2_code * regex_compile (const gchar *pattern,
|
|
+ uint32_t compile_options,
|
|
+ uint32_t newline_options,
|
|
+ uint32_t bsr_options,
|
|
+ GError **error);
|
|
+
|
|
+static uint32_t get_pcre2_inline_compile_options (pcre2_code *re,
|
|
+ uint32_t compile_options);
|
|
|
|
/**
|
|
* g_regex_new:
|
|
@@ -1596,11 +1627,10 @@ g_regex_new (const gchar *pattern,
|
|
GRegex *regex;
|
|
pcre2_code *re;
|
|
static gsize initialised = 0;
|
|
- GRegexCompileFlags orig_compile_opts;
|
|
-
|
|
- orig_compile_opts = compile_options;
|
|
- compile_options = map_to_pcre2_compile_flags (compile_options);
|
|
- match_options = map_to_pcre2_match_flags (match_options);
|
|
+ uint32_t pcre_compile_options;
|
|
+ uint32_t pcre_match_options;
|
|
+ uint32_t newline_options;
|
|
+ uint32_t bsr_options;
|
|
|
|
g_return_val_if_fail (pattern != NULL, NULL);
|
|
g_return_val_if_fail (error == NULL || *error == NULL, NULL);
|
|
@@ -1618,113 +1648,97 @@ g_regex_new (const gchar *pattern,
|
|
g_once_init_leave (&initialised, supports_utf8 ? 1 : 2);
|
|
}
|
|
|
|
- if (G_UNLIKELY (initialised != 1))
|
|
+ if (G_UNLIKELY (initialised != 1))
|
|
{
|
|
g_set_error_literal (error, G_REGEX_ERROR, G_REGEX_ERROR_COMPILE,
|
|
_("PCRE library is compiled with incompatible options"));
|
|
return NULL;
|
|
}
|
|
|
|
- switch (compile_options & G_REGEX_NEWLINE_MASK)
|
|
+ pcre_compile_options = get_pcre2_compile_options (compile_options);
|
|
+ pcre_match_options = get_pcre2_match_options (match_options, compile_options);
|
|
+
|
|
+ newline_options = get_pcre2_newline_match_options (match_options);
|
|
+ if (newline_options == 0)
|
|
+ newline_options = get_pcre2_newline_compile_options (compile_options);
|
|
+
|
|
+ if (newline_options == 0)
|
|
{
|
|
- case 0: /* PCRE2_NEWLINE_ANY */
|
|
- case PCRE2_NEWLINE_CR:
|
|
- case PCRE2_NEWLINE_LF:
|
|
- case PCRE2_NEWLINE_CRLF:
|
|
- case PCRE2_NEWLINE_ANYCRLF:
|
|
- break;
|
|
- default:
|
|
g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS,
|
|
"Invalid newline flags");
|
|
return NULL;
|
|
}
|
|
|
|
- re = regex_compile (pattern, compile_options, &compile_options,
|
|
- &match_options, error);
|
|
+ bsr_options = get_pcre2_bsr_match_options (match_options);
|
|
+ if (!bsr_options)
|
|
+ bsr_options = get_pcre2_bsr_compile_options (compile_options);
|
|
+
|
|
+ re = regex_compile (pattern, pcre_compile_options,
|
|
+ newline_options, bsr_options, error);
|
|
if (re == NULL)
|
|
return NULL;
|
|
|
|
+ pcre_compile_options |=
|
|
+ get_pcre2_inline_compile_options (re, pcre_compile_options);
|
|
+
|
|
regex = g_new0 (GRegex, 1);
|
|
regex->ref_count = 1;
|
|
regex->pattern = g_strdup (pattern);
|
|
regex->pcre_re = re;
|
|
- regex->compile_opts = compile_options;
|
|
- regex->orig_compile_opts = orig_compile_opts;
|
|
- regex->match_opts = match_options;
|
|
+ regex->compile_opts = pcre_compile_options;
|
|
+ regex->orig_compile_opts = compile_options;
|
|
+ regex->match_opts = pcre_match_options;
|
|
+ regex->orig_match_opts = match_options;
|
|
enable_jit_with_match_options (regex, regex->match_opts);
|
|
|
|
return regex;
|
|
}
|
|
|
|
-static gint
|
|
-extract_newline_options (const GRegexCompileFlags compile_options,
|
|
- const GRegexMatchFlags *match_options)
|
|
-{
|
|
- gint newline_options = PCRE2_NEWLINE_ANY;
|
|
-
|
|
- if (compile_options & G_REGEX_NEWLINE_MASK)
|
|
- newline_options = compile_options & G_REGEX_NEWLINE_MASK;
|
|
- if (match_options && *match_options & G_REGEX_MATCH_NEWLINE_MASK)
|
|
- newline_options = *match_options & G_REGEX_MATCH_NEWLINE_MASK;
|
|
-
|
|
- return newline_options;
|
|
-}
|
|
-
|
|
-static gint
|
|
-extract_bsr_options (const GRegexCompileFlags compile_options,
|
|
- const GRegexMatchFlags *match_options)
|
|
-{
|
|
- gint bsr_options = PCRE2_BSR_UNICODE;
|
|
-
|
|
- if (compile_options & PCRE2_BSR_ANYCRLF)
|
|
- bsr_options = PCRE2_BSR_ANYCRLF;
|
|
- if (match_options && *match_options & PCRE2_BSR_ANYCRLF)
|
|
- bsr_options = PCRE2_BSR_ANYCRLF;
|
|
- if (match_options && *match_options & PCRE2_BSR_UNICODE)
|
|
- bsr_options = PCRE2_BSR_UNICODE;
|
|
-
|
|
- return bsr_options;
|
|
-}
|
|
-
|
|
static pcre2_code *
|
|
-regex_compile (const gchar *pattern,
|
|
- GRegexCompileFlags compile_options,
|
|
- GRegexCompileFlags *compile_options_out,
|
|
- GRegexMatchFlags *match_options,
|
|
- GError **error)
|
|
+regex_compile (const gchar *pattern,
|
|
+ uint32_t compile_options,
|
|
+ uint32_t newline_options,
|
|
+ uint32_t bsr_options,
|
|
+ GError **error)
|
|
{
|
|
pcre2_code *re;
|
|
pcre2_compile_context *context;
|
|
const gchar *errmsg;
|
|
PCRE2_SIZE erroffset;
|
|
gint errcode;
|
|
- GRegexCompileFlags nonpcre_compile_options;
|
|
- uint32_t pcre_compile_options;
|
|
-
|
|
- nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK;
|
|
|
|
context = pcre2_compile_context_create (NULL);
|
|
|
|
/* set newline options */
|
|
- pcre2_set_newline (context, extract_newline_options (compile_options, match_options));
|
|
+ if (pcre2_set_newline (context, newline_options) != 0)
|
|
+ {
|
|
+ g_set_error (error, G_REGEX_ERROR,
|
|
+ G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS,
|
|
+ "Invalid newline flags");
|
|
+ pcre2_compile_context_free (context);
|
|
+ return NULL;
|
|
+ }
|
|
|
|
/* set bsr options */
|
|
- pcre2_set_bsr (context, extract_bsr_options (compile_options, match_options));
|
|
+ if (pcre2_set_bsr (context, bsr_options) != 0)
|
|
+ {
|
|
+ g_set_error (error, G_REGEX_ERROR,
|
|
+ G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS,
|
|
+ "Invalid BSR flags");
|
|
+ pcre2_compile_context_free (context);
|
|
+ return NULL;
|
|
+ }
|
|
|
|
/* In case UTF-8 mode is used, also set PCRE2_NO_UTF_CHECK */
|
|
if (compile_options & PCRE2_UTF)
|
|
- {
|
|
- compile_options |= PCRE2_NO_UTF_CHECK;
|
|
- if (match_options != NULL)
|
|
- *match_options |= PCRE2_NO_UTF_CHECK;
|
|
- }
|
|
+ compile_options |= PCRE2_NO_UTF_CHECK;
|
|
|
|
compile_options |= PCRE2_UCP;
|
|
|
|
/* compile the pattern */
|
|
re = pcre2_compile ((PCRE2_SPTR8) pattern,
|
|
PCRE2_ZERO_TERMINATED,
|
|
- compile_options & ~G_REGEX_FLAGS_CONVERTED,
|
|
+ compile_options,
|
|
&errcode,
|
|
&erroffset,
|
|
context);
|
|
@@ -1755,16 +1769,22 @@ regex_compile (const gchar *pattern,
|
|
return NULL;
|
|
}
|
|
|
|
+ return re;
|
|
+}
|
|
+
|
|
+static uint32_t
|
|
+get_pcre2_inline_compile_options (pcre2_code *re,
|
|
+ uint32_t compile_options)
|
|
+{
|
|
+ uint32_t pcre_compile_options;
|
|
+ uint32_t nonpcre_compile_options;
|
|
+
|
|
/* For options set at the beginning of the pattern, pcre puts them into
|
|
* compile options, e.g. "(?i)foo" will make the pcre structure store
|
|
* PCRE2_CASELESS even though it wasn't explicitly given for compilation. */
|
|
+ nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK;
|
|
pcre2_pattern_info (re, PCRE2_INFO_ALLOPTIONS, &pcre_compile_options);
|
|
- compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK;
|
|
-
|
|
- /* Don't leak PCRE2_NEWLINE_ANY, which is part of PCRE2_NEWLINE_ANYCRLF */
|
|
- if ((pcre_compile_options & PCRE2_NEWLINE_ANYCRLF) != PCRE2_NEWLINE_ANYCRLF)
|
|
- compile_options &= ~PCRE2_NEWLINE_ANY;
|
|
-
|
|
+ compile_options = pcre_compile_options & G_REGEX_PCRE2_COMPILE_MASK;
|
|
compile_options |= nonpcre_compile_options;
|
|
|
|
if (!(compile_options & PCRE2_DUPNAMES))
|
|
@@ -1775,10 +1795,7 @@ regex_compile (const gchar *pattern,
|
|
compile_options |= PCRE2_DUPNAMES;
|
|
}
|
|
|
|
- if (compile_options_out != 0)
|
|
- *compile_options_out = compile_options;
|
|
-
|
|
- return re;
|
|
+ return compile_options;
|
|
}
|
|
|
|
/**
|
|
@@ -1940,7 +1957,7 @@ g_regex_get_compile_flags (const GRegex *regex)
|
|
break;
|
|
}
|
|
|
|
- return map_to_pcre1_compile_flags (regex->compile_opts) | extra_flags;
|
|
+ return g_regex_compile_flags_from_pcre2 (regex->compile_opts) | extra_flags;
|
|
}
|
|
|
|
/**
|
|
@@ -1956,9 +1973,15 @@ g_regex_get_compile_flags (const GRegex *regex)
|
|
GRegexMatchFlags
|
|
g_regex_get_match_flags (const GRegex *regex)
|
|
{
|
|
+ uint32_t flags;
|
|
+
|
|
g_return_val_if_fail (regex != NULL, 0);
|
|
|
|
- return map_to_pcre1_match_flags (regex->match_opts & G_REGEX_MATCH_MASK);
|
|
+ flags = g_regex_match_flags_from_pcre2 (regex->match_opts);
|
|
+ flags |= (regex->orig_match_opts & G_REGEX_MATCH_NEWLINE_MASK);
|
|
+ flags |= (regex->orig_match_opts & (G_REGEX_MATCH_BSR_ANY | G_REGEX_MATCH_BSR_ANYCRLF));
|
|
+
|
|
+ return flags;
|
|
}
|
|
|
|
/**
|
|
@@ -1992,9 +2015,6 @@ g_regex_match_simple (const gchar *pattern,
|
|
GRegex *regex;
|
|
gboolean result;
|
|
|
|
- compile_options = map_to_pcre2_compile_flags (compile_options);
|
|
- match_options = map_to_pcre2_match_flags (match_options);
|
|
-
|
|
regex = g_regex_new (pattern, compile_options, G_REGEX_MATCH_DEFAULT, NULL);
|
|
if (!regex)
|
|
return FALSE;
|
|
@@ -2062,8 +2082,6 @@ g_regex_match (const GRegex *regex,
|
|
GRegexMatchFlags match_options,
|
|
GMatchInfo **match_info)
|
|
{
|
|
- match_options = map_to_pcre2_match_flags (match_options);
|
|
-
|
|
return g_regex_match_full (regex, string, -1, 0, match_options,
|
|
match_info, NULL);
|
|
}
|
|
@@ -2147,8 +2165,6 @@ g_regex_match_full (const GRegex *regex,
|
|
GMatchInfo *info;
|
|
gboolean match_ok;
|
|
|
|
- match_options = map_to_pcre2_match_flags (match_options);
|
|
-
|
|
g_return_val_if_fail (regex != NULL, FALSE);
|
|
g_return_val_if_fail (string != NULL, FALSE);
|
|
g_return_val_if_fail (start_position >= 0, FALSE);
|
|
@@ -2199,8 +2215,6 @@ g_regex_match_all (const GRegex *regex,
|
|
GRegexMatchFlags match_options,
|
|
GMatchInfo **match_info)
|
|
{
|
|
- match_options = map_to_pcre2_match_flags (match_options);
|
|
-
|
|
return g_regex_match_all_full (regex, string, -1, 0, match_options,
|
|
match_info, NULL);
|
|
}
|
|
@@ -2272,8 +2286,8 @@ g_regex_match_all_full (const GRegex *regex,
|
|
gboolean done;
|
|
pcre2_code *pcre_re;
|
|
gboolean retval;
|
|
-
|
|
- match_options = map_to_pcre2_match_flags (match_options);
|
|
+ uint32_t newline_options;
|
|
+ uint32_t bsr_options;
|
|
|
|
g_return_val_if_fail (regex != NULL, FALSE);
|
|
g_return_val_if_fail (string != NULL, FALSE);
|
|
@@ -2281,6 +2295,14 @@ g_regex_match_all_full (const GRegex *regex,
|
|
g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
|
|
g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE);
|
|
|
|
+ newline_options = get_pcre2_newline_match_options (match_options);
|
|
+ if (!newline_options)
|
|
+ newline_options = get_pcre2_newline_compile_options (regex->orig_compile_opts);
|
|
+
|
|
+ bsr_options = get_pcre2_bsr_match_options (match_options);
|
|
+ if (!bsr_options)
|
|
+ bsr_options = get_pcre2_bsr_compile_options (regex->orig_compile_opts);
|
|
+
|
|
/* For PCRE2 we need to turn off PCRE2_NO_AUTO_POSSESS, which is an
|
|
* optimization for normal regex matching, but results in omitting some
|
|
* shorter matches here, and an observable behaviour change.
|
|
@@ -2289,7 +2311,7 @@ g_regex_match_all_full (const GRegex *regex,
|
|
* codesearch.debian.net, so don't bother caching the recompiled RE. */
|
|
pcre_re = regex_compile (regex->pattern,
|
|
regex->compile_opts | PCRE2_NO_AUTO_POSSESS,
|
|
- NULL, NULL, error);
|
|
+ newline_options, bsr_options, error);
|
|
if (pcre_re == NULL)
|
|
return FALSE;
|
|
|
|
@@ -2303,7 +2325,7 @@ g_regex_match_all_full (const GRegex *regex,
|
|
info->matches = pcre2_dfa_match (pcre_re,
|
|
(PCRE2_SPTR8) info->string, info->string_len,
|
|
info->pos,
|
|
- (regex->match_opts | match_options | PCRE2_NO_UTF_CHECK) & ~G_REGEX_FLAGS_CONVERTED,
|
|
+ (regex->match_opts | info->match_opts),
|
|
info->match_data,
|
|
info->match_context,
|
|
info->workspace, info->n_workspace);
|
|
@@ -2436,9 +2458,6 @@ g_regex_split_simple (const gchar *pattern,
|
|
GRegex *regex;
|
|
gchar **result;
|
|
|
|
- compile_options = map_to_pcre2_compile_flags (compile_options);
|
|
- match_options = map_to_pcre2_match_flags (match_options);
|
|
-
|
|
regex = g_regex_new (pattern, compile_options, 0, NULL);
|
|
if (!regex)
|
|
return NULL;
|
|
@@ -2482,8 +2501,6 @@ g_regex_split (const GRegex *regex,
|
|
const gchar *string,
|
|
GRegexMatchFlags match_options)
|
|
{
|
|
- match_options = map_to_pcre2_match_flags (match_options);
|
|
-
|
|
return g_regex_split_full (regex, string, -1, 0,
|
|
match_options, 0, NULL);
|
|
}
|
|
@@ -2548,8 +2565,6 @@ g_regex_split_full (const GRegex *regex,
|
|
/* the returned array of char **s */
|
|
gchar **string_list;
|
|
|
|
- match_options = map_to_pcre2_match_flags (match_options);
|
|
-
|
|
g_return_val_if_fail (regex != NULL, NULL);
|
|
g_return_val_if_fail (string != NULL, NULL);
|
|
g_return_val_if_fail (start_position >= 0, NULL);
|
|
@@ -3174,8 +3189,6 @@ g_regex_replace (const GRegex *regex,
|
|
GList *list;
|
|
GError *tmp_error = NULL;
|
|
|
|
- match_options = map_to_pcre2_match_flags (match_options);
|
|
-
|
|
g_return_val_if_fail (regex != NULL, NULL);
|
|
g_return_val_if_fail (string != NULL, NULL);
|
|
g_return_val_if_fail (start_position >= 0, NULL);
|
|
@@ -3245,8 +3258,6 @@ g_regex_replace_literal (const GRegex *regex,
|
|
GRegexMatchFlags match_options,
|
|
GError **error)
|
|
{
|
|
- match_options = map_to_pcre2_match_flags (match_options);
|
|
-
|
|
g_return_val_if_fail (replacement != NULL, NULL);
|
|
g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL);
|
|
|
|
@@ -3335,8 +3346,6 @@ g_regex_replace_eval (const GRegex *regex,
|
|
gboolean done = FALSE;
|
|
GError *tmp_error = NULL;
|
|
|
|
- match_options = map_to_pcre2_match_flags (match_options);
|
|
-
|
|
g_return_val_if_fail (regex != NULL, NULL);
|
|
g_return_val_if_fail (string != NULL, NULL);
|
|
g_return_val_if_fail (start_position >= 0, NULL);
|
|
diff --git a/glib/tests/regex.c b/glib/tests/regex.c
|
|
index 0d01d59..79e6b4a 100644
|
|
--- a/glib/tests/regex.c
|
|
+++ b/glib/tests/regex.c
|
|
@@ -1,6 +1,7 @@
|
|
/*
|
|
* Copyright (C) 2005 - 2006, Marco Barisione <marco@barisione.org>
|
|
* Copyright (C) 2010 Red Hat, Inc.
|
|
+ * Copyright (C) 2022, Marco Trevisan <marco.trevisan@canonical.com>
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
@@ -2353,7 +2354,13 @@ main (int argc, char *argv[])
|
|
|
|
/* TEST_NEW_CHECK_FLAGS(pattern, compile_opts, match_ops, real_compile_opts, real_match_opts) */
|
|
TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, 0, G_REGEX_OPTIMIZE, 0);
|
|
+ TEST_NEW_CHECK_FLAGS ("a", G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY,
|
|
+ G_REGEX_OPTIMIZE, G_REGEX_MATCH_NOTEMPTY);
|
|
+ TEST_NEW_CHECK_FLAGS ("a", 0, G_REGEX_MATCH_NEWLINE_ANYCRLF | G_REGEX_MATCH_BSR_ANYCRLF,
|
|
+ G_REGEX_NEWLINE_ANYCRLF | G_REGEX_BSR_ANYCRLF,
|
|
+ G_REGEX_MATCH_NEWLINE_ANYCRLF | G_REGEX_MATCH_BSR_ANYCRLF);
|
|
TEST_NEW_CHECK_FLAGS ("a", G_REGEX_RAW, 0, G_REGEX_RAW, 0);
|
|
+ TEST_NEW_CHECK_FLAGS ("(?J)a", 0, 0, G_REGEX_DUPNAMES, 0);
|
|
TEST_NEW_CHECK_FLAGS ("^.*", 0, 0, G_REGEX_ANCHORED, 0);
|
|
TEST_NEW_CHECK_FLAGS ("(*UTF8)a", 0, 0, 0 /* this is the default in GRegex */, 0);
|
|
TEST_NEW_CHECK_FLAGS ("(*UCP)a", 0, 0, 0 /* this always on in GRegex */, 0);
|
|
@@ -2559,6 +2566,8 @@ main (int argc, char *argv[])
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, 0, "a\rb\rc", -1, 0, 0, TRUE);
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_LF, 0, "a\rb\rc", -1, 0, 0, FALSE);
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CRLF, 0, "a\rb\rc", -1, 0, 0, FALSE);
|
|
+ TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_ANYCRLF, 0, "a\r\nb\nc", -1, 0, 0, TRUE);
|
|
+ TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_ANYCRLF, 0, "a\r\nb\rc", -1, 0, 0, TRUE);
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a\nb\nc", -1, 0, 0, FALSE);
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a\nb\nc", -1, 0, 0, TRUE);
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a\nb\nc", -1, 0, 0, FALSE);
|
|
@@ -2568,6 +2577,8 @@ main (int argc, char *argv[])
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a\rb\rc", -1, 0, 0, TRUE);
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a\rb\rc", -1, 0, 0, FALSE);
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a\rb\rc", -1, 0, 0, FALSE);
|
|
+ TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a\r\nb\rc", -1, 0, 0, TRUE);
|
|
+ TEST_MATCH("^b$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a\r\nb\nc", -1, 0, 0, TRUE);
|
|
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_ANY, "a\nb\nc", -1, 0, 0, TRUE);
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_ANY, "a\rb\rc", -1, 0, 0, TRUE);
|
|
@@ -2577,6 +2588,13 @@ main (int argc, char *argv[])
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_CRLF, "a\r\nb\r\nc", -1, 0, 0, TRUE);
|
|
TEST_MATCH("^b$", G_REGEX_MULTILINE | G_REGEX_NEWLINE_CR, G_REGEX_MATCH_NEWLINE_CRLF, "a\rb\rc", -1, 0, 0, FALSE);
|
|
|
|
+ /* See https://gitlab.gnome.org/GNOME/glib/-/issues/2729#note_1544130 */
|
|
+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANY, "a", -1, 0, 0, TRUE);
|
|
+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_LF, "a", -1, 0, 0, TRUE);
|
|
+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CR, "a", -1, 0, 0, TRUE);
|
|
+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_CRLF, "a", -1, 0, 0, TRUE);
|
|
+ TEST_MATCH("^a$", G_REGEX_MULTILINE, G_REGEX_MATCH_NEWLINE_ANYCRLF, "a", -1, 0, 0, TRUE);
|
|
+
|
|
TEST_MATCH("a#\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
|
|
TEST_MATCH("a#\r\nb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
|
|
TEST_MATCH("a#\rb", G_REGEX_EXTENDED, 0, "a", -1, 0, 0, FALSE);
|
|
--
|
|
2.33.0
|
|
GitLab
|