1650 lines
58 KiB
Diff
1650 lines
58 KiB
Diff
From 9a4fd79230cf583153bec4b4749a1864a55c89fb Mon Sep 17 00:00:00 2001
|
||
From: Philip Hazel <Philip.Hazel@gmail.com>
|
||
Date: Sun, 29 Jan 2023 16:46:24 +0000
|
||
Subject: [PATCH] Implement PCRE2_EXTRA_CASELESS_RESTRICT and related features
|
||
|
||
Conflict:don't modify ChangeLog; don't modify maint/* because files don't
|
||
exist; adapt context;
|
||
Reference:https://github.com/PCRE2Project/pcre2/commit/c13d54f6581fa51a270a1ec40b1b7626d686dec1
|
||
|
||
---
|
||
HACKING | 10 +-
|
||
src/pcre2.h.in | 3 +-
|
||
src/pcre2_compile.c | 259 ++++++++++++++++++++++++--------------
|
||
src/pcre2_ucd.c | 6 +-
|
||
src/pcre2test.c | 21 ++--
|
||
testdata/testinput5 | 97 ++++++++++++++
|
||
testdata/testinput7 | 97 ++++++++++++++
|
||
testdata/testoutput5 | 180 ++++++++++++++++++++++++++
|
||
testdata/testoutput7 | 180 ++++++++++++++++++++++++++
|
||
testdata/testoutput8-16-2 | 2 +-
|
||
testdata/testoutput8-8-2 | 2 +-
|
||
11 files changed, 742 insertions(+), 115 deletions(-)
|
||
|
||
diff --git a/HACKING b/HACKING
|
||
index 2f194db..88ebad5 100644
|
||
--- a/HACKING
|
||
+++ b/HACKING
|
||
@@ -1,4 +1,4 @@
|
||
-Technical Notes about PCRE2
|
||
+Technical notes about PCRE2
|
||
---------------------------
|
||
|
||
These are very rough technical notes that record potentially useful information
|
||
@@ -248,7 +248,6 @@ by a length and an offset into the pattern to specify the name.
|
||
The following have one data item that follows in the next vector element:
|
||
|
||
META_BIGVALUE Next is a literal >= META_END
|
||
-META_OPTIONS (?i) and friends (data is new option bits)
|
||
META_POSIX POSIX class item (data identifies the class)
|
||
META_POSIX_NEG negative POSIX class item (ditto)
|
||
|
||
@@ -298,6 +297,11 @@ META_MINMAX {n,m} repeat
|
||
META_MINMAX_PLUS {n,m}+ repeat
|
||
META_MINMAX_QUERY {n,m}? repeat
|
||
|
||
+This one is followed by two elements, giving the new option settings for the
|
||
+main and extra options, respectively.
|
||
+
|
||
+META_OPTIONS (?i) and friends
|
||
+
|
||
This one is followed by three elements. The first is 0 for '>' and 1 for '>=';
|
||
the next two are the major and minor numbers:
|
||
|
||
@@ -827,4 +831,4 @@ not a real opcode, but is used to check at compile time that tables indexed by
|
||
opcode are the correct length, in order to catch updating errors.
|
||
|
||
Philip Hazel
|
||
-April 2022
|
||
+January 2023
|
||
diff --git a/src/pcre2.h.in b/src/pcre2.h.in
|
||
index 7b8818d..60c2905 100644
|
||
--- a/src/pcre2.h.in
|
||
+++ b/src/pcre2.h.in
|
||
@@ -5,7 +5,7 @@
|
||
/* This is the public header file for the PCRE library, second API, to be
|
||
#included by applications that call PCRE2 functions.
|
||
|
||
- Copyright (c) 2016-2021 University of Cambridge
|
||
+ Copyright (c) 2016-2023 University of Cambridge
|
||
|
||
-----------------------------------------------------------------------------
|
||
Redistribution and use in source and binary forms, with or without
|
||
@@ -153,6 +153,7 @@ D is inspected during pcre2_dfa_match() execution
|
||
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
|
||
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
|
||
#define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK 0x00000040u /* C */
|
||
+#define PCRE2_EXTRA_CASELESS_RESTRICT 0x00000080u /* C */
|
||
|
||
/* These are for pcre2_jit_compile(). */
|
||
|
||
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||
index 99ffd29..464c9db 100644
|
||
--- a/src/pcre2_compile.c
|
||
+++ b/src/pcre2_compile.c
|
||
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
|
||
|
||
Written by Philip Hazel
|
||
Original API code Copyright (c) 1997-2012 University of Cambridge
|
||
- New API code Copyright (c) 2016-2022 University of Cambridge
|
||
+ New API code Copyright (c) 2016-2023 University of Cambridge
|
||
|
||
-----------------------------------------------------------------------------
|
||
Redistribution and use in source and binary forms, with or without
|
||
@@ -118,13 +118,13 @@ them will be able to (i.e. assume a 64-bit world). */
|
||
|
||
#ifdef SUPPORT_UNICODE
|
||
static unsigned int
|
||
- add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t,
|
||
+ add_list_to_class_internal(uint8_t *, PCRE2_UCHAR **, uint32_t, uint32_t,
|
||
compile_block *, const uint32_t *, unsigned int);
|
||
#endif
|
||
|
||
static int
|
||
- compile_regex(uint32_t, PCRE2_UCHAR **, uint32_t **, int *, uint32_t,
|
||
- uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *,
|
||
+ compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *,
|
||
+ uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *,
|
||
compile_block *, PCRE2_SIZE *);
|
||
|
||
static int
|
||
@@ -779,7 +779,7 @@ are allowed. */
|
||
PCRE2_NO_DOTSTAR_ANCHOR|PCRE2_UCP|PCRE2_UNGREEDY)
|
||
|
||
#define PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS \
|
||
- (PCRE2_EXTRA_MATCH_LINE|PCRE2_EXTRA_MATCH_WORD)
|
||
+ (PCRE2_EXTRA_MATCH_LINE|PCRE2_EXTRA_MATCH_WORD|PCRE2_EXTRA_CASELESS_RESTRICT)
|
||
|
||
#define PUBLIC_COMPILE_EXTRA_OPTIONS \
|
||
(PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \
|
||
@@ -1059,7 +1059,10 @@ for (;;)
|
||
case META_SKIP: fprintf(stderr, "META (*SKIP)"); break;
|
||
case META_THEN: fprintf(stderr, "META (*THEN)"); break;
|
||
|
||
- case META_OPTIONS: fprintf(stderr, "META_OPTIONS 0x%02x", *pptr++); break;
|
||
+ case META_OPTIONS:
|
||
+ fprintf(stderr, "META_OPTIONS 0x%08x 0x%08x", pptr[0], pptr[1]);
|
||
+ pptr += 2;
|
||
+ break;
|
||
|
||
case META_LOOKBEHIND:
|
||
fprintf(stderr, "META (?<= %d offset=", meta_arg);
|
||
@@ -1491,6 +1494,7 @@ Arguments:
|
||
chptr points to a returned data character
|
||
errorcodeptr points to the errorcode variable (containing zero)
|
||
options the current options bits
|
||
+ xoptions the current extra options bits
|
||
isclass TRUE if inside a character class
|
||
cb compile data block or NULL when called from pcre2_substitute()
|
||
|
||
@@ -1502,7 +1506,7 @@ Returns: zero => a data character
|
||
|
||
int
|
||
PRIV(check_escape)(PCRE2_SPTR *ptrptr, PCRE2_SPTR ptrend, uint32_t *chptr,
|
||
- int *errorcodeptr, uint32_t options, uint32_t extra_options, BOOL isclass,
|
||
+ int *errorcodeptr, uint32_t options, uint32_t xoptions, BOOL isclass,
|
||
compile_block *cb)
|
||
{
|
||
BOOL utf = (options & PCRE2_UTF) != 0;
|
||
@@ -1539,7 +1543,7 @@ else if ((i = escapes[c - ESCAPES_FIRST]) != 0)
|
||
if (i > 0)
|
||
{
|
||
c = (uint32_t)i;
|
||
- if (c == CHAR_CR && (extra_options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)
|
||
+ if (c == CHAR_CR && (xoptions & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)
|
||
c = CHAR_LF;
|
||
}
|
||
else /* Negative table entry */
|
||
@@ -1603,7 +1607,7 @@ else
|
||
PCRE2_SPTR oldptr;
|
||
BOOL overflow;
|
||
BOOL alt_bsux =
|
||
- ((options & PCRE2_ALT_BSUX) | (extra_options & PCRE2_EXTRA_ALT_BSUX)) != 0;
|
||
+ ((options & PCRE2_ALT_BSUX) | (xoptions & PCRE2_EXTRA_ALT_BSUX)) != 0;
|
||
|
||
/* Filter calls from pcre2_substitute(). */
|
||
|
||
@@ -1641,7 +1645,7 @@ else
|
||
|
||
if (ptr >= ptrend) break;
|
||
if (*ptr == CHAR_LEFT_CURLY_BRACKET &&
|
||
- (extra_options & PCRE2_EXTRA_ALT_BSUX) != 0)
|
||
+ (xoptions & PCRE2_EXTRA_ALT_BSUX) != 0)
|
||
{
|
||
PCRE2_SPTR hptr = ptr + 1;
|
||
cc = 0;
|
||
@@ -1685,7 +1689,7 @@ else
|
||
if (c > 0x10ffffU) *errorcodeptr = ERR77;
|
||
else
|
||
if (c >= 0xd800 && c <= 0xdfff &&
|
||
- (extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
|
||
+ (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
|
||
*errorcodeptr = ERR73;
|
||
}
|
||
else if (c > MAX_NON_UTF_CHAR) *errorcodeptr = ERR77;
|
||
@@ -1880,7 +1884,7 @@ else
|
||
else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET)
|
||
{
|
||
if (utf && c >= 0xd800 && c <= 0xdfff &&
|
||
- (extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
|
||
+ (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
|
||
{
|
||
ptr--;
|
||
*errorcodeptr = ERR73;
|
||
@@ -1953,7 +1957,7 @@ else
|
||
else if (ptr < ptrend && *ptr++ == CHAR_RIGHT_CURLY_BRACKET)
|
||
{
|
||
if (utf && c >= 0xd800 && c <= 0xdfff &&
|
||
- (extra_options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
|
||
+ (xoptions & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) == 0)
|
||
{
|
||
ptr--;
|
||
*errorcodeptr = ERR73;
|
||
@@ -2564,6 +2568,7 @@ typedef struct nest_save {
|
||
uint16_t max_group;
|
||
uint16_t flags;
|
||
uint32_t options;
|
||
+ uint32_t xoptions;
|
||
} nest_save;
|
||
|
||
#define NSF_RESET 0x0001u
|
||
@@ -2578,6 +2583,8 @@ the main compiling phase. */
|
||
#define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
|
||
PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
|
||
PCRE2_UNGREEDY)
|
||
+
|
||
+#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT)
|
||
|
||
/* States used for analyzing ranges in character classes. The two OK values
|
||
must be last. */
|
||
@@ -2617,7 +2624,7 @@ uint32_t *this_parsed_item = NULL;
|
||
uint32_t *prev_parsed_item = NULL;
|
||
uint32_t meta_quantifier = 0;
|
||
uint32_t add_after_mark = 0;
|
||
-uint32_t extra_options = cb->cx->extra_options;
|
||
+uint32_t xoptions = cb->cx->extra_options;
|
||
uint16_t nest_depth = 0;
|
||
int after_manual_callout = 0;
|
||
int expect_cond_assert = 0;
|
||
@@ -2641,12 +2648,12 @@ nest_save *top_nest, *end_nests;
|
||
/* Insert leading items for word and line matching (features provided for the
|
||
benefit of pcre2grep). */
|
||
|
||
-if ((extra_options & PCRE2_EXTRA_MATCH_LINE) != 0)
|
||
+if ((xoptions & PCRE2_EXTRA_MATCH_LINE) != 0)
|
||
{
|
||
*parsed_pattern++ = META_CIRCUMFLEX;
|
||
*parsed_pattern++ = META_NOCAPTURE;
|
||
}
|
||
-else if ((extra_options & PCRE2_EXTRA_MATCH_WORD) != 0)
|
||
+else if ((xoptions & PCRE2_EXTRA_MATCH_WORD) != 0)
|
||
{
|
||
*parsed_pattern++ = META_ESCAPE + ESC_b;
|
||
*parsed_pattern++ = META_NOCAPTURE;
|
||
@@ -2697,6 +2704,7 @@ while (ptr < ptrend)
|
||
int prev_expect_cond_assert;
|
||
uint32_t min_repeat = 0, max_repeat = 0;
|
||
uint32_t set, unset, *optset;
|
||
+ uint32_t xset, xunset, *xoptset;
|
||
uint32_t terminator;
|
||
uint32_t prev_meta_quantifier;
|
||
BOOL prev_okquantifier;
|
||
@@ -2834,7 +2842,7 @@ while (ptr < ptrend)
|
||
if ((options & PCRE2_ALT_VERBNAMES) != 0)
|
||
{
|
||
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options,
|
||
- cb->cx->extra_options, FALSE, cb);
|
||
+ xoptions, FALSE, cb);
|
||
if (errorcode != 0) goto FAILED;
|
||
}
|
||
else escape = 0; /* Treat all as literal */
|
||
@@ -3029,11 +3037,11 @@ while (ptr < ptrend)
|
||
case CHAR_BACKSLASH:
|
||
tempptr = ptr;
|
||
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options,
|
||
- cb->cx->extra_options, FALSE, cb);
|
||
+ xoptions, FALSE, cb);
|
||
if (errorcode != 0)
|
||
{
|
||
ESCAPE_FAILED:
|
||
- if ((extra_options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0)
|
||
+ if ((xoptions & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0)
|
||
goto FAILED;
|
||
ptr = tempptr;
|
||
if (ptr >= ptrend) c = CHAR_BACKSLASH; else
|
||
@@ -3607,11 +3615,11 @@ while (ptr < ptrend)
|
||
{
|
||
tempptr = ptr;
|
||
escape = PRIV(check_escape)(&ptr, ptrend, &c, &errorcode, options,
|
||
- cb->cx->extra_options, TRUE, cb);
|
||
+ xoptions, TRUE, cb);
|
||
|
||
if (errorcode != 0)
|
||
{
|
||
- if ((extra_options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0)
|
||
+ if ((xoptions & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) == 0)
|
||
goto FAILED;
|
||
ptr = tempptr;
|
||
if (ptr >= ptrend) c = CHAR_BACKSLASH; else
|
||
@@ -3910,6 +3918,7 @@ while (ptr < ptrend)
|
||
top_nest->nest_depth = nest_depth;
|
||
top_nest->flags = NSF_ATOMICSR;
|
||
top_nest->options = options & PARSE_TRACKED_OPTIONS;
|
||
+ top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS;
|
||
}
|
||
break;
|
||
#else /* SUPPORT_UNICODE */
|
||
@@ -4042,6 +4051,7 @@ while (ptr < ptrend)
|
||
top_nest->nest_depth = nest_depth;
|
||
top_nest->flags = 0;
|
||
top_nest->options = options & PARSE_TRACKED_OPTIONS;
|
||
+ top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS;
|
||
|
||
/* Start of non-capturing group that resets the capture count for each
|
||
branch. */
|
||
@@ -4056,24 +4066,28 @@ while (ptr < ptrend)
|
||
ptr++;
|
||
}
|
||
|
||
- /* Scan for options imnsxJU to be set or unset. */
|
||
+ /* Scan for options imnrsxJU to be set or unset. */
|
||
|
||
else
|
||
{
|
||
BOOL hyphenok = TRUE;
|
||
uint32_t oldoptions = options;
|
||
+ uint32_t oldxoptions = xoptions;
|
||
|
||
top_nest->reset_group = 0;
|
||
top_nest->max_group = 0;
|
||
set = unset = 0;
|
||
optset = &set;
|
||
+ xset = xunset = 0;
|
||
+ xoptset = &xset;
|
||
|
||
- /* ^ at the start unsets imnsx and disables the subsequent use of - */
|
||
+ /* ^ at the start unsets imnrsx and disables the subsequent use of - */
|
||
|
||
if (ptr < ptrend && *ptr == CHAR_CIRCUMFLEX_ACCENT)
|
||
{
|
||
options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE|
|
||
PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE);
|
||
+ xoptions &= ~(PCRE2_EXTRA_CASELESS_RESTRICT);
|
||
hyphenok = FALSE;
|
||
ptr++;
|
||
}
|
||
@@ -4091,6 +4105,7 @@ while (ptr < ptrend)
|
||
goto FAILED;
|
||
}
|
||
optset = &unset;
|
||
+ xoptset = &xunset;
|
||
hyphenok = FALSE;
|
||
break;
|
||
|
||
@@ -4102,6 +4117,7 @@ while (ptr < ptrend)
|
||
case CHAR_i: *optset |= PCRE2_CASELESS; break;
|
||
case CHAR_m: *optset |= PCRE2_MULTILINE; break;
|
||
case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break;
|
||
+ case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break;
|
||
case CHAR_s: *optset |= PCRE2_DOTALL; break;
|
||
case CHAR_U: *optset |= PCRE2_UNGREEDY; break;
|
||
|
||
@@ -4132,6 +4148,7 @@ while (ptr < ptrend)
|
||
unset |= PCRE2_EXTENDED_MORE;
|
||
|
||
options = (options | set) & (~unset);
|
||
+ xoptions = (xoptions | xset) & (~xunset);
|
||
|
||
/* If the options ended with ')' this is not the start of a nested
|
||
group with option changes, so the options change at this level.
|
||
@@ -4152,10 +4169,11 @@ while (ptr < ptrend)
|
||
|
||
/* If nothing changed, no need to record. */
|
||
|
||
- if (options != oldoptions)
|
||
+ if (options != oldoptions || xoptions != oldxoptions)
|
||
{
|
||
*parsed_pattern++ = META_OPTIONS;
|
||
*parsed_pattern++ = options;
|
||
+ *parsed_pattern++ = xoptions;
|
||
}
|
||
} /* End options processing */
|
||
break; /* End default case after (? */
|
||
@@ -4625,6 +4643,7 @@ while (ptr < ptrend)
|
||
top_nest->nest_depth = nest_depth;
|
||
top_nest->flags = NSF_CONDASSERT;
|
||
top_nest->options = options & PARSE_TRACKED_OPTIONS;
|
||
+ top_nest->xoptions = xoptions & PARSE_TRACKED_EXTRA_OPTIONS;
|
||
}
|
||
break;
|
||
|
||
@@ -4758,6 +4777,7 @@ while (ptr < ptrend)
|
||
if (top_nest != NULL && top_nest->nest_depth == nest_depth)
|
||
{
|
||
options = (options & ~PARSE_TRACKED_OPTIONS) | top_nest->options;
|
||
+ xoptions = (xoptions & ~PARSE_TRACKED_EXTRA_OPTIONS) | top_nest->xoptions;
|
||
if ((top_nest->flags & NSF_RESET) != 0 &&
|
||
top_nest->max_group > cb->bracount)
|
||
cb->bracount = top_nest->max_group;
|
||
@@ -4800,12 +4820,12 @@ parsed_pattern = manage_callouts(ptr, &previous_callout, auto_callout,
|
||
/* Insert trailing items for word and line matching (features provided for the
|
||
benefit of pcre2grep). */
|
||
|
||
-if ((extra_options & PCRE2_EXTRA_MATCH_LINE) != 0)
|
||
+if ((xoptions & PCRE2_EXTRA_MATCH_LINE) != 0)
|
||
{
|
||
*parsed_pattern++ = META_KET;
|
||
*parsed_pattern++ = META_DOLLAR;
|
||
}
|
||
-else if ((extra_options & PCRE2_EXTRA_MATCH_WORD) != 0)
|
||
+else if ((xoptions & PCRE2_EXTRA_MATCH_WORD) != 0)
|
||
{
|
||
*parsed_pattern++ = META_KET;
|
||
*parsed_pattern++ = META_ESCAPE + ESC_b;
|
||
@@ -4933,7 +4953,8 @@ for (;;)
|
||
* Get othercase range *
|
||
*************************************************/
|
||
|
||
-/* This function is passed the start and end of a class range in UCP mode. It
|
||
+/* This function is passed the start and end of a class range in UCP mode. For
|
||
+single characters the range may be just one character long. The function
|
||
searches up the characters, looking for ranges of characters in the "other"
|
||
case. Each call returns the next one, updating the start address. A character
|
||
with multiple other cases is returned on its own with a special return value.
|
||
@@ -4947,18 +4968,19 @@ Arguments:
|
||
Yield: -1 when no more
|
||
0 when a range is returned
|
||
>0 the CASESET offset for char with multiple other cases
|
||
- in this case, ocptr contains the original
|
||
+ for this return, *ocptr contains the original
|
||
*/
|
||
|
||
static int
|
||
get_othercase_range(uint32_t *cptr, uint32_t d, uint32_t *ocptr,
|
||
- uint32_t *odptr)
|
||
+ uint32_t *odptr, BOOL restricted)
|
||
{
|
||
uint32_t c, othercase, next;
|
||
unsigned int co;
|
||
|
||
/* Find the first character that has an other case. If it has multiple other
|
||
-cases, return its case offset value. In 32-bit mode, a value
|
||
+cases, return its case offset value. When CASELESS_RESTRICT is set, ignore the
|
||
+multi-case entries that begin with ASCII values. In 32-bit mode, a value
|
||
greater than the Unicode maximum ends the range. */
|
||
|
||
for (c = *cptr; c <= d; c++)
|
||
@@ -4966,12 +4988,19 @@ for (c = *cptr; c <= d; c++)
|
||
#if PCRE2_CODE_UNIT_WIDTH == 32
|
||
if (c > MAX_UTF_CODE_POINT) return -1;
|
||
#endif
|
||
- if ((co = UCD_CASESET(c)) != 0)
|
||
+ if ((co = UCD_CASESET(c)) != 0 &&
|
||
+ (!restricted || PRIV(ucd_caseless_sets)[co] > 127))
|
||
{
|
||
*ocptr = c++; /* Character that has the set */
|
||
*cptr = c; /* Rest of input range */
|
||
return (int)co;
|
||
}
|
||
+
|
||
+ /* This is not a valid multiple-case character. Check that the single other
|
||
+ case is different to the original. We don't need to check "restricted" here
|
||
+ because the non-ASCII characters with multiple cases that include an ASCII
|
||
+ character don't have a different "othercase". */
|
||
+
|
||
if ((othercase = UCD_OTHERCASE(c)) != c) break;
|
||
}
|
||
|
||
@@ -5012,7 +5041,8 @@ add_to_class().
|
||
Arguments:
|
||
classbits the bit map for characters < 256
|
||
uchardptr points to the pointer for extra data
|
||
- options the options word
|
||
+ options the options bits
|
||
+ xoptions the extra options bits
|
||
cb compile data
|
||
start start of range character
|
||
end end of range character
|
||
@@ -5023,7 +5053,8 @@ Returns: the number of < 256 characters added
|
||
|
||
static unsigned int
|
||
add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
|
||
- uint32_t options, compile_block *cb, uint32_t start, uint32_t end)
|
||
+ uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start,
|
||
+ uint32_t end)
|
||
{
|
||
uint32_t c;
|
||
uint32_t classbits_end = (end <= 0xff ? end : 0xff);
|
||
@@ -5031,8 +5062,8 @@ unsigned int n8 = 0;
|
||
|
||
/* If caseless matching is required, scan the range and process alternate
|
||
cases. In Unicode, there are 8-bit characters that have alternate cases that
|
||
-are greater than 255 and vice-versa. Sometimes we can just extend the original
|
||
-range. */
|
||
+are greater than 255 and vice-versa (though these may be ignored if caseless
|
||
+restriction is in force). Sometimes we can just extend the original range. */
|
||
|
||
if ((options & PCRE2_CASELESS) != 0)
|
||
{
|
||
@@ -5045,20 +5076,23 @@ if ((options & PCRE2_CASELESS) != 0)
|
||
options &= ~PCRE2_CASELESS; /* Remove for recursive calls */
|
||
c = start;
|
||
|
||
- while ((rc = get_othercase_range(&c, end, &oc, &od)) >= 0)
|
||
+ while ((rc = get_othercase_range(&c, end, &oc, &od,
|
||
+ (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)) >= 0)
|
||
{
|
||
/* Handle a single character that has more than one other case. */
|
||
|
||
- if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr, options, cb,
|
||
- PRIV(ucd_caseless_sets) + rc, oc);
|
||
+ if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr,
|
||
+ options, xoptions, cb, PRIV(ucd_caseless_sets) + rc, oc);
|
||
|
||
/* Do nothing if the other case range is within the original range. */
|
||
|
||
- else if (oc >= cb->class_range_start && od <= cb->class_range_end) continue;
|
||
+ else if (oc >= cb->class_range_start && od <= cb->class_range_end)
|
||
+ continue;
|
||
|
||
- /* Extend the original range if there is overlap, noting that if oc < c, we
|
||
- can't have od > end because a subrange is always shorter than the basic
|
||
- range. Otherwise, use a recursive call to add the additional range. */
|
||
+ /* Extend the original range if there is overlap, noting that if oc < c,
|
||
+ we can't have od > end because a subrange is always shorter than the
|
||
+ basic range. Otherwise, use a recursive call to add the additional range.
|
||
+ */
|
||
|
||
else if (oc < start && od >= start - 1) start = oc; /* Extend downwards */
|
||
else if (od > end && oc <= end + 1)
|
||
@@ -5066,7 +5100,8 @@ if ((options & PCRE2_CASELESS) != 0)
|
||
end = od; /* Extend upwards */
|
||
if (end > classbits_end) classbits_end = (end <= 0xff ? end : 0xff);
|
||
}
|
||
- else n8 += add_to_class_internal(classbits, uchardptr, options, cb, oc, od);
|
||
+ else n8 += add_to_class_internal(classbits, uchardptr, options, xoptions,
|
||
+ cb, oc, od);
|
||
}
|
||
}
|
||
else
|
||
@@ -5165,7 +5200,8 @@ add_to_class_internal(), with which it is mutually recursive.
|
||
Arguments:
|
||
classbits the bit map for characters < 256
|
||
uchardptr points to the pointer for extra data
|
||
- options the options word
|
||
+ options the options bits
|
||
+ xoptions the extra options bits
|
||
cb contains pointers to tables etc.
|
||
p points to row of 32-bit values, terminated by NOTACHAR
|
||
except character to omit; this is used when adding lists of
|
||
@@ -5178,7 +5214,8 @@ Returns: the number of < 256 characters added
|
||
|
||
static unsigned int
|
||
add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
|
||
- uint32_t options, compile_block *cb, const uint32_t *p, unsigned int except)
|
||
+ uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p,
|
||
+ unsigned int except)
|
||
{
|
||
unsigned int n8 = 0;
|
||
while (p[0] < NOTACHAR)
|
||
@@ -5187,7 +5224,8 @@ while (p[0] < NOTACHAR)
|
||
if (p[0] != except)
|
||
{
|
||
while(p[n+1] == p[0] + n + 1) n++;
|
||
- n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]);
|
||
+ n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
|
||
+ p[0], p[n]);
|
||
}
|
||
p += n + 1;
|
||
}
|
||
@@ -5207,7 +5245,8 @@ to avoid duplication when handling case-independence.
|
||
Arguments:
|
||
classbits the bit map for characters < 256
|
||
uchardptr points to the pointer for extra data
|
||
- options the options word
|
||
+ options the options bits
|
||
+ xoptions the extra options bits
|
||
cb compile data
|
||
start start of range character
|
||
end end of range character
|
||
@@ -5218,11 +5257,12 @@ Returns: the number of < 256 characters added
|
||
|
||
static unsigned int
|
||
add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
|
||
- compile_block *cb, uint32_t start, uint32_t end)
|
||
+ uint32_t xoptions, compile_block *cb, uint32_t start, uint32_t end)
|
||
{
|
||
cb->class_range_start = start;
|
||
cb->class_range_end = end;
|
||
-return add_to_class_internal(classbits, uchardptr, options, cb, start, end);
|
||
+return add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
|
||
+ start, end);
|
||
}
|
||
|
||
|
||
@@ -5239,7 +5279,8 @@ case-independence.
|
||
Arguments:
|
||
classbits the bit map for characters < 256
|
||
uchardptr points to the pointer for extra data
|
||
- options the options word
|
||
+ options the options bits
|
||
+ xoptions the extra options bits
|
||
cb contains pointers to tables etc.
|
||
p points to row of 32-bit values, terminated by NOTACHAR
|
||
except character to omit; this is used when adding lists of
|
||
@@ -5252,7 +5293,7 @@ Returns: the number of < 256 characters added
|
||
|
||
static unsigned int
|
||
add_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
|
||
- compile_block *cb, const uint32_t *p, unsigned int except)
|
||
+ uint32_t xoptions, compile_block *cb, const uint32_t *p, unsigned int except)
|
||
{
|
||
unsigned int n8 = 0;
|
||
while (p[0] < NOTACHAR)
|
||
@@ -5263,7 +5304,8 @@ while (p[0] < NOTACHAR)
|
||
while(p[n+1] == p[0] + n + 1) n++;
|
||
cb->class_range_start = p[0];
|
||
cb->class_range_end = p[n];
|
||
- n8 += add_to_class_internal(classbits, uchardptr, options, cb, p[0], p[n]);
|
||
+ n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
|
||
+ p[0], p[n]);
|
||
}
|
||
p += n + 1;
|
||
}
|
||
@@ -5282,7 +5324,8 @@ vertical whitespace to a class. The list must be in order.
|
||
Arguments:
|
||
classbits the bit map for characters < 256
|
||
uchardptr points to the pointer for extra data
|
||
- options the options word
|
||
+ options the options bits
|
||
+ xoptions the extra options bits
|
||
cb contains pointers to tables etc.
|
||
p points to row of 32-bit values, terminated by NOTACHAR
|
||
|
||
@@ -5292,16 +5335,16 @@ Returns: the number of < 256 characters added
|
||
|
||
static unsigned int
|
||
add_not_list_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
|
||
- uint32_t options, compile_block *cb, const uint32_t *p)
|
||
+ uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p)
|
||
{
|
||
BOOL utf = (options & PCRE2_UTF) != 0;
|
||
unsigned int n8 = 0;
|
||
if (p[0] > 0)
|
||
- n8 += add_to_class(classbits, uchardptr, options, cb, 0, p[0] - 1);
|
||
+ n8 += add_to_class(classbits, uchardptr, options, xoptions, cb, 0, p[0] - 1);
|
||
while (p[0] < NOTACHAR)
|
||
{
|
||
while (p[1] == p[0] + 1) p++;
|
||
- n8 += add_to_class(classbits, uchardptr, options, cb, p[0] + 1,
|
||
+ n8 += add_to_class(classbits, uchardptr, options, xoptions, cb, p[0] + 1,
|
||
(p[1] == NOTACHAR) ? (utf ? 0x10ffffu : 0xffffffffu) : p[1] - 1);
|
||
p++;
|
||
}
|
||
@@ -5392,6 +5435,7 @@ real compile phase. The value of lengthptr distinguishes the two phases.
|
||
|
||
Arguments:
|
||
optionsptr pointer to the option bits
|
||
+ xoptionsptr pointer to the extra option bits
|
||
codeptr points to the pointer to the current code point
|
||
pptrptr points to the current parsed pattern pointer
|
||
errorcodeptr points to error code variable
|
||
@@ -5410,10 +5454,11 @@ Returns: 0 There's been an error, *errorcodeptr is non-zero
|
||
*/
|
||
|
||
static int
|
||
-compile_branch(uint32_t *optionsptr, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
|
||
- int *errorcodeptr, uint32_t *firstcuptr, uint32_t *firstcuflagsptr,
|
||
- uint32_t *reqcuptr, uint32_t *reqcuflagsptr, branch_chain *bcptr,
|
||
- compile_block *cb, PCRE2_SIZE *lengthptr)
|
||
+compile_branch(uint32_t *optionsptr, uint32_t *xoptionsptr,
|
||
+ PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr,
|
||
+ uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr,
|
||
+ uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb,
|
||
+ PCRE2_SIZE *lengthptr)
|
||
{
|
||
int bravalue = 0;
|
||
int okreturn = -1;
|
||
@@ -5422,6 +5467,7 @@ uint32_t repeat_min = 0, repeat_max = 0; /* To please picky compilers */
|
||
uint32_t greedy_default, greedy_non_default;
|
||
uint32_t repeat_type, op_type;
|
||
uint32_t options = *optionsptr; /* May change dynamically */
|
||
+uint32_t xoptions = *xoptionsptr; /* May change dynamically */
|
||
uint32_t firstcu, reqcu;
|
||
uint32_t zeroreqcu, zerofirstcu;
|
||
uint32_t escape;
|
||
@@ -5447,8 +5493,8 @@ const uint8_t *cbits = cb->cbits;
|
||
uint8_t classbits[32];
|
||
|
||
/* We can fish out the UTF setting once and for all into a BOOL, but we must
|
||
-not do this for other options (e.g. PCRE2_EXTENDED) because they may change
|
||
-dynamically as we process the pattern. */
|
||
+not do this for other options (e.g. PCRE2_EXTENDED) that may change dynamically
|
||
+as we process the pattern. */
|
||
|
||
#ifdef SUPPORT_UNICODE
|
||
BOOL utf = (options & PCRE2_UTF) != 0;
|
||
@@ -5699,11 +5745,14 @@ for (;; pptr++)
|
||
|
||
/* For caseless UTF or UCP mode, check whether this character has more
|
||
than one other case. If so, generate a special OP_NOTPROP item instead of
|
||
- OP_NOTI. */
|
||
+ OP_NOTI. When restricted by PCRE2_EXTRA_CASELESS_RESTRICT, ignore any
|
||
+ caseless set that starts with an ASCII character. */
|
||
|
||
#ifdef SUPPORT_UNICODE
|
||
if ((utf||ucp) && (options & PCRE2_CASELESS) != 0 &&
|
||
- (d = UCD_CASESET(c)) != 0)
|
||
+ (d = UCD_CASESET(c)) != 0 &&
|
||
+ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 ||
|
||
+ PRIV(ucd_caseless_sets)[d] > 127))
|
||
{
|
||
*code++ = OP_NOTPROP;
|
||
*code++ = PT_CLIST;
|
||
@@ -5711,7 +5760,7 @@ for (;; pptr++)
|
||
break; /* We are finished with this class */
|
||
}
|
||
#endif
|
||
- /* Char has only one other case, or UCP not available */
|
||
+ /* Char has only one other (usable) case, or UCP not available */
|
||
|
||
*code++ = ((options & PCRE2_CASELESS) != 0)? OP_NOTI: OP_NOT;
|
||
code += PUTCHAR(c, code);
|
||
@@ -5721,7 +5770,9 @@ for (;; pptr++)
|
||
/* Handle character classes that contain more than just one literal
|
||
character. If there are exactly two characters in a positive class, see if
|
||
they are case partners. This can be optimized to generate a caseless single
|
||
- character match (which also sets first/required code units if relevant). */
|
||
+ character match (which also sets first/required code units if relevant).
|
||
+ When casing restrictions apply, ignore a caseless set if both characters
|
||
+ are ASCII. */
|
||
|
||
if (meta == META_CLASS && pptr[1] < META_END && pptr[2] < META_END &&
|
||
pptr[3] == META_CLASS_END)
|
||
@@ -5729,7 +5780,9 @@ for (;; pptr++)
|
||
uint32_t c = pptr[1];
|
||
|
||
#ifdef SUPPORT_UNICODE
|
||
- if (UCD_CASESET(c) == 0)
|
||
+ if (UCD_CASESET(c) == 0 ||
|
||
+ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 &&
|
||
+ c < 128 && pptr[2] < 128))
|
||
#endif
|
||
{
|
||
uint32_t d;
|
||
@@ -5981,22 +6034,24 @@ for (;; pptr++)
|
||
|
||
case ESC_h:
|
||
(void)add_list_to_class(classbits, &class_uchardata,
|
||
- options & ~PCRE2_CASELESS, cb, PRIV(hspace_list), NOTACHAR);
|
||
+ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list),
|
||
+ NOTACHAR);
|
||
break;
|
||
|
||
case ESC_H:
|
||
(void)add_not_list_to_class(classbits, &class_uchardata,
|
||
- options & ~PCRE2_CASELESS, cb, PRIV(hspace_list));
|
||
+ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list));
|
||
break;
|
||
|
||
case ESC_v:
|
||
(void)add_list_to_class(classbits, &class_uchardata,
|
||
- options & ~PCRE2_CASELESS, cb, PRIV(vspace_list), NOTACHAR);
|
||
+ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list),
|
||
+ NOTACHAR);
|
||
break;
|
||
|
||
case ESC_V:
|
||
(void)add_not_list_to_class(classbits, &class_uchardata,
|
||
- options & ~PCRE2_CASELESS, cb, PRIV(vspace_list));
|
||
+ options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list));
|
||
break;
|
||
|
||
/* If Unicode is not supported, \P and \p are not allowed and are
|
||
@@ -6070,32 +6125,32 @@ for (;; pptr++)
|
||
if (C <= CHAR_i)
|
||
{
|
||
class_has_8bitchar +=
|
||
- add_to_class(classbits, &class_uchardata, options, cb, C + uc,
|
||
- ((D < CHAR_i)? D : CHAR_i) + uc);
|
||
+ add_to_class(classbits, &class_uchardata, options, xoptions,
|
||
+ cb, C + uc, ((D < CHAR_i)? D : CHAR_i) + uc);
|
||
C = CHAR_j;
|
||
}
|
||
|
||
if (C <= D && C <= CHAR_r)
|
||
{
|
||
class_has_8bitchar +=
|
||
- add_to_class(classbits, &class_uchardata, options, cb, C + uc,
|
||
- ((D < CHAR_r)? D : CHAR_r) + uc);
|
||
+ add_to_class(classbits, &class_uchardata, options, xoptions,
|
||
+ cb, C + uc, ((D < CHAR_r)? D : CHAR_r) + uc);
|
||
C = CHAR_s;
|
||
}
|
||
|
||
if (C <= D)
|
||
{
|
||
class_has_8bitchar +=
|
||
- add_to_class(classbits, &class_uchardata, options, cb, C + uc,
|
||
- D + uc);
|
||
+ add_to_class(classbits, &class_uchardata, options, xoptions,
|
||
+ cb, C + uc, D + uc);
|
||
}
|
||
}
|
||
else
|
||
#endif
|
||
/* Not an EBCDIC special range */
|
||
|
||
- class_has_8bitchar +=
|
||
- add_to_class(classbits, &class_uchardata, options, cb, c, d);
|
||
+ class_has_8bitchar += add_to_class(classbits, &class_uchardata,
|
||
+ options, xoptions, cb, c, d);
|
||
goto CONTINUE_CLASS; /* Go get the next char in the class */
|
||
} /* End of range handling */
|
||
|
||
@@ -6103,7 +6158,8 @@ for (;; pptr++)
|
||
/* Handle a single character. */
|
||
|
||
class_has_8bitchar +=
|
||
- add_to_class(classbits, &class_uchardata, options, cb, meta, meta);
|
||
+ add_to_class(classbits, &class_uchardata, options, xoptions, cb,
|
||
+ meta, meta);
|
||
}
|
||
|
||
/* Continue to the next item in the class. */
|
||
@@ -6341,6 +6397,7 @@ for (;; pptr++)
|
||
|
||
case META_OPTIONS:
|
||
*optionsptr = options = *(++pptr);
|
||
+ *xoptionsptr = xoptions = *(++pptr);
|
||
greedy_default = ((options & PCRE2_UNGREEDY) != 0);
|
||
greedy_non_default = greedy_default ^ 1;
|
||
req_caseopt = ((options & PCRE2_CASELESS) != 0)? REQ_CASELESS : 0;
|
||
@@ -6586,7 +6643,8 @@ for (;; pptr++)
|
||
|
||
if ((group_return =
|
||
compile_regex(
|
||
- options, /* The option state */
|
||
+ options, /* The options state */
|
||
+ xoptions, /* The extra options state */
|
||
&tempcode, /* Where to put code (updated) */
|
||
&pptr, /* Input pointer (updated) */
|
||
errorcodeptr, /* Where to put an error message */
|
||
@@ -7925,7 +7983,7 @@ for (;; pptr++)
|
||
done. However, there's an option, in case anyone was relying on it. */
|
||
|
||
if (cb->assert_depth > 0 && meta_arg == ESC_K &&
|
||
- (cb->cx->extra_options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0)
|
||
+ (xoptions & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK) == 0)
|
||
{
|
||
*errorcodeptr = ERR99;
|
||
return 0;
|
||
@@ -7977,13 +8035,16 @@ for (;; pptr++)
|
||
|
||
/* For caseless UTF or UCP mode, check whether this character has more than
|
||
one other case. If so, generate a special OP_PROP item instead of OP_CHARI.
|
||
- */
|
||
+ When casing restrictions apply, ignore caseless sets that start with an
|
||
+ ASCII character. */
|
||
|
||
#ifdef SUPPORT_UNICODE
|
||
if ((utf||ucp) && (options & PCRE2_CASELESS) != 0)
|
||
{
|
||
uint32_t caseset = UCD_CASESET(meta);
|
||
- if (caseset != 0)
|
||
+ if (caseset != 0 &&
|
||
+ ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 ||
|
||
+ PRIV(ucd_caseless_sets)[caseset] > 127))
|
||
{
|
||
*code++ = OP_PROP;
|
||
*code++ = PT_CLIST;
|
||
@@ -8099,6 +8160,7 @@ the two phases.
|
||
|
||
Arguments:
|
||
options option bits, including any changes for this subpattern
|
||
+ xoptions extra option bits, ditto
|
||
codeptr -> the address of the current code pointer
|
||
pptrptr -> the address of the current parsed pattern pointer
|
||
errorcodeptr -> pointer to error code variable
|
||
@@ -8118,10 +8180,11 @@ Returns: 0 There has been an error
|
||
*/
|
||
|
||
static int
|
||
-compile_regex(uint32_t options, PCRE2_UCHAR **codeptr, uint32_t **pptrptr,
|
||
- int *errorcodeptr, uint32_t skipunits, uint32_t *firstcuptr,
|
||
- uint32_t *firstcuflagsptr, uint32_t *reqcuptr, uint32_t *reqcuflagsptr,
|
||
- branch_chain *bcptr, compile_block *cb, PCRE2_SIZE *lengthptr)
|
||
+compile_regex(uint32_t options, uint32_t xoptions, PCRE2_UCHAR **codeptr,
|
||
+ uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits,
|
||
+ uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr,
|
||
+ uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb,
|
||
+ PCRE2_SIZE *lengthptr)
|
||
{
|
||
PCRE2_UCHAR *code = *codeptr;
|
||
PCRE2_UCHAR *last_branch = code;
|
||
@@ -8217,9 +8280,9 @@ for (;;)
|
||
into the length. */
|
||
|
||
if ((branch_return =
|
||
- compile_branch(&options, &code, &pptr, errorcodeptr, &branchfirstcu,
|
||
- &branchfirstcuflags, &branchreqcu, &branchreqcuflags, &bc,
|
||
- cb, (lengthptr == NULL)? NULL : &length)) == 0)
|
||
+ compile_branch(&options, &xoptions, &code, &pptr, errorcodeptr,
|
||
+ &branchfirstcu, &branchfirstcuflags, &branchreqcu, &branchreqcuflags,
|
||
+ &bc, cb, (lengthptr == NULL)? NULL : &length)) == 0)
|
||
return 0;
|
||
|
||
/* If a branch can match an empty string, so can the whole group. */
|
||
@@ -9224,7 +9287,7 @@ for (;; pptr++)
|
||
break;
|
||
|
||
case META_OPTIONS:
|
||
- pptr += 1;
|
||
+ pptr += 2;
|
||
break;
|
||
|
||
case META_BIGVALUE:
|
||
@@ -9727,7 +9790,6 @@ for (; *pptr != META_END; pptr++)
|
||
break;
|
||
|
||
case META_BIGVALUE:
|
||
- case META_OPTIONS:
|
||
case META_POSIX:
|
||
case META_POSIX_NEG:
|
||
pptr += 1;
|
||
@@ -9736,6 +9798,7 @@ for (; *pptr != META_END; pptr++)
|
||
case META_MINMAX:
|
||
case META_MINMAX_QUERY:
|
||
case META_MINMAX_PLUS:
|
||
+ case META_OPTIONS:
|
||
pptr += 2;
|
||
break;
|
||
|
||
@@ -10251,8 +10314,9 @@ pptr = cb.parsed_pattern;
|
||
code = cworkspace;
|
||
*code = OP_BRA;
|
||
|
||
-(void)compile_regex(cb.external_options, &code, &pptr, &errorcode, 0, &firstcu,
|
||
- &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, &length);
|
||
+(void)compile_regex(cb.external_options, ccontext->extra_options, &code, &pptr,
|
||
+ &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb,
|
||
+ &length);
|
||
|
||
if (errorcode != 0) goto HAD_CB_ERROR; /* Offset is in cb.erroroffset */
|
||
|
||
@@ -10349,8 +10413,9 @@ of the function here. */
|
||
pptr = cb.parsed_pattern;
|
||
code = (PCRE2_UCHAR *)codestart;
|
||
*code = OP_BRA;
|
||
-regexrc = compile_regex(re->overall_options, &code, &pptr, &errorcode, 0,
|
||
- &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb, NULL);
|
||
+regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code,
|
||
+ &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL,
|
||
+ &cb, NULL);
|
||
if (regexrc < 0) re->flags |= PCRE2_MATCH_EMPTY;
|
||
re->top_bracket = cb.bracount;
|
||
re->top_backref = cb.top_backref;
|
||
diff --git a/src/pcre2_ucd.c b/src/pcre2_ucd.c
|
||
index 5e0fc37..a72944c 100644
|
||
--- a/src/pcre2_ucd.c
|
||
+++ b/src/pcre2_ucd.c
|
||
@@ -68,7 +68,7 @@ the tables when not needed. But don't leave a totally empty module because some
|
||
compilers barf at that. Instead, just supply some small dummy tables. */
|
||
|
||
#ifndef SUPPORT_UNICODE
|
||
-const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0 }};
|
||
+const ucd_record PRIV(ucd_records)[] = {{0,0,0,0,0,0,0}};
|
||
const uint16_t PRIV(ucd_stage1)[] = {0};
|
||
const uint16_t PRIV(ucd_stage2)[] = {0};
|
||
const uint32_t PRIV(ucd_caseless_sets)[] = {0};
|
||
@@ -498,7 +498,7 @@ const ucd_record PRIV(ucd_records)[] = { /* 16908 bytes, record size 12 */
|
||
{ 0, 5, 12, 0, 0, 18432, 60, }, /* 70 */
|
||
{ 0, 5, 12, 0, 0, 18432, 80, }, /* 71 */
|
||
{ 0, 9, 12, 0, -121, 18432, 74, }, /* 72 */
|
||
- { 0, 5, 12, 1, -268, 18432, 70, }, /* 73 */
|
||
+ { 0, 5, 12, 1, 0, 18432, 70, }, /* 73 */
|
||
{ 0, 5, 12, 0, 195, 18432, 76, }, /* 74 */
|
||
{ 0, 9, 12, 0, 210, 18432, 74, }, /* 75 */
|
||
{ 0, 9, 12, 0, 206, 18432, 74, }, /* 76 */
|
||
@@ -1155,7 +1155,7 @@ const ucd_record PRIV(ucd_records)[] = { /* 16908 bytes, record size 12 */
|
||
{ 69, 26, 14, 0, 0, 28672, 236, }, /* 727 */
|
||
{ 1, 9, 12, 96, -7517, 18432, 74, }, /* 728 */
|
||
{ 69, 26, 12, 0, 0, 28672, 118, }, /* 729 */
|
||
- { 0, 9, 12, 100, -8383, 18432, 74, }, /* 730 */
|
||
+ { 0, 9, 12, 100, 0, 18432, 74, }, /* 730 */
|
||
{ 0, 9, 12, 104, -8262, 18432, 74, }, /* 731 */
|
||
{ 69, 26, 12, 0, 0, 14336, 238, }, /* 732 */
|
||
{ 0, 9, 12, 0, 28, 18432, 74, }, /* 733 */
|
||
diff --git a/src/pcre2test.c b/src/pcre2test.c
|
||
index 4fa5884..e768798 100644
|
||
--- a/src/pcre2test.c
|
||
+++ b/src/pcre2test.c
|
||
@@ -653,6 +653,7 @@ static modstruct modlist[] = {
|
||
{ "callout_no_where", MOD_DAT, MOD_CTL, CTL2_CALLOUT_NO_WHERE, DO(control2) },
|
||
{ "callout_none", MOD_DAT, MOD_CTL, CTL_CALLOUT_NONE, DO(control) },
|
||
{ "caseless", MOD_PATP, MOD_OPT, PCRE2_CASELESS, PO(options) },
|
||
+ { "caseless_restrict", MOD_CTC, MOD_OPT, PCRE2_EXTRA_CASELESS_RESTRICT, CO(extra_options) },
|
||
{ "convert", MOD_PAT, MOD_CON, 0, PO(convert_type) },
|
||
{ "convert_glob_escape", MOD_PAT, MOD_CHR, 0, PO(convert_glob_escape) },
|
||
{ "convert_glob_separator", MOD_PAT, MOD_CHR, 0, PO(convert_glob_separator) },
|
||
@@ -833,14 +834,15 @@ typedef struct c1modstruct {
|
||
} c1modstruct;
|
||
|
||
static c1modstruct c1modlist[] = {
|
||
- { "bincode", 'B', -1 },
|
||
- { "info", 'I', -1 },
|
||
- { "global", 'g', -1 },
|
||
- { "caseless", 'i', -1 },
|
||
- { "multiline", 'm', -1 },
|
||
- { "no_auto_capture", 'n', -1 },
|
||
- { "dotall", 's', -1 },
|
||
- { "extended", 'x', -1 }
|
||
+ { "bincode", 'B', -1 },
|
||
+ { "info", 'I', -1 },
|
||
+ { "global", 'g', -1 },
|
||
+ { "caseless", 'i', -1 },
|
||
+ { "multiline", 'm', -1 },
|
||
+ { "no_auto_capture", 'n', -1 },
|
||
+ { "caseless_restrict", 'r', -1 },
|
||
+ { "dotall", 's', -1 },
|
||
+ { "extended", 'x', -1 }
|
||
};
|
||
|
||
#define C1MODLISTCOUNT sizeof(c1modlist)/sizeof(c1modstruct)
|
||
@@ -4257,7 +4259,7 @@ show_compile_extra_options(uint32_t options, const char *before,
|
||
const char *after)
|
||
{
|
||
if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
|
||
-else fprintf(outfile, "%s%s%s%s%s%s%s%s",
|
||
+else fprintf(outfile, "%s%s%s%s%s%s%s%s%s",
|
||
before,
|
||
((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
|
||
((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
|
||
@@ -4265,6 +4267,7 @@ else fprintf(outfile, "%s%s%s%s%s%s%s%s",
|
||
((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
|
||
((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
|
||
((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
|
||
+ ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "",
|
||
after);
|
||
}
|
||
|
||
diff --git a/testdata/testinput5 b/testdata/testinput5
|
||
index 6bd352f..b817423 100644
|
||
--- a/testdata/testinput5
|
||
+++ b/testdata/testinput5
|
||
@@ -2212,4 +2212,101 @@
|
||
|
||
/\p{\2b[:x<>igi:t:_/
|
||
|
||
+# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without
|
||
+# the restriction.
|
||
+
|
||
+/AskZ/i,utf,caseless_restrict
|
||
+ AskZ
|
||
+ aSKz
|
||
+\= Expect no match
|
||
+ A\x{17f}kZ
|
||
+ As\x{212a}Z
|
||
+
|
||
+/AskZ/i,utf
|
||
+ AskZ
|
||
+ aSKz
|
||
+ A\x{17f}kZ
|
||
+ As\x{212a}Z
|
||
+
|
||
+/A\x{17f}\x{212a}Z/ir,utf
|
||
+ \= Expect no match
|
||
+ AskZ
|
||
+
|
||
+/A\x{17f}\x{212a}Z/i,utf
|
||
+ AskZ
|
||
+
|
||
+/[AskZ]+/i,utf,caseless_restrict
|
||
+ AskZ
|
||
+ aSKz
|
||
+ A\x{17f}kZ
|
||
+ As\x{212a}Z
|
||
+
|
||
+/[AskZ]+/i,utf
|
||
+ AskZ
|
||
+ aSKz
|
||
+ A\x{17f}kZ
|
||
+ As\x{212a}Z
|
||
+
|
||
+/[\x{17f}\x{212a}]+/ir,utf
|
||
+\= Expect no match
|
||
+ AskZ
|
||
+
|
||
+/[\x{17f}\x{212a}]+/i,utf
|
||
+ AskZ
|
||
+
|
||
+/[^s]+/ir,utf
|
||
+ A\x{17f}Z
|
||
+
|
||
+/[^s]+/i,utf
|
||
+ A\x{17f}Z
|
||
+
|
||
+/[^k]+/ir,utf
|
||
+ A\x{212a}Z
|
||
+
|
||
+/[^k]+/i,utf
|
||
+ A\x{212a}Z
|
||
+
|
||
+/[^sk]+/ir,utf
|
||
+ A\x{17f}\x{212a}Z
|
||
+
|
||
+/[^sk]+/i,utf
|
||
+ A\x{17f}\x{212a}Z
|
||
+
|
||
+/[^\x{17f}]+/ir,utf
|
||
+ AsSZ
|
||
+
|
||
+/[^\x{17f}]+/i,utf
|
||
+ AsSZ
|
||
+
|
||
+/[Ss]+/irB,utf
|
||
+ Sss\x{17f}ss
|
||
+
|
||
+/[Ss]+/iB,utf
|
||
+ Sss\x{17f}ss
|
||
+
|
||
+/[S\x{17f}]/irB,utf
|
||
+
|
||
+/[S\x{17f}]/iB,utf
|
||
+
|
||
+/[\x{17f}s]/irB,utf
|
||
+
|
||
+/[\x{17f}s]/iB,utf
|
||
+
|
||
+/[\x{4b}\x{6b}]/irB,utf
|
||
+
|
||
+/[\x{4b}\x{6b}]/iB,utf
|
||
+
|
||
+/s(?r)s(?-r)s(?r:s)s/i,utf
|
||
+ \x{17f}S\x{17f}S\x{17f}
|
||
+\= Expect no match
|
||
+ \x{17f}\x{17f}\x{17f}S\x{17f}
|
||
+ \x{17f}S\x{17f}\x{17f}\x{17f}
|
||
+
|
||
+/k(?^i)k/ir,utf
|
||
+ K\x{212a}
|
||
+\= Expect no match
|
||
+ \x{212a}\x{212a}
|
||
+
|
||
+# End caseless restrict tests
|
||
+
|
||
# End of testinput5
|
||
diff --git a/testdata/testinput7 b/testdata/testinput7
|
||
index 2d90b41..991de88 100644
|
||
--- a/testdata/testinput7
|
||
+++ b/testdata/testinput7
|
||
@@ -2231,4 +2231,101 @@
|
||
/\p{sc:katakana}{3,}?/utf
|
||
\x{30a1}\x{30fa}\x{32d0}\x{1b122}\x{ff66}\x{3001}ABC
|
||
|
||
+# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without
|
||
+# the restriction.
|
||
+
|
||
+/AskZ/i,utf,caseless_restrict
|
||
+ AskZ
|
||
+ aSKz
|
||
+\= Expect no match
|
||
+ A\x{17f}kZ
|
||
+ As\x{212a}Z
|
||
+
|
||
+/AskZ/i,utf
|
||
+ AskZ
|
||
+ aSKz
|
||
+ A\x{17f}kZ
|
||
+ As\x{212a}Z
|
||
+
|
||
+/A\x{17f}\x{212a}Z/ir,utf
|
||
+ \= Expect no match
|
||
+ AskZ
|
||
+
|
||
+/A\x{17f}\x{212a}Z/i,utf
|
||
+ AskZ
|
||
+
|
||
+/[AskZ]+/i,utf,caseless_restrict
|
||
+ AskZ
|
||
+ aSKz
|
||
+ A\x{17f}kZ
|
||
+ As\x{212a}Z
|
||
+
|
||
+/[AskZ]+/i,utf
|
||
+ AskZ
|
||
+ aSKz
|
||
+ A\x{17f}kZ
|
||
+ As\x{212a}Z
|
||
+
|
||
+/[\x{17f}\x{212a}]+/ir,utf
|
||
+\= Expect no match
|
||
+ AskZ
|
||
+
|
||
+/[\x{17f}\x{212a}]+/i,utf
|
||
+ AskZ
|
||
+
|
||
+/[^s]+/ir,utf
|
||
+ A\x{17f}Z
|
||
+
|
||
+/[^s]+/i,utf
|
||
+ A\x{17f}Z
|
||
+
|
||
+/[^k]+/ir,utf
|
||
+ A\x{212a}Z
|
||
+
|
||
+/[^k]+/i,utf
|
||
+ A\x{212a}Z
|
||
+
|
||
+/[^sk]+/ir,utf
|
||
+ A\x{17f}\x{212a}Z
|
||
+
|
||
+/[^sk]+/i,utf
|
||
+ A\x{17f}\x{212a}Z
|
||
+
|
||
+/[^\x{17f}]+/ir,utf
|
||
+ AsSZ
|
||
+
|
||
+/[^\x{17f}]+/i,utf
|
||
+ AsSZ
|
||
+
|
||
+/[Ss]+/irB,utf
|
||
+ Sss\x{17f}ss
|
||
+
|
||
+/[Ss]+/iB,utf
|
||
+ Sss\x{17f}ss
|
||
+
|
||
+/[S\x{17f}]/irB,utf
|
||
+
|
||
+/[S\x{17f}]/iB,utf
|
||
+
|
||
+/[\x{17f}s]/irB,utf
|
||
+
|
||
+/[\x{17f}s]/iB,utf
|
||
+
|
||
+/[\x{4b}\x{6b}]/irB,utf
|
||
+
|
||
+/[\x{4b}\x{6b}]/iB,utf
|
||
+
|
||
+/s(?r)s(?-r)s(?r:s)s/i,utf
|
||
+ \x{17f}S\x{17f}S\x{17f}
|
||
+\= Expect no match
|
||
+ \x{17f}\x{17f}\x{17f}S\x{17f}
|
||
+ \x{17f}S\x{17f}\x{17f}\x{17f}
|
||
+
|
||
+/k(?^i)k/ir,utf
|
||
+ K\x{212a}
|
||
+\= Expect no match
|
||
+ \x{212a}\x{212a}
|
||
+
|
||
+# End caseless restrict tests
|
||
+
|
||
# End of testinput7
|
||
diff --git a/testdata/testoutput5 b/testdata/testoutput5
|
||
index 2c3fe94..db42a11 100644
|
||
--- a/testdata/testoutput5
|
||
+++ b/testdata/testoutput5
|
||
@@ -5016,4 +5016,184 @@ Failed: error 147 at offset 8: unknown property after \P or \p
|
||
/\p{\2b[:x<>igi:t:_/
|
||
Failed: error 146 at offset 17: malformed \P or \p sequence
|
||
|
||
+# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without
|
||
+# the restriction.
|
||
+
|
||
+/AskZ/i,utf,caseless_restrict
|
||
+ AskZ
|
||
+ 0: AskZ
|
||
+ aSKz
|
||
+ 0: aSKz
|
||
+\= Expect no match
|
||
+ A\x{17f}kZ
|
||
+No match
|
||
+ As\x{212a}Z
|
||
+No match
|
||
+
|
||
+/AskZ/i,utf
|
||
+ AskZ
|
||
+ 0: AskZ
|
||
+ aSKz
|
||
+ 0: aSKz
|
||
+ A\x{17f}kZ
|
||
+ 0: A\x{17f}kZ
|
||
+ As\x{212a}Z
|
||
+ 0: As\x{212a}Z
|
||
+
|
||
+/A\x{17f}\x{212a}Z/ir,utf
|
||
+ \= Expect no match
|
||
+ AskZ
|
||
+No match
|
||
+
|
||
+/A\x{17f}\x{212a}Z/i,utf
|
||
+ AskZ
|
||
+ 0: AskZ
|
||
+
|
||
+/[AskZ]+/i,utf,caseless_restrict
|
||
+ AskZ
|
||
+ 0: AskZ
|
||
+ aSKz
|
||
+ 0: aSKz
|
||
+ A\x{17f}kZ
|
||
+ 0: A
|
||
+ As\x{212a}Z
|
||
+ 0: As
|
||
+
|
||
+/[AskZ]+/i,utf
|
||
+ AskZ
|
||
+ 0: AskZ
|
||
+ aSKz
|
||
+ 0: aSKz
|
||
+ A\x{17f}kZ
|
||
+ 0: A\x{17f}kZ
|
||
+ As\x{212a}Z
|
||
+ 0: As\x{212a}Z
|
||
+
|
||
+/[\x{17f}\x{212a}]+/ir,utf
|
||
+\= Expect no match
|
||
+ AskZ
|
||
+No match
|
||
+
|
||
+/[\x{17f}\x{212a}]+/i,utf
|
||
+ AskZ
|
||
+ 0: sk
|
||
+
|
||
+/[^s]+/ir,utf
|
||
+ A\x{17f}Z
|
||
+ 0: A\x{17f}Z
|
||
+
|
||
+/[^s]+/i,utf
|
||
+ A\x{17f}Z
|
||
+ 0: A
|
||
+
|
||
+/[^k]+/ir,utf
|
||
+ A\x{212a}Z
|
||
+ 0: A\x{212a}Z
|
||
+
|
||
+/[^k]+/i,utf
|
||
+ A\x{212a}Z
|
||
+ 0: A
|
||
+
|
||
+/[^sk]+/ir,utf
|
||
+ A\x{17f}\x{212a}Z
|
||
+ 0: A\x{17f}\x{212a}Z
|
||
+
|
||
+/[^sk]+/i,utf
|
||
+ A\x{17f}\x{212a}Z
|
||
+ 0: A
|
||
+
|
||
+/[^\x{17f}]+/ir,utf
|
||
+ AsSZ
|
||
+ 0: AsSZ
|
||
+
|
||
+/[^\x{17f}]+/i,utf
|
||
+ AsSZ
|
||
+ 0: A
|
||
+
|
||
+/[Ss]+/irB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ /i S++
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+ Sss\x{17f}ss
|
||
+ 0: Sss
|
||
+
|
||
+/[Ss]+/iB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ [Ss\x{17f}\x{17f}]++
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+ Sss\x{17f}ss
|
||
+ 0: Sss\x{17f}ss
|
||
+
|
||
+/[S\x{17f}]/irB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ [Ss\x{17f}]
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+
|
||
+/[S\x{17f}]/iB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ [Ss\x{17f}\x{17f}]
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+
|
||
+/[\x{17f}s]/irB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ [Ss\x{17f}]
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+
|
||
+/[\x{17f}s]/iB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ [Ss\x{17f}\x{17f}]
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+
|
||
+/[\x{4b}\x{6b}]/irB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ /i K
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+
|
||
+/[\x{4b}\x{6b}]/iB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ [Kk\x{212a}\x{212a}]
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+
|
||
+/s(?r)s(?-r)s(?r:s)s/i,utf
|
||
+ \x{17f}S\x{17f}S\x{17f}
|
||
+ 0: \x{17f}S\x{17f}S\x{17f}
|
||
+\= Expect no match
|
||
+ \x{17f}\x{17f}\x{17f}S\x{17f}
|
||
+No match
|
||
+ \x{17f}S\x{17f}\x{17f}\x{17f}
|
||
+No match
|
||
+
|
||
+/k(?^i)k/ir,utf
|
||
+ K\x{212a}
|
||
+ 0: K\x{212a}
|
||
+\= Expect no match
|
||
+ \x{212a}\x{212a}
|
||
+No match
|
||
+
|
||
+# End caseless restrict tests
|
||
+
|
||
# End of testinput5
|
||
diff --git a/testdata/testoutput7 b/testdata/testoutput7
|
||
index 6e71fc8..c2291a1 100644
|
||
--- a/testdata/testoutput7
|
||
+++ b/testdata/testoutput7
|
||
@@ -3756,4 +3756,184 @@ No match
|
||
1: \x{30a1}\x{30fa}\x{32d0}\x{1b122}
|
||
2: \x{30a1}\x{30fa}\x{32d0}
|
||
|
||
+# Tests for PCRE2_EXTRA_CASELESS_RESTRICT. Compare each test with and without
|
||
+# the restriction.
|
||
+
|
||
+/AskZ/i,utf,caseless_restrict
|
||
+ AskZ
|
||
+ 0: AskZ
|
||
+ aSKz
|
||
+ 0: aSKz
|
||
+\= Expect no match
|
||
+ A\x{17f}kZ
|
||
+No match
|
||
+ As\x{212a}Z
|
||
+No match
|
||
+
|
||
+/AskZ/i,utf
|
||
+ AskZ
|
||
+ 0: AskZ
|
||
+ aSKz
|
||
+ 0: aSKz
|
||
+ A\x{17f}kZ
|
||
+ 0: A\x{17f}kZ
|
||
+ As\x{212a}Z
|
||
+ 0: As\x{212a}Z
|
||
+
|
||
+/A\x{17f}\x{212a}Z/ir,utf
|
||
+ \= Expect no match
|
||
+ AskZ
|
||
+No match
|
||
+
|
||
+/A\x{17f}\x{212a}Z/i,utf
|
||
+ AskZ
|
||
+ 0: AskZ
|
||
+
|
||
+/[AskZ]+/i,utf,caseless_restrict
|
||
+ AskZ
|
||
+ 0: AskZ
|
||
+ aSKz
|
||
+ 0: aSKz
|
||
+ A\x{17f}kZ
|
||
+ 0: A
|
||
+ As\x{212a}Z
|
||
+ 0: As
|
||
+
|
||
+/[AskZ]+/i,utf
|
||
+ AskZ
|
||
+ 0: AskZ
|
||
+ aSKz
|
||
+ 0: aSKz
|
||
+ A\x{17f}kZ
|
||
+ 0: A\x{17f}kZ
|
||
+ As\x{212a}Z
|
||
+ 0: As\x{212a}Z
|
||
+
|
||
+/[\x{17f}\x{212a}]+/ir,utf
|
||
+\= Expect no match
|
||
+ AskZ
|
||
+No match
|
||
+
|
||
+/[\x{17f}\x{212a}]+/i,utf
|
||
+ AskZ
|
||
+ 0: sk
|
||
+
|
||
+/[^s]+/ir,utf
|
||
+ A\x{17f}Z
|
||
+ 0: A\x{17f}Z
|
||
+
|
||
+/[^s]+/i,utf
|
||
+ A\x{17f}Z
|
||
+ 0: A
|
||
+
|
||
+/[^k]+/ir,utf
|
||
+ A\x{212a}Z
|
||
+ 0: A\x{212a}Z
|
||
+
|
||
+/[^k]+/i,utf
|
||
+ A\x{212a}Z
|
||
+ 0: A
|
||
+
|
||
+/[^sk]+/ir,utf
|
||
+ A\x{17f}\x{212a}Z
|
||
+ 0: A\x{17f}\x{212a}Z
|
||
+
|
||
+/[^sk]+/i,utf
|
||
+ A\x{17f}\x{212a}Z
|
||
+ 0: A
|
||
+
|
||
+/[^\x{17f}]+/ir,utf
|
||
+ AsSZ
|
||
+ 0: AsSZ
|
||
+
|
||
+/[^\x{17f}]+/i,utf
|
||
+ AsSZ
|
||
+ 0: A
|
||
+
|
||
+/[Ss]+/irB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ /i S++
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+ Sss\x{17f}ss
|
||
+ 0: Sss
|
||
+
|
||
+/[Ss]+/iB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ [Ss\x{17f}\x{17f}]++
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+ Sss\x{17f}ss
|
||
+ 0: Sss\x{17f}ss
|
||
+
|
||
+/[S\x{17f}]/irB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ [Ss\x{17f}]
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+
|
||
+/[S\x{17f}]/iB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ [Ss\x{17f}\x{17f}]
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+
|
||
+/[\x{17f}s]/irB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ [Ss\x{17f}]
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+
|
||
+/[\x{17f}s]/iB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ [Ss\x{17f}\x{17f}]
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+
|
||
+/[\x{4b}\x{6b}]/irB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ /i K
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+
|
||
+/[\x{4b}\x{6b}]/iB,utf
|
||
+------------------------------------------------------------------
|
||
+ Bra
|
||
+ [Kk\x{212a}\x{212a}]
|
||
+ Ket
|
||
+ End
|
||
+------------------------------------------------------------------
|
||
+
|
||
+/s(?r)s(?-r)s(?r:s)s/i,utf
|
||
+ \x{17f}S\x{17f}S\x{17f}
|
||
+ 0: \x{17f}S\x{17f}S\x{17f}
|
||
+\= Expect no match
|
||
+ \x{17f}\x{17f}\x{17f}S\x{17f}
|
||
+No match
|
||
+ \x{17f}S\x{17f}\x{17f}\x{17f}
|
||
+No match
|
||
+
|
||
+/k(?^i)k/ir,utf
|
||
+ K\x{212a}
|
||
+ 0: K\x{212a}
|
||
+\= Expect no match
|
||
+ \x{212a}\x{212a}
|
||
+No match
|
||
+
|
||
+# End caseless restrict tests
|
||
+
|
||
# End of testinput7
|
||
diff --git a/testdata/testoutput8-16-2 b/testdata/testoutput8-16-2
|
||
index 569a860..49b1022 100644
|
||
--- a/testdata/testoutput8-16-2
|
||
+++ b/testdata/testoutput8-16-2
|
||
@@ -838,7 +838,7 @@ Memory allocation (code space): 14
|
||
/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|
|
||
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
|
||
/parens_nest_limit=1000,-fullbincode
|
||
-Failed: error 184 at offset 1504: (?| and/or (?J: or (?x: parentheses are too deeply nested
|
||
+Failed: error 184 at offset 1129: (?| and/or (?J: or (?x: parentheses are too deeply nested
|
||
|
||
# Use "expand" to create some very long patterns with nested parentheses, in
|
||
# order to test workspace overflow. Again, this varies with code unit width,
|
||
diff --git a/testdata/testoutput8-8-2 b/testdata/testoutput8-8-2
|
||
index 8393d5c..e9568e5 100644
|
||
--- a/testdata/testoutput8-8-2
|
||
+++ b/testdata/testoutput8-8-2
|
||
@@ -838,7 +838,7 @@ Memory allocation (code space): 10
|
||
/(?|(?|(?J:(?|(?x:(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|(?|
|
||
)))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))))
|
||
/parens_nest_limit=1000,-fullbincode
|
||
-Failed: error 184 at offset 1504: (?| and/or (?J: or (?x: parentheses are too deeply nested
|
||
+Failed: error 184 at offset 1129: (?| and/or (?J: or (?x: parentheses are too deeply nested
|
||
|
||
# Use "expand" to create some very long patterns with nested parentheses, in
|
||
# order to test workspace overflow. Again, this varies with code unit width,
|
||
--
|
||
2.23.0
|
||
|