pcre2/backport-Additional-PCRE2_EXTRA_ASCII_xxx-code.patch

From a6089462a460a9f6c2db63a86e1c09fabaa81499 Mon Sep 17 00:00:00 2001
From: Philip Hazel <Philip.Hazel@gmail.com>
Date: Wed, 1 Feb 2023 17:42:29 +0000
Subject: [PATCH] Additional PCRE2_EXTRA_ASCII_xxx code

Conflict:NA
Reference:https://github.com/PCRE2Project/pcre2/commit/a6089462a460a9f6c2db63a86e1c09fabaa81499

---
 src/pcre2.h.in       |   4 +
 src/pcre2_compile.c  | 375 ++++++++++++++++++++++++++-----------------
 src/pcre2test.c      |  21 ++-
 testdata/testinput5  | 133 +++++++++++++++
 testdata/testinput7  | 133 +++++++++++++++
 testdata/testoutput5 | 179 +++++++++++++++++++++
 testdata/testoutput7 | 179 +++++++++++++++++++++
 7 files changed, 869 insertions(+), 155 deletions(-)

diff --git a/src/pcre2.h.in b/src/pcre2.h.in
index 11419a38..7202c633 100644
--- a/src/pcre2.h.in
+++ b/src/pcre2.h.in
@@ -154,6 +154,10 @@ D   is inspected during pcre2_dfa_match() execution
 #define PCRE2_EXTRA_ALT_BSUX                 0x00000020u  /* C */
 #define PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK     0x00000040u  /* C */
 #define PCRE2_EXTRA_CASELESS_RESTRICT        0x00000080u  /* C */
+#define PCRE2_EXTRA_ASCII_BSD                0x00000100u  /* C */
+#define PCRE2_EXTRA_ASCII_BSS                0x00000200u  /* C */
+#define PCRE2_EXTRA_ASCII_BSW                0x00000400u  /* C */
+#define PCRE2_EXTRA_ASCII_POSIX              0x00000800u  /* C */

 /* These are for pcre2_jit_compile(). */

diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index ed2fe8a7..b8a9e098 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -123,7 +123,7 @@ static unsigned int
 #endif

 static int
-  compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *,
+  compile_regex(uint32_t, uint32_t, PCRE2_UCHAR **, uint32_t **, int *,
     uint32_t, uint32_t *, uint32_t *, uint32_t *, uint32_t *, branch_chain *,
     compile_block *, PCRE2_SIZE *);

@@ -694,8 +694,8 @@ static uint32_t chartypeoffset[] = {
 now all in a single string, to reduce the number of relocations when a shared
 library is dynamically loaded. The list of lengths is terminated by a zero
 length entry. The first three must be alpha, lower, upper, as this is assumed
-for handling case independence. The indices for graph, print, and punct are
-needed, so identify them. */
+for handling case independence. The indices for several classes are needed, so
+identify them. */

 static const char posix_names[] =
   STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
@@ -785,7 +785,8 @@ are allowed. */
    (PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \
     PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL| \
     PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \
-    PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)
+    PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK|PCRE2_EXTRA_ASCII_BSD| \
+    PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX)

 /* Compile time error code numbers. They are given names so that they can more
 easily be tracked. When a new number is added, the tables called eint1 and
@@ -1059,9 +1060,9 @@ for (;;)
     case META_SKIP: fprintf(stderr, "META (*SKIP)"); break;
     case META_THEN: fprintf(stderr, "META (*THEN)"); break;

-    case META_OPTIONS:
-    fprintf(stderr, "META_OPTIONS 0x%08x 0x%08x", pptr[0], pptr[1]);
-    pptr += 2;
+    case META_OPTIONS:
+    fprintf(stderr, "META_OPTIONS 0x%08x 0x%08x", pptr[0], pptr[1]);
+    pptr += 2;
     break;

     case META_LOOKBEHIND:
@@ -1494,7 +1495,7 @@ Arguments:
   chptr          points to a returned data character
   errorcodeptr   points to the errorcode variable (containing zero)
   options        the current options bits
-  xoptions       the current extra options bits
+  xoptions       the current extra options bits
   isclass        TRUE if inside a character class
   cb             compile data block or NULL when called from pcre2_substitute()

@@ -2536,6 +2537,85 @@ return parsed_pattern;


+/*************************************************
+*          Handle \d, \D, \s, \S, \w, \W         *
+*************************************************/
+
+/* This function is called from parse_regex() below, both for freestanding
+escapes, and those within classes, to handle those escapes that may change when
+Unicode property support is requested. Note that PCRE2_UCP will never be set
+without Unicode support because that is checked when pcre2_compile() is called.
+
+Arguments:
+  escape          the ESC_... value
+  parsed_pattern  where to add the code
+  options         options bits
+  xoptions        extra options bits
+
+Returns:          updated value of parsed_pattern
+*/
+static uint32_t *
+handle_escdsw(int escape, uint32_t *parsed_pattern, uint32_t options,
+  uint32_t xoptions)
+{
+uint32_t ascii_option = 0;   /* extra option that keeps this escape ASCII */
+uint32_t prop = ESC_p;       /* \p for \d \s \w; set to \P for \D \S \W */
+
+switch(escape)
+  {
+  case ESC_D:
+  prop = ESC_P;
+  /* Fall through: \D and \d are governed by the same extra option */
+  case ESC_d:
+  ascii_option = PCRE2_EXTRA_ASCII_BSD;
+  break;
+
+  case ESC_S:
+  prop = ESC_P;
+  /* Fall through: \S and \s are governed by the same extra option */
+  case ESC_s:
+  ascii_option = PCRE2_EXTRA_ASCII_BSS;
+  break;
+
+  case ESC_W:
+  prop = ESC_P;
+  /* Fall through: \W and \w are governed by the same extra option */
+  case ESC_w:
+  ascii_option = PCRE2_EXTRA_ASCII_BSW;
+  break;
+  }
+
+if ((options & PCRE2_UCP) == 0 || (xoptions & ascii_option) != 0)
+  {                          /* UCP off, or ASCII forced for this escape */
+  *parsed_pattern++ = META_ESCAPE + escape;   /* emit the escape as is */
+  }
+else
+  {                          /* UCP on: emit \p or \P plus one data word */
+  *parsed_pattern++ = META_ESCAPE + prop;
+  switch(escape)
+    {
+    case ESC_d:
+    case ESC_D:
+    *parsed_pattern++ = (PT_PC << 16) | ucp_Nd;   /* (type << 16) | value */
+    break;
+
+    case ESC_s:
+    case ESC_S:
+    *parsed_pattern++ = PT_SPACE << 16;           /* type only, value 0 */
+    break;
+
+    case ESC_w:
+    case ESC_W:
+    *parsed_pattern++ = PT_WORD << 16;            /* type only, value 0 */
+    break;
+    }
+  }
+
+return parsed_pattern;       /* now points past the one or two items added */
+}
+
+
+
 /*************************************************
 *      Parse regex and identify named groups     *
 *************************************************/
@@ -2564,7 +2644,7 @@ typedef struct nest_save {
   uint16_t  max_group;
   uint16_t  flags;
   uint32_t  options;
-  uint32_t  xoptions;
+  uint32_t  xoptions;
 } nest_save;

 #define NSF_RESET          0x0001u
@@ -2579,8 +2659,11 @@ the main compiling phase. */
 #define PARSE_TRACKED_OPTIONS (PCRE2_CASELESS|PCRE2_DOTALL|PCRE2_DUPNAMES| \
   PCRE2_EXTENDED|PCRE2_EXTENDED_MORE|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE| \
   PCRE2_UNGREEDY)
-
-#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT)
+
+/* Extra options tracked in the same way; they are restored from the saved
+nest data when a group ends. Defined once only, including the ASCII_BSx bits. */
+#define PARSE_TRACKED_EXTRA_OPTIONS (PCRE2_EXTRA_CASELESS_RESTRICT| \
+  PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|PCRE2_EXTRA_ASCII_BSW)

 /* States used for analyzing ranges in character classes. The two OK values
 must be last. */
@@ -3115,9 +3198,7 @@ while (ptr < ptrend)
       *parsed_pattern++ = META_ESCAPE + escape;
       break;

-      /* Escapes that change in UCP mode. Note that PCRE2_UCP will never be set
-      without Unicode support because it is checked when pcre2_compile() is
-      called. */
+      /* Escapes that may change in UCP mode. */

       case ESC_d:
       case ESC_D:
@@ -3126,33 +3207,8 @@ while (ptr < ptrend)
       case ESC_w:
       case ESC_W:
       okquantifier = TRUE;
-      if ((options & PCRE2_UCP) == 0)
-        {
-        *parsed_pattern++ = META_ESCAPE + escape;
-        }
-      else
-        {
-        *parsed_pattern++ = META_ESCAPE +
-          ((escape == ESC_d || escape == ESC_s || escape == ESC_w)?
-            ESC_p : ESC_P);
-        switch(escape)
-          {
-          case ESC_d:
-          case ESC_D:
-          *parsed_pattern++ = (PT_PC << 16) | ucp_Nd;
-          break;
-
-          case ESC_s:
-          case ESC_S:
-          *parsed_pattern++ = PT_SPACE << 16;
-          break;
-
-          case ESC_w:
-          case ESC_W:
-          *parsed_pattern++ = PT_WORD << 16;
-          break;
-          }
-        }
+      parsed_pattern = handle_escdsw(escape, parsed_pattern, options,
+        xoptions);
       break;

       /* Unicode property matching */
@@ -3515,18 +3571,22 @@ while (ptr < ptrend)

         class_range_state = RANGE_NO;

-        /* When PCRE2_UCP is set, some of the POSIX classes are converted to
-        use Unicode properties \p or \P or, in one case, \h or \H. The
-        substitutes table has two values per class, containing the type and
-        value of a \p or \P item. The special cases are specified with a
-        negative type: a non-zero value causes \h or \H to be used, and a zero
-        value falls through to behave like a non-UCP POSIX class. */
+        /* When PCRE2_UCP is set, unless PCRE2_EXTRA_ASCII_POSIX is set, some
+        of the POSIX classes are converted to use Unicode properties \p or \P
+        or, in one case, \h or \H. The substitutes table has two values per
+        class, containing the type and value of a \p or \P item. The special
+        cases are specified with a negative type: a non-zero value causes \h or
+        \H to be used, and a zero value falls through to behave like a non-UCP
+        POSIX class. There are now also some extra options that force ASCII for
+        some classes. */

 #ifdef SUPPORT_UNICODE
-        if ((options & PCRE2_UCP) != 0)
+        if ((options & PCRE2_UCP) != 0 &&
+            (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0)
           {
           int ptype = posix_substitutes[2*posix_class];
           int pvalue = posix_substitutes[2*posix_class + 1];
+
           if (ptype >= 0)
             {
             *parsed_pattern++ = META_ESCAPE + (posix_negate? ESC_P : ESC_p);
@@ -3664,7 +3724,7 @@ while (ptr < ptrend)
           *parsed_pattern++ = META_ESCAPE + escape;
           break;

-          /* These escapes are converted to Unicode property tests when
+          /* These escapes may be converted to Unicode property tests when
           PCRE2_UCP is set. */

           case ESC_d:
@@ -3673,33 +3733,8 @@ while (ptr < ptrend)
           case ESC_S:
           case ESC_w:
           case ESC_W:
-          if ((options & PCRE2_UCP) == 0)
-            {
-            *parsed_pattern++ = META_ESCAPE + escape;
-            }
-          else
-            {
-            *parsed_pattern++ = META_ESCAPE +
-              ((escape == ESC_d || escape == ESC_s || escape == ESC_w)?
-                ESC_p : ESC_P);
-            switch(escape)
-              {
-              case ESC_d:
-              case ESC_D:
-              *parsed_pattern++ = (PT_PC << 16) | ucp_Nd;
-              break;
-
-              case ESC_s:
-              case ESC_S:
-              *parsed_pattern++ = PT_SPACE << 16;
-              break;
-
-              case ESC_w:
-              case ESC_W:
-              *parsed_pattern++ = PT_WORD << 16;
-              break;
-              }
-            }
+          parsed_pattern = handle_escdsw(escape, parsed_pattern, options,
+            xoptions);
           break;

           /* Explicit Unicode property matching */
@@ -4052,7 +4087,7 @@ while (ptr < ptrend)
         {
         BOOL hyphenok = TRUE;
         uint32_t oldoptions = options;
-        uint32_t oldxoptions = xoptions;
+        uint32_t oldxoptions = xoptions;

         top_nest->reset_group = 0;
         top_nest->max_group = 0;
@@ -4067,7 +4102,7 @@ while (ptr < ptrend)
           {
           options &= ~(PCRE2_CASELESS|PCRE2_MULTILINE|PCRE2_NO_AUTO_CAPTURE|
                        PCRE2_DOTALL|PCRE2_EXTENDED|PCRE2_EXTENDED_MORE);
-          xoptions &= ~(PCRE2_EXTRA_CASELESS_RESTRICT);
+          xoptions &= ~(PCRE2_EXTRA_CASELESS_RESTRICT);
           hyphenok = FALSE;
           ptr++;
           }
@@ -4085,10 +4120,44 @@ while (ptr < ptrend)
               goto FAILED;
               }
             optset = &unset;
-            xoptset = &xunset;
+            xoptset = &xunset;
             hyphenok = FALSE;
             break;

+            /* There are some two-character sequences that start with 'a'. */
+
+            case CHAR_a:
+            if (ptr < ptrend)
+              {
+              if (*ptr == CHAR_D)
+                {
+                *xoptset |= PCRE2_EXTRA_ASCII_BSD;
+                ptr++;
+                break;
+                }
+              if (*ptr == CHAR_P)
+                {
+                *xoptset |= PCRE2_EXTRA_ASCII_POSIX;
+                ptr++;
+                break;
+                }
+              if (*ptr == CHAR_S)
+                {
+                *xoptset |= PCRE2_EXTRA_ASCII_BSS;
+                ptr++;
+                break;
+                }
+              if (*ptr == CHAR_W)
+                {
+                *xoptset |= PCRE2_EXTRA_ASCII_BSW;
+                ptr++;
+                break;
+                }
+              }
+            *xoptset |= PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS|
+                        PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX;
+            break;
+
             case CHAR_J:  /* Record that it changed in the external options */
             *optset |= PCRE2_DUPNAMES;
             cb->external_flags |= PCRE2_JCHANGED;
@@ -4097,7 +4166,7 @@ while (ptr < ptrend)
             case CHAR_i: *optset |= PCRE2_CASELESS; break;
             case CHAR_m: *optset |= PCRE2_MULTILINE; break;
             case CHAR_n: *optset |= PCRE2_NO_AUTO_CAPTURE; break;
-            case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break;
+            case CHAR_r: *xoptset|= PCRE2_EXTRA_CASELESS_RESTRICT; break;
             case CHAR_s: *optset |= PCRE2_DOTALL; break;
             case CHAR_U: *optset |= PCRE2_UNGREEDY; break;

@@ -4757,7 +4826,7 @@ while (ptr < ptrend)
     if (top_nest != NULL && top_nest->nest_depth == nest_depth)
       {
       options = (options & ~PARSE_TRACKED_OPTIONS) | top_nest->options;
-      xoptions = (xoptions & ~PARSE_TRACKED_EXTRA_OPTIONS) | top_nest->xoptions;
+      xoptions = (xoptions & ~PARSE_TRACKED_EXTRA_OPTIONS) | top_nest->xoptions;
       if ((top_nest->flags & NSF_RESET) != 0 &&
           top_nest->max_group > cb->bracount)
         cb->bracount = top_nest->max_group;
@@ -5019,7 +5088,7 @@ Arguments:
   classbits     the bit map for characters < 256
   uchardptr     points to the pointer for extra data
   options       the options bits
-  xoptions      the extra options bits
+  xoptions      the extra options bits
   cb            compile data
   start         start of range character
   end           end of range character
@@ -5030,7 +5099,7 @@ Returns:        the number of < 256 characters added

 static unsigned int
 add_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
-  uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start,
+  uint32_t options, uint32_t xoptions, compile_block *cb, uint32_t start,
   uint32_t end)
 {
 uint32_t c;
@@ -5039,7 +5108,7 @@ unsigned int n8 = 0;

 /* If caseless matching is required, scan the range and process alternate
 cases. In Unicode, there are 8-bit characters that have alternate cases that
-are greater than 255 and vice-versa (though these may be ignored if caseless
+are greater than 255 and vice-versa (though these may be ignored if caseless
 restriction is in force). Sometimes we can just extend the original range. */

 if ((options & PCRE2_CASELESS) != 0)
@@ -5053,17 +5122,17 @@ if ((options & PCRE2_CASELESS) != 0)
     options &= ~PCRE2_CASELESS;   /* Remove for recursive calls */
     c = start;

-    while ((rc = get_othercase_range(&c, end, &oc, &od,
+    while ((rc = get_othercase_range(&c, end, &oc, &od,
              (xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)) >= 0)
       {
       /* Handle a single character that has more than one other case. */

-      if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr,
+      if (rc > 0) n8 += add_list_to_class_internal(classbits, uchardptr,
         options, xoptions, cb, PRIV(ucd_caseless_sets) + rc, oc);

       /* Do nothing if the other case range is within the original range. */

-      else if (oc >= cb->class_range_start && od <= cb->class_range_end)
+      else if (oc >= cb->class_range_start && od <= cb->class_range_end)
         continue;

       /* Extend the original range if there is overlap, noting that if oc < c,
@@ -5178,7 +5247,7 @@ Arguments:
   classbits     the bit map for characters < 256
   uchardptr     points to the pointer for extra data
   options       the options bits
-  xoptions      the extra options bits
+  xoptions      the extra options bits
   cb            contains pointers to tables etc.
   p             points to row of 32-bit values, terminated by NOTACHAR
   except        character to omit; this is used when adding lists of
@@ -5191,7 +5260,7 @@ Returns:        the number of < 256 characters added

 static unsigned int
 add_list_to_class_internal(uint8_t *classbits, PCRE2_UCHAR **uchardptr,
-  uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p,
+  uint32_t options, uint32_t xoptions, compile_block *cb, const uint32_t *p,
   unsigned int except)
 {
 unsigned int n8 = 0;
@@ -5201,7 +5270,7 @@ while (p[0] < NOTACHAR)
   if (p[0] != except)
     {
     while(p[n+1] == p[0] + n + 1) n++;
-    n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
+    n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
       p[0], p[n]);
     }
   p += n + 1;
@@ -5223,7 +5292,7 @@ Arguments:
   classbits     the bit map for characters < 256
   uchardptr     points to the pointer for extra data
   options       the options bits
-  xoptions      the extra options bits
+  xoptions      the extra options bits
   cb            compile data
   start         start of range character
   end           end of range character
@@ -5238,7 +5307,7 @@ add_to_class(uint8_t *classbits, PCRE2_UCHAR **uchardptr, uint32_t options,
 {
 cb->class_range_start = start;
 cb->class_range_end = end;
-return add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
+return add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
   start, end);
 }

@@ -5257,7 +5326,7 @@ Arguments:
   classbits     the bit map for characters < 256
   uchardptr     points to the pointer for extra data
   options       the options bits
-  xoptions      the extra options bits
+  xoptions      the extra options bits
   cb            contains pointers to tables etc.
   p             points to row of 32-bit values, terminated by NOTACHAR
   except        character to omit; this is used when adding lists of
@@ -5281,7 +5350,7 @@ while (p[0] < NOTACHAR)
     while(p[n+1] == p[0] + n + 1) n++;
     cb->class_range_start = p[0];
     cb->class_range_end = p[n];
-    n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
+    n8 += add_to_class_internal(classbits, uchardptr, options, xoptions, cb,
       p[0], p[n]);
     }
   p += n + 1;
@@ -5302,7 +5371,7 @@ Arguments:
   classbits     the bit map for characters < 256
   uchardptr     points to the pointer for extra data
   options       the options bits
-  xoptions      the extra options bits
+  xoptions      the extra options bits
   cb            contains pointers to tables etc.
   p             points to row of 32-bit values, terminated by NOTACHAR

@@ -5412,7 +5481,7 @@ real compile phase. The value of lengthptr distinguishes the two phases.

 Arguments:
   optionsptr        pointer to the option bits
-  xoptionsptr       pointer to the extra option bits
+  xoptionsptr       pointer to the extra option bits
   codeptr           points to the pointer to the current code point
   pptrptr           points to the current parsed pattern pointer
   errorcodeptr      points to error code variable
@@ -5431,10 +5500,10 @@ Returns:            0 There's been an error, *errorcodeptr is non-zero
 */

 static int
-compile_branch(uint32_t *optionsptr, uint32_t *xoptionsptr,
-  PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr,
-  uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr,
-  uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb,
+compile_branch(uint32_t *optionsptr, uint32_t *xoptionsptr,
+  PCRE2_UCHAR **codeptr, uint32_t **pptrptr, int *errorcodeptr,
+  uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr,
+  uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb,
   PCRE2_SIZE *lengthptr)
 {
 int bravalue = 0;
@@ -5757,8 +5826,8 @@ for (;; pptr++)
       uint32_t c = pptr[1];

 #ifdef SUPPORT_UNICODE
-      if (UCD_CASESET(c) == 0 ||
-         ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 &&
+      if (UCD_CASESET(c) == 0 ||
+         ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) != 0 &&
          c < 128 && pptr[2] < 128))
 #endif
         {
@@ -5851,41 +5920,45 @@ for (;; pptr++)
         XCL_PROP/XCL_NOTPROP directly, which is done here. */

 #ifdef SUPPORT_UNICODE
-        if ((options & PCRE2_UCP) != 0) switch(posix_class)
+        if ((options & PCRE2_UCP) != 0 &&
+            (xoptions & PCRE2_EXTRA_ASCII_POSIX) == 0)
           {
-          case PC_GRAPH:
-          case PC_PRINT:
-          case PC_PUNCT:
-          *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
-          *class_uchardata++ = (PCRE2_UCHAR)
-            ((posix_class == PC_GRAPH)? PT_PXGRAPH :
-             (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT);
-          *class_uchardata++ = 0;
-          xclass_has_prop = TRUE;
-          goto CONTINUE_CLASS;
-
-          /* For the other POSIX classes (ascii, xdigit) we are going to
-          fall through to the non-UCP case and build a bit map for
-          characters with code points less than 256. However, if we are in
-          a negated POSIX class, characters with code points greater than
-          255 must either all match or all not match, depending on whether
-          the whole class is not or is negated. For example, for
-          [[:^ascii:]... they must all match, whereas for [^[:^xdigit:]...
-          they must not.
-
-          In the special case where there are no xclass items, this is
-          automatically handled by the use of OP_CLASS or OP_NCLASS, but an
-          explicit range is needed for OP_XCLASS. Setting a flag here
-          causes the range to be generated later when it is known that
-          OP_XCLASS is required. In the 8-bit library this is relevant only in
-          utf mode, since no wide characters can exist otherwise. */
+          switch(posix_class)
+            {
+            case PC_GRAPH:
+            case PC_PRINT:
+            case PC_PUNCT:
+            *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
+            *class_uchardata++ = (PCRE2_UCHAR)
+              ((posix_class == PC_GRAPH)? PT_PXGRAPH :
+               (posix_class == PC_PRINT)? PT_PXPRINT : PT_PXPUNCT);
+            *class_uchardata++ = 0;
+            xclass_has_prop = TRUE;
+            goto CONTINUE_CLASS;
+
+            /* For the other POSIX classes (ascii, xdigit) we are going to
+            fall through to the non-UCP case and build a bit map for
+            characters with code points less than 256. However, if we are in
+            a negated POSIX class, characters with code points greater than
+            255 must either all match or all not match, depending on whether
+            the whole class is not or is negated. For example, for
+            [[:^ascii:]... they must all match, whereas for [^[:^xdigit:]...
+            they must not.
+
+            In the special case where there are no xclass items, this is
+            automatically handled by the use of OP_CLASS or OP_NCLASS, but an
+            explicit range is needed for OP_XCLASS. Setting a flag here
+            causes the range to be generated later when it is known that
+            OP_XCLASS is required. In the 8-bit library this is relevant only in
+            utf mode, since no wide characters can exist otherwise. */

-          default:
+            default:
 #if PCRE2_CODE_UNIT_WIDTH == 8
-          if (utf)
+            if (utf)
 #endif
-          match_all_or_no_wide_chars |= local_negate;
-          break;
+            match_all_or_no_wide_chars |= local_negate;
+            break;
+            }
           }
 #endif  /* SUPPORT_UNICODE */

@@ -6011,7 +6084,7 @@ for (;; pptr++)

           case ESC_h:
           (void)add_list_to_class(classbits, &class_uchardata,
-            options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list),
+            options & ~PCRE2_CASELESS, xoptions, cb, PRIV(hspace_list),
               NOTACHAR);
           break;

@@ -6022,7 +6095,7 @@ for (;; pptr++)

           case ESC_v:
           (void)add_list_to_class(classbits, &class_uchardata,
-            options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list),
+            options & ~PCRE2_CASELESS, xoptions, cb, PRIV(vspace_list),
               NOTACHAR);
           break;

@@ -6102,7 +6175,7 @@ for (;; pptr++)
             if (C <= CHAR_i)
               {
               class_has_8bitchar +=
-                add_to_class(classbits, &class_uchardata, options, xoptions,
+                add_to_class(classbits, &class_uchardata, options, xoptions,
                   cb, C + uc, ((D < CHAR_i)? D : CHAR_i) + uc);
               C = CHAR_j;
               }
@@ -6110,7 +6183,7 @@ for (;; pptr++)
             if (C <= D && C <= CHAR_r)
               {
               class_has_8bitchar +=
-                add_to_class(classbits, &class_uchardata, options, xoptions,
+                add_to_class(classbits, &class_uchardata, options, xoptions,
                   cb, C + uc, ((D < CHAR_r)? D : CHAR_r) + uc);
               C = CHAR_s;
               }
@@ -6118,7 +6191,7 @@ for (;; pptr++)
             if (C <= D)
               {
               class_has_8bitchar +=
-                add_to_class(classbits, &class_uchardata, options, xoptions,
+                add_to_class(classbits, &class_uchardata, options, xoptions,
                   cb, C + uc, D + uc);
               }
             }
@@ -6126,7 +6199,7 @@ for (;; pptr++)
 #endif
           /* Not an EBCDIC special range */

-          class_has_8bitchar += add_to_class(classbits, &class_uchardata,
+          class_has_8bitchar += add_to_class(classbits, &class_uchardata,
             options, xoptions, cb, c, d);
           goto CONTINUE_CLASS;   /* Go get the next char in the class */
           }  /* End of range handling */
@@ -6135,7 +6208,7 @@ for (;; pptr++)
         /* Handle a single character. */

         class_has_8bitchar +=
-          add_to_class(classbits, &class_uchardata, options, xoptions, cb,
+          add_to_class(classbits, &class_uchardata, options, xoptions, cb,
             meta, meta);
         }

@@ -6621,7 +6694,7 @@ for (;; pptr++)
     if ((group_return =
          compile_regex(
          options,                         /* The options state */
-         xoptions,                        /* The extra options state */
+         xoptions,                        /* The extra options state */
          &tempcode,                       /* Where to put code (updated) */
          &pptr,                           /* Input pointer (updated) */
          errorcodeptr,                    /* Where to put an error message */
@@ -8020,7 +8093,7 @@ for (;; pptr++)
       {
       uint32_t caseset = UCD_CASESET(meta);
       if (caseset != 0 &&
-           ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 ||
+           ((xoptions & PCRE2_EXTRA_CASELESS_RESTRICT) == 0 ||
            PRIV(ucd_caseless_sets)[caseset] > 127))
         {
         *code++ = OP_PROP;
@@ -8137,7 +8210,7 @@ the two phases.

 Arguments:
   options           option bits, including any changes for this subpattern
-  xoptions          extra option bits, ditto
+  xoptions          extra option bits, ditto
   codeptr           -> the address of the current code pointer
   pptrptr           -> the address of the current parsed pattern pointer
   errorcodeptr      -> pointer to error code variable
@@ -8157,10 +8230,10 @@ Returns:            0 There has been an error
 */

 static int
-compile_regex(uint32_t options, uint32_t xoptions, PCRE2_UCHAR **codeptr,
-  uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits,
-  uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr,
-  uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb,
+compile_regex(uint32_t options, uint32_t xoptions, PCRE2_UCHAR **codeptr,
+  uint32_t **pptrptr, int *errorcodeptr, uint32_t skipunits,
+  uint32_t *firstcuptr, uint32_t *firstcuflagsptr, uint32_t *reqcuptr,
+  uint32_t *reqcuflagsptr, branch_chain *bcptr, compile_block *cb,
   PCRE2_SIZE *lengthptr)
 {
 PCRE2_UCHAR *code = *codeptr;
@@ -8257,7 +8330,7 @@ for (;;)
   into the length. */

   if ((branch_return =
-        compile_branch(&options, &xoptions, &code, &pptr, errorcodeptr,
+        compile_branch(&options, &xoptions, &code, &pptr, errorcodeptr,
           &branchfirstcu, &branchfirstcuflags, &branchreqcu, &branchreqcuflags,
           &bc, cb, (lengthptr == NULL)? NULL : &length)) == 0)
     return 0;
@@ -10292,7 +10365,7 @@ code = cworkspace;
 *code = OP_BRA;

 (void)compile_regex(cb.external_options, ccontext->extra_options, &code, &pptr,
-   &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb,
+   &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL, &cb,
    &length);

 if (errorcode != 0) goto HAD_CB_ERROR;  /* Offset is in cb.erroroffset */
@@ -10390,8 +10463,8 @@ of the function here. */
 pptr = cb.parsed_pattern;
 code = (PCRE2_UCHAR *)codestart;
 *code = OP_BRA;
-regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code,
-  &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL,
+regexrc = compile_regex(re->overall_options, ccontext->extra_options, &code,
+  &pptr, &errorcode, 0, &firstcu, &firstcuflags, &reqcu, &reqcuflags, NULL,
   &cb, NULL);
 if (regexrc < 0) re->flags |= PCRE2_MATCH_EMPTY;
 re->top_bracket = cb.bracount;
diff --git a/src/pcre2test.c b/src/pcre2test.c
index 169c6181..6bae5bb5 100644
--- a/src/pcre2test.c
+++ b/src/pcre2test.c
@@ -628,6 +628,9 @@ typedef struct modstruct {
   PCRE2_SIZE    offset;
 } modstruct;

+#define PCRE2_EXTRA_ASCII_ALL (PCRE2_EXTRA_ASCII_BSD|PCRE2_EXTRA_ASCII_BSS| \
+  PCRE2_EXTRA_ASCII_BSW|PCRE2_EXTRA_ASCII_POSIX)
+
 static modstruct modlist[] = {
   { "aftertext",                   MOD_PNDP, MOD_CTL, CTL_AFTERTEXT,              PO(control) },
   { "allaftertext",                MOD_PNDP, MOD_CTL, CTL_ALLAFTERTEXT,           PO(control) },
@@ -642,6 +645,11 @@ static modstruct modlist[] = {
   { "alt_verbnames",               MOD_PAT,  MOD_OPT, PCRE2_ALT_VERBNAMES,        PO(options) },
   { "altglobal",                   MOD_PND,  MOD_CTL, CTL_ALTGLOBAL,              PO(control) },
   { "anchored",                    MOD_PD,   MOD_OPT, PCRE2_ANCHORED,             PD(options) },
+  { "ascii_all",                   MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_ALL,      CO(extra_options) },
+  { "ascii_bsd",                   MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_BSD,      CO(extra_options) },
+  { "ascii_bss",                   MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_BSS,      CO(extra_options) },
+  { "ascii_bsw",                   MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_BSW,      CO(extra_options) },
+  { "ascii_posix",                 MOD_CTC,  MOD_OPT, PCRE2_EXTRA_ASCII_POSIX,    CO(extra_options) },
   { "auto_callout",                MOD_PAT,  MOD_OPT, PCRE2_AUTO_CALLOUT,         PO(options) },
   { "bad_escape_is_literal",       MOD_CTC,  MOD_OPT, PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL, CO(extra_options) },
   { "bincode",                     MOD_PAT,  MOD_CTL, CTL_BINCODE,                PO(control) },
@@ -839,6 +847,7 @@ typedef struct c1modstruct {
 static c1modstruct c1modlist[] = {
   { "bincode",           'B',           -1 },
   { "info",              'I',           -1 },
+  { "ascii_all",         'a',           -1 },
   { "global",            'g',           -1 },
   { "caseless",          'i',           -1 },
   { "multiline",         'm',           -1 },
@@ -4283,15 +4292,19 @@ show_compile_extra_options(uint32_t options, const char *before,
   const char *after)
 {
 if (options == 0) fprintf(outfile, "%s <none>%s", before, after);
-else fprintf(outfile, "%s%s%s%s%s%s%s%s%s",
+else fprintf(outfile, "%s%s%s%s%s%s%s%s%s%s%s%s%s",
   before,
   ((options & PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES) != 0)? " allow_surrogate_escapes" : "",
+  ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " alt_bsux" : "",
+  ((options & PCRE2_EXTRA_ASCII_BSD) != 0)? " ascii_bsd" : "",
+  ((options & PCRE2_EXTRA_ASCII_BSS) != 0)? " ascii_bss" : "",
+  ((options & PCRE2_EXTRA_ASCII_BSW) != 0)? " ascii_bsw" : "",
+  ((options & PCRE2_EXTRA_ASCII_POSIX) != 0)? " ascii_posix" : "",
   ((options & PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL) != 0)? " bad_escape_is_literal" : "",
-  ((options & PCRE2_EXTRA_ALT_BSUX) != 0)? " extra_alt_bsux" : "",
+  ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "",
+  ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
   ((options & PCRE2_EXTRA_MATCH_WORD) != 0)? " match_word" : "",
   ((options & PCRE2_EXTRA_MATCH_LINE) != 0)? " match_line" : "",
-  ((options & PCRE2_EXTRA_ESCAPED_CR_IS_LF) != 0)? " escaped_cr_is_lf" : "",
-  ((options & PCRE2_EXTRA_CASELESS_RESTRICT) != 0)? " caseless_restrict" : "",
   after);
 }

diff --git a/testdata/testinput5 b/testdata/testinput5
index b8174230..6e186cf0 100644
--- a/testdata/testinput5
+++ b/testdata/testinput5
@@ -2309,4 +2309,137 @@

 # End caseless restrict tests

+# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
+
+# DIGITS
+
+/\d+/i,utf
+    123\x{660}456
+
+/\d+/i,utf,ucp
+    123\x{660}456
+
+/\d+/i,utf,ucp,ascii_bsd
+    123\x{660}456
+
+/[\d]+/i,utf
+    123\x{660}456
+
+/[\d]+/i,utf,ucp
+    123\x{660}456
+
+/[\d]+/i,utf,ucp,ascii_bsd
+    123\x{660}456
+
+/\d(?aD)\d(?-aD)\d/utf,ucp
+    \x{660}9\x{660}
+\= Expect no match
+    \x{660}\x{660}\x{660}
+
+/\d(?a)\d(?-a)\d/utf,ucp
+    \x{660}9\x{660}
+\= Expect no match
+    \x{660}\x{660}\x{660}
+
+# SPACES
+
+/>\s+</i,utf
+    >  <
+\= Expect no match
+    >\x{a0} <
+
+/>\s+</i,utf,ucp
+    >  <
+    >\x{a0} <
+
+/>\s+</i,utf,ucp,ascii_bss
+    >  <
+\= Expect no match
+    >\x{a0} <
+
+/>[\s]+</i,utf
+    >  <
+\= Expect no match
+    >\x{a0} <
+
+/>[\s]+</i,utf,ucp
+    >  <
+    >\x{a0} <
+
+/>[\s]+</i,utf,ucp,ascii_bss
+    >  <
+\= Expect no match
+    >\x{a0} <
+
+/>\s(?aS)\s(?-aS)\s</utf,ucp
+    >\x{a0} \x{a0}<
+\= Expect no match
+    >\x{a0}\x{a0}\x{a0}<
+
+/>\s(?a)\s(?-a)\s</utf,ucp
+    >\x{a0} \x{a0}<
+\= Expect no match
+    >\x{a0}\x{a0}\x{a0}<
+
+# WORDS
+
+/\w+/i,utf
+    123\x{660}abc
+
+/\w+/i,utf,ucp
+    123\x{660}abc
+
+/\w+/i,utf,ucp,ascii_bsw
+    123\x{660}abc
+
+/[\w]+/i,utf
+    123\x{660}abc
+
+/[\w]+/i,utf,ucp
+    123\x{660}abc
+
+/[\w]+/i,utf,ucp,ascii_bsw
+    123\x{660}abc
+
+/\w(?aW)\w(?-aW)\w/utf,ucp
+    \x{660}A\x{c0}
+\= Expect no match
+    \x{660}\x{c0}\x{c0}
+
+/\w(?a)\w(?-a)\w/utf,ucp
+    \x{660}A\x{c0}
+\= Expect no match
+    \x{660}\x{c0}\x{c0}
+
+# POSIX
+
+/[[:digit:]]+/utf,ucp
+    123\x{660}456
+
+/[[:digit:]]+/utf,ucp,ascii_posix
+    123\x{660}456
+
+/>[[:space:]]+</utf,ucp
+    >\x{a0} \x{a0}<
+    >\x{a0}\x{a0}\x{a0}<
+
+/>[[:space:]]+</utf,ucp,ascii_posix
+\= Expect no match
+    >\x{a0} \x{a0}<
+
+/(?aP)[[:alnum:]]+/i,ucp,utf
+    abcáxyz
+    abc\x{660}xyz
+
+/(?aP)[[:alnum:]\d]+/i,ucp,utf
+    abc\x{660}xyz
+
+# VARIOUS
+
+/[\d\s\w]+/a,ucp,utf
+    9 A\x{660}À
+    9 AÀ\x{660}
+
+# End PCRE2_EXTRA_ASCII_xxx tests
+
 # End of testinput5
diff --git a/testdata/testinput7 b/testdata/testinput7
index 991de885..64a37ad2 100644
--- a/testdata/testinput7
+++ b/testdata/testinput7
@@ -2328,4 +2328,137 @@

 # End caseless restrict tests

+# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
+
+# DIGITS
+
+/\d+/i,utf
+    123\x{660}456
+
+/\d+/i,utf,ucp
+    123\x{660}456
+
+/\d+/i,utf,ucp,ascii_bsd
+    123\x{660}456
+
+/[\d]+/i,utf
+    123\x{660}456
+
+/[\d]+/i,utf,ucp
+    123\x{660}456
+
+/[\d]+/i,utf,ucp,ascii_bsd
+    123\x{660}456
+
+/\d(?aD)\d(?-aD)\d/utf,ucp
+    \x{660}9\x{660}
+\= Expect no match
+    \x{660}\x{660}\x{660}
+
+/\d(?a)\d(?-a)\d/utf,ucp
+    \x{660}9\x{660}
+\= Expect no match
+    \x{660}\x{660}\x{660}
+
+# SPACES
+
+/>\s+</i,utf
+    >  <
+\= Expect no match
+    >\x{a0} <
+
+/>\s+</i,utf,ucp
+    >  <
+    >\x{a0} <
+
+/>\s+</i,utf,ucp,ascii_bss
+    >  <
+\= Expect no match
+    >\x{a0} <
+
+/>[\s]+</i,utf
+    >  <
+\= Expect no match
+    >\x{a0} <
+
+/>[\s]+</i,utf,ucp
+    >  <
+    >\x{a0} <
+
+/>[\s]+</i,utf,ucp,ascii_bss
+    >  <
+\= Expect no match
+    >\x{a0} <
+
+/>\s(?aS)\s(?-aS)\s</utf,ucp
+    >\x{a0} \x{a0}<
+\= Expect no match
+    >\x{a0}\x{a0}\x{a0}<
+
+/>\s(?a)\s(?-a)\s</utf,ucp
+    >\x{a0} \x{a0}<
+\= Expect no match
+    >\x{a0}\x{a0}\x{a0}<
+
+# WORDS
+
+/\w+/i,utf
+    123\x{660}abc
+
+/\w+/i,utf,ucp
+    123\x{660}abc
+
+/\w+/i,utf,ucp,ascii_bsw
+    123\x{660}abc
+
+/[\w]+/i,utf
+    123\x{660}abc
+
+/[\w]+/i,utf,ucp
+    123\x{660}abc
+
+/[\w]+/i,utf,ucp,ascii_bsw
+    123\x{660}abc
+
+/\w(?aW)\w(?-aW)\w/utf,ucp
+    \x{660}A\x{c0}
+\= Expect no match
+    \x{660}\x{c0}\x{c0}
+
+/\w(?a)\w(?-a)\w/utf,ucp
+    \x{660}A\x{c0}
+\= Expect no match
+    \x{660}\x{c0}\x{c0}
+
+# POSIX
+
+/[[:digit:]]+/utf,ucp
+    123\x{660}456
+
+/[[:digit:]]+/utf,ucp,ascii_posix
+    123\x{660}456
+
+/>[[:space:]]+</utf,ucp
+    >\x{a0} \x{a0}<
+    >\x{a0}\x{a0}\x{a0}<
+
+/>[[:space:]]+</utf,ucp,ascii_posix
+\= Expect no match
+    >\x{a0} \x{a0}<
+
+/(?aP)[[:alnum:]]+/i,ucp,utf
+    abcáxyz
+    abc\x{660}xyz
+
+/(?aP)[[:alnum:]\d]+/i,ucp,utf
+    abc\x{660}xyz
+
+# VARIOUS
+
+/[\d\s\w]+/a,ucp,utf
+    9 A\x{660}À
+    9 AÀ\x{660}
+
+# End PCRE2_EXTRA_ASCII_xxx tests
+
 # End of testinput7
diff --git a/testdata/testoutput5 b/testdata/testoutput5
index db42a117..26972f70 100644
--- a/testdata/testoutput5
+++ b/testdata/testoutput5
@@ -5196,4 +5196,183 @@ No match

 # End caseless restrict tests

+# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
+
+# DIGITS
+
+/\d+/i,utf
+    123\x{660}456
+ 0: 123
+
+/\d+/i,utf,ucp
+    123\x{660}456
+ 0: 123\x{660}456
+
+/\d+/i,utf,ucp,ascii_bsd
+    123\x{660}456
+ 0: 123
+
+/[\d]+/i,utf
+    123\x{660}456
+ 0: 123
+
+/[\d]+/i,utf,ucp
+    123\x{660}456
+ 0: 123\x{660}456
+
+/[\d]+/i,utf,ucp,ascii_bsd
+    123\x{660}456
+ 0: 123
+
+/\d(?aD)\d(?-aD)\d/utf,ucp
+    \x{660}9\x{660}
+ 0: \x{660}9\x{660}
+\= Expect no match
+    \x{660}\x{660}\x{660}
+No match
+
+/\d(?a)\d(?-a)\d/utf,ucp
+    \x{660}9\x{660}
+ 0: \x{660}9\x{660}
+\= Expect no match
+    \x{660}\x{660}\x{660}
+No match
+
+# SPACES
+
+/>\s+</i,utf
+    >  <
+ 0: >  <
+\= Expect no match
+    >\x{a0} <
+No match
+
+/>\s+</i,utf,ucp
+    >  <
+ 0: >  <
+    >\x{a0} <
+ 0: >\x{a0} <
+
+/>\s+</i,utf,ucp,ascii_bss
+    >  <
+ 0: >  <
+\= Expect no match
+    >\x{a0} <
+No match
+
+/>[\s]+</i,utf
+    >  <
+ 0: >  <
+\= Expect no match
+    >\x{a0} <
+No match
+
+/>[\s]+</i,utf,ucp
+    >  <
+ 0: >  <
+    >\x{a0} <
+ 0: >\x{a0} <
+
+/>[\s]+</i,utf,ucp,ascii_bss
+    >  <
+ 0: >  <
+\= Expect no match
+    >\x{a0} <
+No match
+
+/>\s(?aS)\s(?-aS)\s</utf,ucp
+    >\x{a0} \x{a0}<
+ 0: >\x{a0} \x{a0}<
+\= Expect no match
+    >\x{a0}\x{a0}\x{a0}<
+No match
+
+/>\s(?a)\s(?-a)\s</utf,ucp
+    >\x{a0} \x{a0}<
+ 0: >\x{a0} \x{a0}<
+\= Expect no match
+    >\x{a0}\x{a0}\x{a0}<
+No match
+
+# WORDS
+
+/\w+/i,utf
+    123\x{660}abc
+ 0: 123
+
+/\w+/i,utf,ucp
+    123\x{660}abc
+ 0: 123\x{660}abc
+
+/\w+/i,utf,ucp,ascii_bsw
+    123\x{660}abc
+ 0: 123
+
+/[\w]+/i,utf
+    123\x{660}abc
+ 0: 123
+
+/[\w]+/i,utf,ucp
+    123\x{660}abc
+ 0: 123\x{660}abc
+
+/[\w]+/i,utf,ucp,ascii_bsw
+    123\x{660}abc
+ 0: 123
+
+/\w(?aW)\w(?-aW)\w/utf,ucp
+    \x{660}A\x{c0}
+ 0: \x{660}A\x{c0}
+\= Expect no match
+    \x{660}\x{c0}\x{c0}
+No match
+
+/\w(?a)\w(?-a)\w/utf,ucp
+    \x{660}A\x{c0}
+ 0: \x{660}A\x{c0}
+\= Expect no match
+    \x{660}\x{c0}\x{c0}
+No match
+
+# POSIX
+
+/[[:digit:]]+/utf,ucp
+    123\x{660}456
+ 0: 123\x{660}456
+
+/[[:digit:]]+/utf,ucp,ascii_posix
+    123\x{660}456
+ 0: 123
+
+/>[[:space:]]+</utf,ucp
+    >\x{a0} \x{a0}<
+ 0: >\x{a0} \x{a0}<
+    >\x{a0}\x{a0}\x{a0}<
+ 0: >\x{a0}\x{a0}\x{a0}<
+
+/>[[:space:]]+</utf,ucp,ascii_posix
+\= Expect no match
+    >\x{a0} \x{a0}<
+No match
+
+/(?aP)[[:alnum:]]+/i,ucp,utf
+    abcáxyz
+ 0: abc
+    abc\x{660}xyz
+ 0: abc
+
+/(?aP)[[:alnum:]\d]+/i,ucp,utf
+    abc\x{660}xyz
+ 0: abc\x{660}xyz
+
+# VARIOUS
+
+/[\d\s\w]+/a,ucp,utf
+    9 A\x{660}À
+ 0: 9 A
+    9 AÀ\x{660}
+ 0: 9 A
+
+# End PCRE2_EXTRA_ASCII_xxx tests
+
 # End of testinput5
diff --git a/testdata/testoutput7 b/testdata/testoutput7
index c2291a10..c830748c 100644
--- a/testdata/testoutput7
+++ b/testdata/testoutput7
@@ -3936,4 +3936,183 @@ No match

 # End caseless restrict tests

+# TESTS for PCRE2_EXTRA_ASCII_xxx - again, tests with and without.
+
+# DIGITS
+
+/\d+/i,utf
+    123\x{660}456
+ 0: 123
+
+/\d+/i,utf,ucp
+    123\x{660}456
+ 0: 123\x{660}456
+
+/\d+/i,utf,ucp,ascii_bsd
+    123\x{660}456
+ 0: 123
+
+/[\d]+/i,utf
+    123\x{660}456
+ 0: 123
+
+/[\d]+/i,utf,ucp
+    123\x{660}456
+ 0: 123\x{660}456
+
+/[\d]+/i,utf,ucp,ascii_bsd
+    123\x{660}456
+ 0: 123
+
+/\d(?aD)\d(?-aD)\d/utf,ucp
+    \x{660}9\x{660}
+ 0: \x{660}9\x{660}
+\= Expect no match
+    \x{660}\x{660}\x{660}
+No match
+
+/\d(?a)\d(?-a)\d/utf,ucp
+    \x{660}9\x{660}
+ 0: \x{660}9\x{660}
+\= Expect no match
+    \x{660}\x{660}\x{660}
+No match
+
+# SPACES
+
+/>\s+</i,utf
+    >  <
+ 0: >  <
+\= Expect no match
+    >\x{a0} <
+No match
+
+/>\s+</i,utf,ucp
+    >  <
+ 0: >  <
+    >\x{a0} <
+ 0: >\x{a0} <
+
+/>\s+</i,utf,ucp,ascii_bss
+    >  <
+ 0: >  <
+\= Expect no match
+    >\x{a0} <
+No match
+
+/>[\s]+</i,utf
+    >  <
+ 0: >  <
+\= Expect no match
+    >\x{a0} <
+No match
+
+/>[\s]+</i,utf,ucp
+    >  <
+ 0: >  <
+    >\x{a0} <
+ 0: >\x{a0} <
+
+/>[\s]+</i,utf,ucp,ascii_bss
+    >  <
+ 0: >  <
+\= Expect no match
+    >\x{a0} <
+No match
+
+/>\s(?aS)\s(?-aS)\s</utf,ucp
+    >\x{a0} \x{a0}<
+ 0: >\x{a0} \x{a0}<
+\= Expect no match
+    >\x{a0}\x{a0}\x{a0}<
+No match
+
+/>\s(?a)\s(?-a)\s</utf,ucp
+    >\x{a0} \x{a0}<
+ 0: >\x{a0} \x{a0}<
+\= Expect no match
+    >\x{a0}\x{a0}\x{a0}<
+No match
+
+# WORDS
+
+/\w+/i,utf
+    123\x{660}abc
+ 0: 123
+
+/\w+/i,utf,ucp
+    123\x{660}abc
+ 0: 123\x{660}abc
+
+/\w+/i,utf,ucp,ascii_bsw
+    123\x{660}abc
+ 0: 123
+
+/[\w]+/i,utf
+    123\x{660}abc
+ 0: 123
+
+/[\w]+/i,utf,ucp
+    123\x{660}abc
+ 0: 123\x{660}abc
+
+/[\w]+/i,utf,ucp,ascii_bsw
+    123\x{660}abc
+ 0: 123
+
+/\w(?aW)\w(?-aW)\w/utf,ucp
+    \x{660}A\x{c0}
+ 0: \x{660}A\x{c0}
+\= Expect no match
+    \x{660}\x{c0}\x{c0}
+No match
+
+/\w(?a)\w(?-a)\w/utf,ucp
+    \x{660}A\x{c0}
+ 0: \x{660}A\x{c0}
+\= Expect no match
+    \x{660}\x{c0}\x{c0}
+No match
+
+# POSIX
+
+/[[:digit:]]+/utf,ucp
+    123\x{660}456
+ 0: 123\x{660}456
+
+/[[:digit:]]+/utf,ucp,ascii_posix
+    123\x{660}456
+ 0: 123
+
+/>[[:space:]]+</utf,ucp
+    >\x{a0} \x{a0}<
+ 0: >\x{a0} \x{a0}<
+    >\x{a0}\x{a0}\x{a0}<
+ 0: >\x{a0}\x{a0}\x{a0}<
+
+/>[[:space:]]+</utf,ucp,ascii_posix
+\= Expect no match
+    >\x{a0} \x{a0}<
+No match
+
+/(?aP)[[:alnum:]]+/i,ucp,utf
+    abcáxyz
+ 0: abc
+    abc\x{660}xyz
+ 0: abc
+
+/(?aP)[[:alnum:]\d]+/i,ucp,utf
+    abc\x{660}xyz
+ 0: abc\x{660}xyz
+
+# VARIOUS
+
+/[\d\s\w]+/a,ucp,utf
+    9 A\x{660}À
+ 0: 9 A
+    9 AÀ\x{660}
+ 0: 9 A
+
+# End PCRE2_EXTRA_ASCII_xxx tests
+
 # End of testinput7
--
2.23.0