234 lines
9.3 KiB
Diff
234 lines
9.3 KiB
Diff
|
|
From ef218fbba60bfe5b0a8ac9ea4445eac5fb0847e5 Mon Sep 17 00:00:00 2001
|
||
|
|
From: Alex Dowad <alexinbeijing@gmail.com>
|
||
|
|
Date: Sat, 7 Sep 2024 00:16:03 +0900
|
||
|
|
Subject: [PATCH] Guard against out-of-bounds memory access when parsing
|
||
|
|
LIMIT_HEAP et al (#463)
|
||
|
|
|
||
|
|
Patterns passed to pcre2_compile are not guaranteed to be
|
||
|
|
null-terminated. Also, it can happen that there is an invalid
|
||
|
|
pattern like this:
|
||
|
|
|
||
|
|
(*LIMIT_HEAP=123
|
||
|
|
|
||
|
|
If the next byte of memory after the end of the pattern happens
|
||
|
|
to be a digit, it will be parsed as part of the limit value. Or,
|
||
|
|
if the next byte is a right parenthesis character, it will be taken
|
||
|
|
as the end of the (*LIMIT_HEAP=nnn) construct.
|
||
|
|
|
||
|
|
This will result in `skipatstart` being larger than `patlen`, which
|
||
|
|
will result in underflow and an erroneous call to malloc requesting
|
||
|
|
a huge number of bytes.
|
||
|
|
---
|
||
|
|
src/pcre2_compile.c | 7 ++-
|
||
|
|
src/pcre2_internal.h | 3 +
|
||
|
|
src/pcre2_util.h | 132 ++++++++++++++++++++++++++++++++++++++++++
|
||
|
|
testdata/testoutput15 | 4 +-
|
||
|
|
4 files changed, 141 insertions(+), 5 deletions(-)
|
||
|
|
create mode 100644 src/pcre2_util.h
|
||
|
|
|
||
|
|
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
|
||
|
|
index e6843bb13..410f220b3 100644
|
||
|
|
--- a/src/pcre2_compile.c
|
||
|
|
+++ b/src/pcre2_compile.c
|
||
|
|
@@ -10552,12 +10552,12 @@ if ((options & PCRE2_LITERAL) == 0)
|
||
|
|
ptr += pp;
|
||
|
|
goto HAD_EARLY_ERROR;
|
||
|
|
}
|
||
|
|
- while (IS_DIGIT(ptr[pp]))
|
||
|
|
+ while (pp < patlen && IS_DIGIT(ptr[pp]))
|
||
|
|
{
|
||
|
|
if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */
|
||
|
|
c = c*10 + (ptr[pp++] - CHAR_0);
|
||
|
|
}
|
||
|
|
- if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS)
|
||
|
|
+ if (pp >= patlen || ptr[pp] != CHAR_RIGHT_PARENTHESIS)
|
||
|
|
{
|
||
|
|
errorcode = ERR60;
|
||
|
|
ptr += pp;
|
||
|
|
@@ -10566,7 +10566,7 @@ if ((options & PCRE2_LITERAL) == 0)
|
||
|
|
if (p->type == PSO_LIMH) limit_heap = c;
|
||
|
|
else if (p->type == PSO_LIMM) limit_match = c;
|
||
|
|
else limit_depth = c;
|
||
|
|
- skipatstart += pp - skipatstart;
|
||
|
|
+ skipatstart = ++pp;
|
||
|
|
break;
|
||
|
|
}
|
||
|
|
break; /* Out of the table scan loop */
|
||
|
|
@@ -10574,6 +10574,7 @@ if ((options & PCRE2_LITERAL) == 0)
|
||
|
|
}
|
||
|
|
if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */
|
||
|
|
}
|
||
|
|
+ PCRE2_ASSERT(skipatstart <= patlen);
|
||
|
|
}
|
||
|
|
|
||
|
|
/* End of pattern-start options; advance to start of real regex. */
|
||
|
|
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
|
||
|
|
index d8fad1e..edb36ca 100644
|
||
|
|
--- a/src/pcre2_internal.h
|
||
|
|
+++ b/src/pcre2_internal.h
|
||
|
|
@@ -1999,6 +1999,9 @@ extern void * _pcre2_memmove(void *, const void *, size_t);
|
||
|
|
#endif
|
||
|
|
|
||
|
|
#endif /* PCRE2_CODE_UNIT_WIDTH */
|
||
|
|
+
|
||
|
|
+#include "pcre2_util.h"
|
||
|
|
+
|
||
|
|
#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */
|
||
|
|
|
||
|
|
/* End of pcre2_internal.h */
|
||
|
|
diff --git a/src/pcre2_util.h b/src/pcre2_util.h
|
||
|
|
new file mode 100644
|
||
|
|
index 0000000..ea86355
|
||
|
|
--- /dev/null
|
||
|
|
+++ b/src/pcre2_util.h
|
||
|
|
@@ -0,0 +1,132 @@
|
||
|
|
+/*************************************************
|
||
|
|
+* Perl-Compatible Regular Expressions *
|
||
|
|
+*************************************************/
|
||
|
|
+
|
||
|
|
+/* PCRE2 is a library of functions to support regular expressions whose syntax
|
||
|
|
+and semantics are as close as possible to those of the Perl 5 language.
|
||
|
|
+
|
||
|
|
+ Written by Philip Hazel
|
||
|
|
+ Original API code Copyright (c) 1997-2012 University of Cambridge
|
||
|
|
+ New API code Copyright (c) 2016-2024 University of Cambridge
|
||
|
|
+
|
||
|
|
+-----------------------------------------------------------------------------
|
||
|
|
+Redistribution and use in source and binary forms, with or without
|
||
|
|
+modification, are permitted provided that the following conditions are met:
|
||
|
|
+
|
||
|
|
+ * Redistributions of source code must retain the above copyright notice,
|
||
|
|
+ this list of conditions and the following disclaimer.
|
||
|
|
+
|
||
|
|
+ * Redistributions in binary form must reproduce the above copyright
|
||
|
|
+ notice, this list of conditions and the following disclaimer in the
|
||
|
|
+ documentation and/or other materials provided with the distribution.
|
||
|
|
+
|
||
|
|
+ * Neither the name of the University of Cambridge nor the names of its
|
||
|
|
+ contributors may be used to endorse or promote products derived from
|
||
|
|
+ this software without specific prior written permission.
|
||
|
|
+
|
||
|
|
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||
|
|
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||
|
|
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||
|
|
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||
|
|
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||
|
|
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||
|
|
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||
|
|
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||
|
|
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||
|
|
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||
|
|
+POSSIBILITY OF SUCH DAMAGE.
|
||
|
|
+-----------------------------------------------------------------------------
|
||
|
|
+*/
|
||
|
|
+
|
||
|
|
+#ifndef PCRE2_UTIL_H_IDEMPOTENT_GUARD
|
||
|
|
+#define PCRE2_UTIL_H_IDEMPOTENT_GUARD
|
||
|
|
+
|
||
|
|
+/* Assertion macros */
|
||
|
|
+
|
||
|
|
+#ifdef PCRE2_DEBUG
|
||
|
|
+
|
||
|
|
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
|
||
|
|
+#include <assert.h>
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
+/* PCRE2_ASSERT(x) can be used to inject an assert() for conditions
|
||
|
|
+that the code below doesn't support. It is a NOP for non debug builds
|
||
|
|
+but in debug builds it will print information about the location of the
|
||
|
|
+code where it triggered and crash.
|
||
|
|
+
|
||
|
|
+It is meant to work like assert(), and therefore the expression used
|
||
|
|
+should indicate what the expected state is, and shouldn't have any
|
||
|
|
+side-effects. */
|
||
|
|
+
|
||
|
|
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
|
||
|
|
+#define PCRE2_ASSERT(x) assert(x)
|
||
|
|
+#else
|
||
|
|
+#define PCRE2_ASSERT(x) do \
|
||
|
|
+{ \
|
||
|
|
+ if (!(x)) \
|
||
|
|
+ { \
|
||
|
|
+ fprintf(stderr, "Assertion failed at " __FILE__ ":%d\n", __LINE__); \
|
||
|
|
+ abort(); \
|
||
|
|
+ } \
|
||
|
|
+} while(0)
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
+/* PCRE2_UNREACHABLE() can be used to mark locations in the code that
|
||
|
|
+shouldn't be reached. In non debug builds it is defined as a hint for
|
||
|
|
+the compiler to eliminate any code after it, so it is useful also for
|
||
|
|
+performance reasons, but should be used with care because if it is
|
||
|
|
+ever reached it will trigger Undefined Behaviour and, if you are lucky, a
|
||
|
|
+crash. In debug builds it will report the location where it was triggered
|
||
|
|
+and crash. One important point to consider when using this macro, is
|
||
|
|
+that it is only implemented for a few compilers, and therefore can't
|
||
|
|
+be relied on to always be active either, so if it is followed by some
|
||
|
|
+code it is important to make sure that the whole thing is safe to
|
||
|
|
+use even if the macro is not there (ex: make sure there is a `break`
|
||
|
|
+after it if used at the end of a `case`) and to test your code also
|
||
|
|
+with a configuration where the macro will be a NOP. */
|
||
|
|
+
|
||
|
|
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
|
||
|
|
+#define PCRE2_UNREACHABLE() \
|
||
|
|
+assert(((void)"Execution reached unexpected point", 0))
|
||
|
|
+#else
|
||
|
|
+#define PCRE2_UNREACHABLE() do \
|
||
|
|
+{ \
|
||
|
|
+fprintf(stderr, "Execution reached unexpected point at " __FILE__ \
|
||
|
|
+ ":%d\n", __LINE__); \
|
||
|
|
+abort(); \
|
||
|
|
+} while(0)
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
+/* PCRE2_DEBUG_UNREACHABLE() is a debug only version of the previous
|
||
|
|
+macro. It is meant to be used in places where the code is handling
|
||
|
|
+an error situation in code that shouldn't be reached, but that has
|
||
|
|
+some sort of fallback code to normally handle the error. When in
|
||
|
|
+doubt you should use this instead of the previous macro. Like in
|
||
|
|
+the previous case, it is a good idea to document as much as possible
|
||
|
|
+the reason and the actions that should be taken if it ever triggers. */
|
||
|
|
+
|
||
|
|
+#define PCRE2_DEBUG_UNREACHABLE() PCRE2_UNREACHABLE()
|
||
|
|
+
|
||
|
|
+#endif /* PCRE2_DEBUG */
|
||
|
|
+
|
||
|
|
+#ifndef PCRE2_DEBUG_UNREACHABLE
|
||
|
|
+#define PCRE2_DEBUG_UNREACHABLE() do {} while(0)
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
+#ifndef PCRE2_UNREACHABLE
|
||
|
|
+#ifdef HAVE_BUILTIN_UNREACHABLE
|
||
|
|
+#define PCRE2_UNREACHABLE() __builtin_unreachable()
|
||
|
|
+#elif defined(HAVE_BUILTIN_ASSUME)
|
||
|
|
+#define PCRE2_UNREACHABLE() __assume(0)
|
||
|
|
+#else
|
||
|
|
+#define PCRE2_UNREACHABLE() do {} while(0)
|
||
|
|
+#endif
|
||
|
|
+#endif /* !PCRE2_UNREACHABLE */
|
||
|
|
+
|
||
|
|
+#ifndef PCRE2_ASSERT
|
||
|
|
+#define PCRE2_ASSERT(x) do {} while(0)
|
||
|
|
+#endif
|
||
|
|
+
|
||
|
|
+#endif /* PCRE2_UTIL_H_IDEMPOTENT_GUARD */
|
||
|
|
+
|
||
|
|
+/* End of pcre2_util.h */
|
||
|
|
diff --git a/testdata/testoutput15 b/testdata/testoutput15
|
||
|
|
index aa9c5c930..f36faeeaf 100644
|
||
|
|
--- a/testdata/testoutput15
|
||
|
|
+++ b/testdata/testoutput15
|
||
|
|
@@ -111,10 +111,10 @@ Minimum depth limit = 10
|
||
|
|
3: ee
|
||
|
|
|
||
|
|
/(*LIMIT_MATCH=12bc)abc/
|
||
|
|
-Failed: error 160 at offset 17: (*VERB) not recognized or malformed
|
||
|
|
+Failed: error 160 at offset 16: (*VERB) not recognized or malformed
|
||
|
|
|
||
|
|
/(*LIMIT_MATCH=4294967290)abc/
|
||
|
|
-Failed: error 160 at offset 24: (*VERB) not recognized or malformed
|
||
|
|
+Failed: error 160 at offset 23: (*VERB) not recognized or malformed
|
||
|
|
|
||
|
|
/(*LIMIT_DEPTH=4294967280)abc/I
|
||
|
|
Capture group count = 0
|