pcre2/backport-Guard-against-out-of-bounds-memory-access-when-parsing.patch
2024-11-19 09:16:25 +00:00

234 lines
9.3 KiB
Diff

From ef218fbba60bfe5b0a8ac9ea4445eac5fb0847e5 Mon Sep 17 00:00:00 2001
From: Alex Dowad <alexinbeijing@gmail.com>
Date: Sat, 7 Sep 2024 00:16:03 +0900
Subject: [PATCH] Guard against out-of-bounds memory access when parsing
LIMIT_HEAP et al (#463)
Patterns passed to pcre2_compile are not guaranteed to be
null-terminated. Also, it can happen that there is an invalid
pattern like this:
(*LIMIT_HEAP=123
If the next byte of memory after the end of the pattern happens
to be a digit, it will be parsed as part of the limit value. Or,
if the next byte is a right parenthesis character, it will be taken
as the end of the (*LIMIT_HEAP=nnn) construct.
This will result in `skipatstart` being larger than `patlen`, which
will result in underflow and an erroneous call to malloc requesting
a huge number of bytes.
---
src/pcre2_compile.c | 7 ++-
src/pcre2_internal.h | 3 +
src/pcre2_util.h | 132 ++++++++++++++++++++++++++++++++++++++++++
testdata/testoutput15 | 4 +-
4 files changed, 141 insertions(+), 5 deletions(-)
create mode 100644 src/pcre2_util.h
diff --git a/src/pcre2_compile.c b/src/pcre2_compile.c
index e6843bb13..410f220b3 100644
--- a/src/pcre2_compile.c
+++ b/src/pcre2_compile.c
@@ -10552,12 +10552,12 @@ if ((options & PCRE2_LITERAL) == 0)
ptr += pp;
goto HAD_EARLY_ERROR;
}
- while (IS_DIGIT(ptr[pp]))
+ while (pp < patlen && IS_DIGIT(ptr[pp]))
{
if (c > UINT32_MAX / 10 - 1) break; /* Integer overflow */
c = c*10 + (ptr[pp++] - CHAR_0);
}
- if (ptr[pp++] != CHAR_RIGHT_PARENTHESIS)
+ if (pp >= patlen || ptr[pp] != CHAR_RIGHT_PARENTHESIS)
{
errorcode = ERR60;
ptr += pp;
@@ -10566,7 +10566,7 @@ if ((options & PCRE2_LITERAL) == 0)
if (p->type == PSO_LIMH) limit_heap = c;
else if (p->type == PSO_LIMM) limit_match = c;
else limit_depth = c;
- skipatstart += pp - skipatstart;
+ skipatstart = ++pp;
break;
}
break; /* Out of the table scan loop */
@@ -10574,6 +10574,7 @@ if ((options & PCRE2_LITERAL) == 0)
}
if (i >= sizeof(pso_list)/sizeof(pso)) break; /* Out of pso loop */
}
+ PCRE2_ASSERT(skipatstart <= patlen);
}
/* End of pattern-start options; advance to start of real regex. */
diff --git a/src/pcre2_internal.h b/src/pcre2_internal.h
index d8fad1e..edb36ca 100644
--- a/src/pcre2_internal.h
+++ b/src/pcre2_internal.h
@@ -1999,6 +1999,9 @@ extern void * _pcre2_memmove(void *, const void *, size_t);
#endif
#endif /* PCRE2_CODE_UNIT_WIDTH */
+
+#include "pcre2_util.h"
+
#endif /* PCRE2_INTERNAL_H_IDEMPOTENT_GUARD */
/* End of pcre2_internal.h */
diff --git a/src/pcre2_util.h b/src/pcre2_util.h
new file mode 100644
index 0000000..ea86355
--- /dev/null
+++ b/src/pcre2_util.h
@@ -0,0 +1,132 @@
+/*************************************************
+* Perl-Compatible Regular Expressions *
+*************************************************/
+
+/* PCRE2 is a library of functions to support regular expressions whose syntax
+and semantics are as close as possible to those of the Perl 5 language.
+
+ Written by Philip Hazel
+ Original API code Copyright (c) 1997-2012 University of Cambridge
+ New API code Copyright (c) 2016-2024 University of Cambridge
+
+-----------------------------------------------------------------------------
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ * Neither the name of the University of Cambridge nor the names of its
+ contributors may be used to endorse or promote products derived from
+ this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+POSSIBILITY OF SUCH DAMAGE.
+-----------------------------------------------------------------------------
+*/
+
+#ifndef PCRE2_UTIL_H_IDEMPOTENT_GUARD
+#define PCRE2_UTIL_H_IDEMPOTENT_GUARD
+
+/* Assertion macros */
+
+#ifdef PCRE2_DEBUG
+
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
+#include <assert.h>
+#endif
+
+/* PCRE2_ASSERT(x) can be used to inject an assert() for conditions
+that the code following it doesn't support. It is a NOP in non-debug
+builds, but in debug builds it will print information about the location
+of the code where it triggered and then crash.
+
+It is meant to work like assert(), and therefore the expression used
+should state what the expected condition is, and must not have any
+side effects, because it is not evaluated at all in non-debug builds. */
+
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
+#define PCRE2_ASSERT(x) assert(x)
+#else /* no <assert.h>, or NDEBUG is defined: use a hand-rolled check */
+#define PCRE2_ASSERT(x) do \
+{ \
+ if (!(x)) \
+ { \
+ fprintf(stderr, "Assertion failed at " __FILE__ ":%d\n", __LINE__); \
+ abort(); \
+ } \
+} while(0)
+#endif /* HAVE_ASSERT_H && !NDEBUG */
+
+/* PCRE2_UNREACHABLE() can be used to mark locations in the code that
+should never be reached. In non-debug builds it is defined as a hint for
+the compiler to eliminate any code after it, so it is also useful for
+performance reasons, but it should be used with care because if it is
+ever reached it will trigger Undefined Behaviour and, if you are lucky, a
+crash. In debug builds it will report the location where it was triggered
+and crash. One important point to consider when using this macro is
+that it is only implemented for a few compilers, and therefore can't
+be relied on to always be active either, so if it is followed by some
+code it is important to make sure that the whole thing is safe to
+use even if the macro is not there (e.g. make sure there is a `break`
+after it if used at the end of a `case`) and to test your code also
+with a configuration where the macro will be a NOP. */
+
+#if defined(HAVE_ASSERT_H) && !defined(NDEBUG)
+#define PCRE2_UNREACHABLE() \
+assert(((void)"Execution reached unexpected point", 0))
+#else /* no <assert.h>, or NDEBUG is defined: report and abort by hand */
+#define PCRE2_UNREACHABLE() do \
+{ \
+fprintf(stderr, "Execution reached unexpected point at " __FILE__ \
+ ":%d\n", __LINE__); \
+abort(); \
+} while(0)
+#endif /* HAVE_ASSERT_H && !NDEBUG */
+
+/* PCRE2_DEBUG_UNREACHABLE() is a debug-only version of the previous
+macro. It is meant to be used in places where the code is handling
+an error situation that shouldn't be reached, but that has some sort
+of fallback code to handle the error in normal (non-debug) builds,
+where this macro expands to nothing. When in doubt, use this instead
+of the previous macro. As in the previous case, it is a good idea to
+document the reason and the actions to take if it ever triggers. */
+
+#define PCRE2_DEBUG_UNREACHABLE() PCRE2_UNREACHABLE()
+
+#endif /* PCRE2_DEBUG */
+
+#ifndef PCRE2_DEBUG_UNREACHABLE /* PCRE2_DEBUG off: expands to nothing */
+#define PCRE2_DEBUG_UNREACHABLE() do {} while(0)
+#endif /* !PCRE2_DEBUG_UNREACHABLE */
+
+#ifndef PCRE2_UNREACHABLE /* PCRE2_DEBUG off: optimizer hint if available */
+#ifdef HAVE_BUILTIN_UNREACHABLE
+#define PCRE2_UNREACHABLE() __builtin_unreachable()
+#elif defined(HAVE_BUILTIN_ASSUME)
+#define PCRE2_UNREACHABLE() __assume(0)
+#else /* no known builtin: the macro is a NOP */
+#define PCRE2_UNREACHABLE() do {} while(0)
+#endif /* HAVE_BUILTIN_UNREACHABLE */
+#endif /* !PCRE2_UNREACHABLE */
+
+#ifndef PCRE2_ASSERT /* PCRE2_DEBUG off: the condition is not evaluated */
+#define PCRE2_ASSERT(x) do {} while(0)
+#endif /* !PCRE2_ASSERT */
+
+#endif /* PCRE2_UTIL_H_IDEMPOTENT_GUARD */
+
+/* End of pcre2_util.h */
diff --git a/testdata/testoutput15 b/testdata/testoutput15
index aa9c5c930..f36faeeaf 100644
--- a/testdata/testoutput15
+++ b/testdata/testoutput15
@@ -111,10 +111,10 @@ Minimum depth limit = 10
3: ee
/(*LIMIT_MATCH=12bc)abc/
-Failed: error 160 at offset 17: (*VERB) not recognized or malformed
+Failed: error 160 at offset 16: (*VERB) not recognized or malformed
/(*LIMIT_MATCH=4294967290)abc/
-Failed: error 160 at offset 24: (*VERB) not recognized or malformed
+Failed: error 160 at offset 23: (*VERB) not recognized or malformed
/(*LIMIT_DEPTH=4294967280)abc/I
Capture group count = 0