Score:CVE-2022-41715:4,CVE-2022-2880:5.3,CVE-2022-2879:6.2 Reference:https://go-review.googlesource.com/c/go/+/438501, https://go-review.googlesource.com/c/go/+/433695, https://go-review.googlesource.com/c/go/+/438500 Conflict:NA Reason:fix CVE-2022-41715,CVE-2022-2880,CVE-2022-2879
387 lines
12 KiB
Diff
387 lines
12 KiB
Diff
From 8b3a5d153b7b255bafd1a82d61505088356d0458 Mon Sep 17 00:00:00 2001
|
|
From: Russ Cox <rsc@golang.org>
|
|
Date: Wed, 28 Sep 2022 11:18:51 -0400
|
|
Subject: [PATCH] regexp: limit size of parsed regexps
|
|
|
|
Set a 128 MB limit on the amount of space used by []syntax.Inst
|
|
in the compiled form corresponding to a given regexp.
|
|
|
|
Also set a 128 MB limit on the rune storage in the *syntax.Regexp
|
|
tree itself.
|
|
|
|
Thanks to Adam Korczynski (ADA Logics) and OSS-Fuzz for reporting this issue.
|
|
|
|
Fixes CVE-2022-41715.
|
|
Updates #55949.
|
|
Fixes #55950.
|
|
|
|
Change-Id: Ia656baed81564436368cf950e1c5409752f28e1b
|
|
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1592136
|
|
TryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com>
|
|
Reviewed-by: Damien Neil <dneil@google.com>
|
|
Run-TryBot: Roland Shoemaker <bracewell@google.com>
|
|
Reviewed-by: Julie Qiu <julieqiu@google.com>
|
|
Reviewed-on: https://go-review.googlesource.com/c/go/+/438501
|
|
Run-TryBot: Carlos Amedee <carlos@golang.org>
|
|
Reviewed-by: Carlos Amedee <carlos@golang.org>
|
|
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
|
|
TryBot-Result: Gopher Robot <gobot@golang.org>
|
|
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
|
|
---
|
|
src/regexp/syntax/parse.go | 222 +++++++++++++++++++++++++++++++-
|
|
src/regexp/syntax/parse_test.go | 11 +-
|
|
2 files changed, 224 insertions(+), 9 deletions(-)
|
|
|
|
diff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go
|
|
index 7b40309..67254d6 100644
|
|
--- a/src/regexp/syntax/parse.go
|
|
+++ b/src/regexp/syntax/parse.go
|
|
@@ -43,6 +43,7 @@ const (
|
|
ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
|
|
ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"
|
|
ErrUnexpectedParen ErrorCode = "unexpected )"
|
|
+ ErrNestingDepth ErrorCode = "expression nests too deeply"
|
|
)
|
|
|
|
func (e ErrorCode) String() string {
|
|
@@ -76,13 +77,63 @@ const (
|
|
opVerticalBar
|
|
)
|
|
|
|
+// maxHeight is the maximum height of a regexp parse tree.
|
|
+// It is somewhat arbitrarily chosen, but the idea is to be large enough
|
|
+// that no one will actually hit it in real use but at the same time small enough
|
|
+// that recursion on the Regexp tree will not hit the 1GB Go stack limit.
|
|
+// The maximum amount of stack for a single recursive frame is probably
|
|
+// closer to 1kB, so this could potentially be raised, but it seems unlikely
|
|
+// that people have regexps nested even this deeply.
|
|
+// We ran a test on Google's C++ code base and turned up only
|
|
+// a single use case with depth > 100; it had depth 128.
|
|
+// Using depth 1000 should be plenty of margin.
|
|
+// As an optimization, we don't even bother calculating heights
|
|
+// until we've allocated at least maxHeight Regexp structures.
|
|
+const maxHeight = 1000
|
|
+
|
|
+// maxSize is the maximum size of a compiled regexp in Insts.
|
|
+// It too is somewhat arbitrarily chosen, but the idea is to be large enough
|
|
+// to allow significant regexps while at the same time small enough that
|
|
+// the compiled form will not take up too much memory.
|
|
+// 128 MB is enough for 3.3 million Inst structures, which roughly
|
|
+// corresponds to a 3.3 MB regexp.
|
|
+const (
|
|
+ maxSize = 128 << 20 / instSize
|
|
+ instSize = 5 * 8 // byte, 2 uint32, slice is 5 64-bit words
|
|
+)
|
|
+
|
|
+// maxRunes is the maximum number of runes allowed in a regexp tree
|
|
+// counting the runes in all the nodes.
|
|
+// Ignoring character classes, p.numRunes is always less than the length of the regexp.
|
|
+// Character classes can make it much larger: each \pL adds 1292 runes.
|
|
+// 128 MB is enough for 32M runes, which is about 26k \pL instances.
|
|
+// Note that repetitions do not make copies of the rune slices,
|
|
+// so \pL{1000} is only one rune slice, not 1000.
|
|
+// We could keep a cache of character classes we've seen,
|
|
+// so that all the \pL we see use the same rune list,
|
|
+// but that doesn't remove the problem entirely:
|
|
+// consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()].
|
|
+// And because the Rune slice is exposed directly in the Regexp,
|
|
+// there is not an opportunity to change the representation to allow
|
|
+// partial sharing between different character classes.
|
|
+// So the limit is the best we can do.
|
|
+const (
|
|
+ maxRunes = 128 << 20 / runeSize
|
|
+ runeSize = 4 // rune is int32
|
|
+)
|
|
+
|
|
type parser struct {
|
|
flags Flags // parse mode flags
|
|
stack []*Regexp // stack of parsed expressions
|
|
free *Regexp
|
|
numCap int // number of capturing groups seen
|
|
wholeRegexp string
|
|
- tmpClass []rune // temporary char class work space
|
|
+ tmpClass []rune // temporary char class work space
|
|
+ numRegexp int // number of regexps allocated
|
|
+ numRunes int // number of runes in char classes
|
|
+ repeats int64 // product of all repetitions seen
|
|
+ height map[*Regexp]int // regexp height, for height limit check
|
|
+ size map[*Regexp]int64 // regexp compiled size, for size limit check
|
|
}
|
|
|
|
func (p *parser) newRegexp(op Op) *Regexp {
|
|
@@ -92,20 +143,155 @@ func (p *parser) newRegexp(op Op) *Regexp {
|
|
*re = Regexp{}
|
|
} else {
|
|
re = new(Regexp)
|
|
+ p.numRegexp++
|
|
}
|
|
re.Op = op
|
|
return re
|
|
}
|
|
|
|
func (p *parser) reuse(re *Regexp) {
|
|
+ if p.height != nil {
|
|
+ delete(p.height, re)
|
|
+ }
|
|
re.Sub0[0] = p.free
|
|
p.free = re
|
|
}
|
|
|
|
+func (p *parser) checkLimits(re *Regexp) {
|
|
+ if p.numRunes > maxRunes {
|
|
+ panic(ErrInternalError)
|
|
+ }
|
|
+ p.checkSize(re)
|
|
+ p.checkHeight(re)
|
|
+}
|
|
+
|
|
+func (p *parser) checkSize(re *Regexp) {
|
|
+ if p.size == nil {
|
|
+ // We haven't started tracking size yet.
|
|
+ // Do a relatively cheap check to see if we need to start.
|
|
+ // Maintain the product of all the repeats we've seen
|
|
+ // and don't track if the total number of regexp nodes
|
|
+ // we've seen times the repeat product is in budget.
|
|
+ if p.repeats == 0 {
|
|
+ p.repeats = 1
|
|
+ }
|
|
+ if re.Op == OpRepeat {
|
|
+ n := re.Max
|
|
+ if n == -1 {
|
|
+ n = re.Min
|
|
+ }
|
|
+ if n <= 0 {
|
|
+ n = 1
|
|
+ }
|
|
+ if int64(n) > maxSize/p.repeats {
|
|
+ p.repeats = maxSize
|
|
+ } else {
|
|
+ p.repeats *= int64(n)
|
|
+ }
|
|
+ }
|
|
+ if int64(p.numRegexp) < maxSize/p.repeats {
|
|
+ return
|
|
+ }
|
|
+
|
|
+ // We need to start tracking size.
|
|
+ // Make the map and belatedly populate it
|
|
+ // with info about everything we've constructed so far.
|
|
+ p.size = make(map[*Regexp]int64)
|
|
+ for _, re := range p.stack {
|
|
+ p.checkSize(re)
|
|
+ }
|
|
+ }
|
|
+
|
|
+ if p.calcSize(re, true) > maxSize {
|
|
+ panic(ErrInternalError)
|
|
+ }
|
|
+}
|
|
+
|
|
+func (p *parser) calcSize(re *Regexp, force bool) int64 {
|
|
+ if !force {
|
|
+ if size, ok := p.size[re]; ok {
|
|
+ return size
|
|
+ }
|
|
+ }
|
|
+
|
|
+ var size int64
|
|
+ switch re.Op {
|
|
+ case OpLiteral:
|
|
+ size = int64(len(re.Rune))
|
|
+ case OpCapture, OpStar:
|
|
+ // star can be 1+ or 2+; assume 2 pessimistically
|
|
+ size = 2 + p.calcSize(re.Sub[0], false)
|
|
+ case OpPlus, OpQuest:
|
|
+ size = 1 + p.calcSize(re.Sub[0], false)
|
|
+ case OpConcat:
|
|
+ for _, sub := range re.Sub {
|
|
+ size += p.calcSize(sub, false)
|
|
+ }
|
|
+ case OpAlternate:
|
|
+ for _, sub := range re.Sub {
|
|
+ size += p.calcSize(sub, false)
|
|
+ }
|
|
+ if len(re.Sub) > 1 {
|
|
+ size += int64(len(re.Sub)) - 1
|
|
+ }
|
|
+ case OpRepeat:
|
|
+ sub := p.calcSize(re.Sub[0], false)
|
|
+ if re.Max == -1 {
|
|
+ if re.Min == 0 {
|
|
+ size = 2 + sub // x*
|
|
+ } else {
|
|
+ size = 1 + int64(re.Min)*sub // xxx+
|
|
+ }
|
|
+ break
|
|
+ }
|
|
+ // x{2,5} = xx(x(x(x)?)?)?
|
|
+ size = int64(re.Max)*sub + int64(re.Max-re.Min)
|
|
+ }
|
|
+
|
|
+ if size < 1 {
|
|
+ size = 1
|
|
+ }
|
|
+ p.size[re] = size
|
|
+ return size
|
|
+}
|
|
+
|
|
+func (p *parser) checkHeight(re *Regexp) {
|
|
+ if p.numRegexp < maxHeight {
|
|
+ return
|
|
+ }
|
|
+ if p.height == nil {
|
|
+ p.height = make(map[*Regexp]int)
|
|
+ for _, re := range p.stack {
|
|
+ p.checkHeight(re)
|
|
+ }
|
|
+ }
|
|
+ if p.calcHeight(re, true) > maxHeight {
|
|
+ panic(ErrNestingDepth)
|
|
+ }
|
|
+}
|
|
+
|
|
+func (p *parser) calcHeight(re *Regexp, force bool) int {
|
|
+ if !force {
|
|
+ if h, ok := p.height[re]; ok {
|
|
+ return h
|
|
+ }
|
|
+ }
|
|
+ h := 1
|
|
+ for _, sub := range re.Sub {
|
|
+ hsub := p.calcHeight(sub, false)
|
|
+ if h < 1+hsub {
|
|
+ h = 1 + hsub
|
|
+ }
|
|
+ }
|
|
+ p.height[re] = h
|
|
+ return h
|
|
+}
|
|
+
|
|
// Parse stack manipulation.
|
|
|
|
// push pushes the regexp re onto the parse stack and returns the regexp.
|
|
func (p *parser) push(re *Regexp) *Regexp {
|
|
+ p.numRunes += len(re.Rune)
|
|
if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
|
|
// Single rune.
|
|
if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
|
|
@@ -137,6 +323,7 @@ func (p *parser) push(re *Regexp) *Regexp {
|
|
}
|
|
|
|
p.stack = append(p.stack, re)
|
|
+ p.checkLimits(re)
|
|
return re
|
|
}
|
|
|
|
@@ -246,6 +433,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
|
|
re.Sub = re.Sub0[:1]
|
|
re.Sub[0] = sub
|
|
p.stack[n-1] = re
|
|
+ p.checkLimits(re)
|
|
|
|
if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
|
|
return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
|
|
@@ -390,12 +578,16 @@ func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
|
|
// frees (passes to p.reuse) any removed *Regexps.
|
|
//
|
|
// For example,
|
|
-// ABC|ABD|AEF|BCX|BCY
|
|
+//
|
|
+// ABC|ABD|AEF|BCX|BCY
|
|
+//
|
|
// simplifies by literal prefix extraction to
|
|
-// A(B(C|D)|EF)|BC(X|Y)
|
|
+//
|
|
+// A(B(C|D)|EF)|BC(X|Y)
|
|
+//
|
|
// which simplifies by character class introduction to
|
|
-// A(B[CD]|EF)|BC[XY]
|
|
//
|
|
+// A(B[CD]|EF)|BC[XY]
|
|
func (p *parser) factor(sub []*Regexp) []*Regexp {
|
|
if len(sub) < 2 {
|
|
return sub
|
|
@@ -449,6 +641,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp {
|
|
|
|
for j := start; j < i; j++ {
|
|
sub[j] = p.removeLeadingString(sub[j], len(str))
|
|
+ p.checkLimits(sub[j])
|
|
}
|
|
suffix := p.collapse(sub[start:i], OpAlternate) // recurse
|
|
|
|
@@ -506,6 +699,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp {
|
|
for j := start; j < i; j++ {
|
|
reuse := j != start // prefix came from sub[start]
|
|
sub[j] = p.removeLeadingRegexp(sub[j], reuse)
|
|
+ p.checkLimits(sub[j])
|
|
}
|
|
suffix := p.collapse(sub[start:i], OpAlternate) // recurse
|
|
|
|
@@ -693,6 +887,23 @@ func literalRegexp(s string, flags Flags) *Regexp {
|
|
// Flags, and returns a regular expression parse tree. The syntax is
|
|
// described in the top-level comment.
|
|
func Parse(s string, flags Flags) (*Regexp, error) {
|
|
+ return parse(s, flags)
|
|
+}
|
|
+
|
|
+func parse(s string, flags Flags) (_ *Regexp, err error) {
|
|
+ defer func() {
|
|
+ switch r := recover(); r {
|
|
+ default:
|
|
+ panic(r)
|
|
+ case nil:
|
|
+ // ok
|
|
+ case ErrInternalError: // too big
|
|
+ err = &Error{Code: ErrInternalError, Expr: s}
|
|
+ case ErrNestingDepth:
|
|
+ err = &Error{Code: ErrNestingDepth, Expr: s}
|
|
+ }
|
|
+ }()
|
|
+
|
|
if flags&Literal != 0 {
|
|
// Trivial parser for literal string.
|
|
if err := checkUTF8(s); err != nil {
|
|
@@ -704,7 +915,6 @@ func Parse(s string, flags Flags) (*Regexp, error) {
|
|
// Otherwise, must do real work.
|
|
var (
|
|
p parser
|
|
- err error
|
|
c rune
|
|
op Op
|
|
lastRepeat string
|
|
@@ -1733,7 +1943,7 @@ func appendClass(r []rune, x []rune) []rune {
|
|
return r
|
|
}
|
|
|
|
-// appendFolded returns the result of appending the case folding of the class x to the class r.
|
|
+// appendFoldedClass returns the result of appending the case folding of the class x to the class r.
|
|
func appendFoldedClass(r []rune, x []rune) []rune {
|
|
for i := 0; i < len(x); i += 2 {
|
|
r = appendFoldedRange(r, x[i], x[i+1])
|
|
diff --git a/src/regexp/syntax/parse_test.go b/src/regexp/syntax/parse_test.go
|
|
index 5581ba1..6044da6 100644
|
|
--- a/src/regexp/syntax/parse_test.go
|
|
+++ b/src/regexp/syntax/parse_test.go
|
|
@@ -479,10 +479,15 @@ var invalidRegexps = []string{
|
|
`(?P<>a)`,
|
|
`[a-Z]`,
|
|
`(?i)[a-Z]`,
|
|
- `a{100000}`,
|
|
- `a{100000,}`,
|
|
- "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
|
|
`\Q\E*`,
|
|
+ `a{100000}`, // too much repetition
|
|
+ `a{100000,}`, // too much repetition
|
|
+ "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})", // too much repetition
|
|
+ strings.Repeat("(", 1000) + strings.Repeat(")", 1000), // too deep
|
|
+ strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000), // too deep
|
|
+ "(" + strings.Repeat("(xx?)", 1000) + "){1000}", // too long
|
|
+ strings.Repeat("(xx?){1000}", 1000), // too long
|
|
+ strings.Repeat(`\pL`, 27000), // too many runes
|
|
}
|
|
|
|
var onlyPerl = []string{
|
|
--
|
|
2.33.0
|
|
|