golang/0020-release-branch.go1.18-regexp-limit-size-of-parsed-re.patch

From 8b3a5d153b7b255bafd1a82d61505088356d0458 Mon Sep 17 00:00:00 2001
From: Russ Cox <rsc@golang.org>
Date: Wed, 28 Sep 2022 11:18:51 -0400
Subject: [PATCH] regexp: limit size of parsed regexps

Set a 128 MB limit on the amount of space used by []syntax.Inst
in the compiled form corresponding to a given regexp.

Also set a 128 MB limit on the rune storage in the *syntax.Regexp
tree itself.

Thanks to Adam Korczynski (ADA Logics) and OSS-Fuzz for reporting this issue.

Fixes CVE-2022-41715.
Updates #55949.
Fixes #55950.

Change-Id: Ia656baed81564436368cf950e1c5409752f28e1b
Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1592136
TryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com>
Reviewed-by: Damien Neil <dneil@google.com>
Run-TryBot: Roland Shoemaker <bracewell@google.com>
Reviewed-by: Julie Qiu <julieqiu@google.com>
Reviewed-on: https://go-review.googlesource.com/c/go/+/438501
Run-TryBot: Carlos Amedee <carlos@golang.org>
Reviewed-by: Carlos Amedee <carlos@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>
---
 src/regexp/syntax/parse.go      | 222 +++++++++++++++++++++++++++++++-
 src/regexp/syntax/parse_test.go |  11 +-
 2 files changed, 224 insertions(+), 9 deletions(-)

diff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go
index 7b40309..67254d6 100644
--- a/src/regexp/syntax/parse.go
+++ b/src/regexp/syntax/parse.go
@@ -43,6 +43,7 @@ const (
 	ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"
 	ErrTrailingBackslash     ErrorCode = "trailing backslash at end of expression"
 	ErrUnexpectedParen       ErrorCode = "unexpected )"
+	ErrNestingDepth          ErrorCode = "expression nests too deeply"
 )
 
 func (e ErrorCode) String() string {
@@ -76,13 +77,63 @@ const (
 	opVerticalBar
 )
 
+// maxHeight is the maximum height of a regexp parse tree.
+// It is somewhat arbitrarily chosen, but the idea is to be large enough
+// that no one will actually hit in real use but at the same time small enough
+// that recursion on the Regexp tree will not hit the 1GB Go stack limit.
+// The maximum amount of stack for a single recursive frame is probably
+// closer to 1kB, so this could potentially be raised, but it seems unlikely
+// that people have regexps nested even this deeply.
+// We ran a test on Google's C++ code base and turned up only
+// a single use case with depth > 100; it had depth 128.
+// Using depth 1000 should be plenty of margin.
+// As an optimization, we don't even bother calculating heights
+// until we've allocated at least maxHeight Regexp structures.
+const maxHeight = 1000
+
+// maxSize is the maximum size of a compiled regexp in Insts.
+// It too is somewhat arbitrarily chosen, but the idea is to be large enough
+// to allow significant regexps while at the same time small enough that
+// the compiled form will not take up too much memory.
+// 128 MB is enough for a 3.3 million Inst structures, which roughly
+// corresponds to a 3.3 MB regexp.
+const (
+	maxSize  = 128 << 20 / instSize
+	instSize = 5 * 8 // byte, 2 uint32, slice is 5 64-bit words
+)
+
+// maxRunes is the maximum number of runes allowed in a regexp tree
+// counting the runes in all the nodes.
+// Ignoring character classes p.numRunes is always less than the length of the regexp.
+// Character classes can make it much larger: each \pL adds 1292 runes.
+// 128 MB is enough for 32M runes, which is over 26k \pL instances.
+// Note that repetitions do not make copies of the rune slices,
+// so \pL{1000} is only one rune slice, not 1000.
+// We could keep a cache of character classes we've seen,
+// so that all the \pL we see use the same rune list,
+// but that doesn't remove the problem entirely:
+// consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()].
+// And because the Rune slice is exposed directly in the Regexp,
+// there is not an opportunity to change the representation to allow
+// partial sharing between different character classes.
+// So the limit is the best we can do.
+const (
+	maxRunes = 128 << 20 / runeSize
+	runeSize = 4 // rune is int32
+)
+
 type parser struct {
 	flags       Flags     // parse mode flags
 	stack       []*Regexp // stack of parsed expressions
 	free        *Regexp
 	numCap      int // number of capturing groups seen
 	wholeRegexp string
-	tmpClass    []rune // temporary char class work space
+	tmpClass    []rune            // temporary char class work space
+	numRegexp   int               // number of regexps allocated
+	numRunes    int               // number of runes in char classes
+	repeats     int64             // product of all repetitions seen
+	height      map[*Regexp]int   // regexp height, for height limit check
+	size        map[*Regexp]int64 // regexp compiled size, for size limit check
 }
 
 func (p *parser) newRegexp(op Op) *Regexp {
@@ -92,20 +143,155 @@ func (p *parser) newRegexp(op Op) *Regexp {
 		*re = Regexp{}
 	} else {
 		re = new(Regexp)
+		p.numRegexp++
 	}
 	re.Op = op
 	return re
 }
 
 func (p *parser) reuse(re *Regexp) {
+	if p.height != nil {
+		delete(p.height, re)
+	}
 	re.Sub0[0] = p.free
 	p.free = re
 }
 
+func (p *parser) checkLimits(re *Regexp) {
+	if p.numRunes > maxRunes {
+		panic(ErrInternalError)
+	}
+	p.checkSize(re)
+	p.checkHeight(re)
+}
+
+func (p *parser) checkSize(re *Regexp) {
+	if p.size == nil {
+		// We haven't started tracking size yet.
+		// Do a relatively cheap check to see if we need to start.
+		// Maintain the product of all the repeats we've seen
+		// and don't track if the total number of regexp nodes
+		// we've seen times the repeat product is in budget.
+		if p.repeats == 0 {
+			p.repeats = 1
+		}
+		if re.Op == OpRepeat {
+			n := re.Max
+			if n == -1 {
+				n = re.Min
+			}
+			if n <= 0 {
+				n = 1
+			}
+			if int64(n) > maxSize/p.repeats {
+				p.repeats = maxSize
+			} else {
+				p.repeats *= int64(n)
+			}
+		}
+		if int64(p.numRegexp) < maxSize/p.repeats {
+			return
+		}
+
+		// We need to start tracking size.
+		// Make the map and belatedly populate it
+		// with info about everything we've constructed so far.
+		p.size = make(map[*Regexp]int64)
+		for _, re := range p.stack {
+			p.checkSize(re)
+		}
+	}
+
+	if p.calcSize(re, true) > maxSize {
+		panic(ErrInternalError)
+	}
+}
+
+func (p *parser) calcSize(re *Regexp, force bool) int64 {
+	if !force {
+		if size, ok := p.size[re]; ok {
+			return size
+		}
+	}
+
+	var size int64
+	switch re.Op {
+	case OpLiteral:
+		size = int64(len(re.Rune))
+	case OpCapture, OpStar:
+		// star can be 1+ or 2+; assume 2 pessimistically
+		size = 2 + p.calcSize(re.Sub[0], false)
+	case OpPlus, OpQuest:
+		size = 1 + p.calcSize(re.Sub[0], false)
+	case OpConcat:
+		for _, sub := range re.Sub {
+			size += p.calcSize(sub, false)
+		}
+	case OpAlternate:
+		for _, sub := range re.Sub {
+			size += p.calcSize(sub, false)
+		}
+		if len(re.Sub) > 1 {
+			size += int64(len(re.Sub)) - 1
+		}
+	case OpRepeat:
+		sub := p.calcSize(re.Sub[0], false)
+		if re.Max == -1 {
+			if re.Min == 0 {
+				size = 2 + sub // x*
+			} else {
+				size = 1 + int64(re.Min)*sub // xxx+
+			}
+			break
+		}
+		// x{2,5} = xx(x(x(x)?)?)?
+		size = int64(re.Max)*sub + int64(re.Max-re.Min)
+	}
+
+	if size < 1 {
+		size = 1
+	}
+	p.size[re] = size
+	return size
+}
+
+func (p *parser) checkHeight(re *Regexp) {
+	if p.numRegexp < maxHeight {
+		return
+	}
+	if p.height == nil {
+		p.height = make(map[*Regexp]int)
+		for _, re := range p.stack {
+			p.checkHeight(re)
+		}
+	}
+	if p.calcHeight(re, true) > maxHeight {
+		panic(ErrNestingDepth)
+	}
+}
+
+func (p *parser) calcHeight(re *Regexp, force bool) int {
+	if !force {
+		if h, ok := p.height[re]; ok {
+			return h
+		}
+	}
+	h := 1
+	for _, sub := range re.Sub {
+		hsub := p.calcHeight(sub, false)
+		if h < 1+hsub {
+			h = 1 + hsub
+		}
+	}
+	p.height[re] = h
+	return h
+}
+
 // Parse stack manipulation.
 
 // push pushes the regexp re onto the parse stack and returns the regexp.
 func (p *parser) push(re *Regexp) *Regexp {
+	p.numRunes += len(re.Rune)
 	if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {
 		// Single rune.
 		if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {
@@ -137,6 +323,7 @@ func (p *parser) push(re *Regexp) *Regexp {
 	}
 
 	p.stack = append(p.stack, re)
+	p.checkLimits(re)
 	return re
 }
 
@@ -246,6 +433,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (
 	re.Sub = re.Sub0[:1]
 	re.Sub[0] = sub
 	p.stack[n-1] = re
+	p.checkLimits(re)
 
 	if op == OpRepeat && (min >= 2 || max >= 2) && !repeatIsValid(re, 1000) {
 		return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}
@@ -390,12 +578,16 @@ func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {
 // frees (passes to p.reuse) any removed *Regexps.
 //
 // For example,
-//     ABC|ABD|AEF|BCX|BCY
+//
+//	ABC|ABD|AEF|BCX|BCY
+//
 // simplifies by literal prefix extraction to
-//     A(B(C|D)|EF)|BC(X|Y)
+//
+//	A(B(C|D)|EF)|BC(X|Y)
+//
 // which simplifies by character class introduction to
-//     A(B[CD]|EF)|BC[XY]
 //
+//	A(B[CD]|EF)|BC[XY]
 func (p *parser) factor(sub []*Regexp) []*Regexp {
 	if len(sub) < 2 {
 		return sub
@@ -449,6 +641,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp {
 
 			for j := start; j < i; j++ {
 				sub[j] = p.removeLeadingString(sub[j], len(str))
+				p.checkLimits(sub[j])
 			}
 			suffix := p.collapse(sub[start:i], OpAlternate) // recurse
 
@@ -506,6 +699,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp {
 			for j := start; j < i; j++ {
 				reuse := j != start // prefix came from sub[start]
 				sub[j] = p.removeLeadingRegexp(sub[j], reuse)
+				p.checkLimits(sub[j])
 			}
 			suffix := p.collapse(sub[start:i], OpAlternate) // recurse
 
@@ -693,6 +887,23 @@ func literalRegexp(s string, flags Flags) *Regexp {
 // Flags, and returns a regular expression parse tree. The syntax is
 // described in the top-level comment.
 func Parse(s string, flags Flags) (*Regexp, error) {
+	return parse(s, flags)
+}
+
+func parse(s string, flags Flags) (_ *Regexp, err error) {
+	defer func() {
+		switch r := recover(); r {
+		default:
+			panic(r)
+		case nil:
+			// ok
+		case ErrInternalError: // too big
+			err = &Error{Code: ErrInternalError, Expr: s}
+		case ErrNestingDepth:
+			err = &Error{Code: ErrNestingDepth, Expr: s}
+		}
+	}()
+
 	if flags&Literal != 0 {
 		// Trivial parser for literal string.
 		if err := checkUTF8(s); err != nil {
@@ -704,7 +915,6 @@ func Parse(s string, flags Flags) (*Regexp, error) {
 	// Otherwise, must do real work.
 	var (
 		p          parser
-		err        error
 		c          rune
 		op         Op
 		lastRepeat string
@@ -1733,7 +1943,7 @@ func appendClass(r []rune, x []rune) []rune {
 	return r
 }
 
-// appendFolded returns the result of appending the case folding of the class x to the class r.
+// appendFoldedClass returns the result of appending the case folding of the class x to the class r.
 func appendFoldedClass(r []rune, x []rune) []rune {
 	for i := 0; i < len(x); i += 2 {
 		r = appendFoldedRange(r, x[i], x[i+1])
diff --git a/src/regexp/syntax/parse_test.go b/src/regexp/syntax/parse_test.go
index 5581ba1..6044da6 100644
--- a/src/regexp/syntax/parse_test.go
+++ b/src/regexp/syntax/parse_test.go
@@ -479,10 +479,15 @@ var invalidRegexps = []string{
 	`(?P<>a)`,
 	`[a-Z]`,
 	`(?i)[a-Z]`,
-	`a{100000}`,
-	`a{100000,}`,
-	"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",
 	`\Q\E*`,
+	`a{100000}`,  // too much repetition
+	`a{100000,}`, // too much repetition
+	"((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",    // too much repetition
+	strings.Repeat("(", 1000) + strings.Repeat(")", 1000),    // too deep
+	strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000), // too deep
+	"(" + strings.Repeat("(xx?)", 1000) + "){1000}",          // too long
+	strings.Repeat("(xx?){1000}", 1000),                      // too long
+	strings.Repeat(`\pL`, 27000),                             // too many runes
 }
 
 var onlyPerl = []string{
-- 
2.33.0
golang: fix CVE-2022-41715,CVE-2022-2880,CVE-2022-2879 Score:CVE-2022-41715:4,CVE-2022-2880:5.3,CVE-2022-2879:6.2 Reference:https://go-review.googlesource.com/c/go/+/438501, https://go-review.googlesource.com/c/go/+/433695, https://go-review.googlesource.com/c/go/+/438500 Conflict:NA Reason:fix CVE-2022-41715,CVE-2022-2880,CVE-2022-2879 2022-10-10 21:55:25 +08:00			`From 8b3a5d153b7b255bafd1a82d61505088356d0458 Mon Sep 17 00:00:00 2001`
			`From: Russ Cox <rsc@golang.org>`
			`Date: Wed, 28 Sep 2022 11:18:51 -0400`
			`Subject: [PATCH] regexp: limit size of parsed regexps`

			`Set a 128 MB limit on the amount of space used by []syntax.Inst`
			`in the compiled form corresponding to a given regexp.`

			`Also set a 128 MB limit on the rune storage in the *syntax.Regexp`
			`tree itself.`

			`Thanks to Adam Korczynski (ADA Logics) and OSS-Fuzz for reporting this issue.`

			`Fixes CVE-2022-41715.`
			`Updates #55949.`
			`Fixes #55950.`

			`Change-Id: Ia656baed81564436368cf950e1c5409752f28e1b`
			`Reviewed-on: https://team-review.git.corp.google.com/c/golang/go-private/+/1592136`
			`TryBot-Result: Security TryBots <security-trybots@go-security-trybots.iam.gserviceaccount.com>`
			`Reviewed-by: Damien Neil <dneil@google.com>`
			`Run-TryBot: Roland Shoemaker <bracewell@google.com>`
			`Reviewed-by: Julie Qiu <julieqiu@google.com>`
			`Reviewed-on: https://go-review.googlesource.com/c/go/+/438501`
			`Run-TryBot: Carlos Amedee <carlos@golang.org>`
			`Reviewed-by: Carlos Amedee <carlos@golang.org>`
			`Reviewed-by: Dmitri Shuralyov <dmitshur@google.com>`
			`TryBot-Result: Gopher Robot <gobot@golang.org>`
			`Reviewed-by: Dmitri Shuralyov <dmitshur@golang.org>`
			`---`
			`src/regexp/syntax/parse.go \| 222 +++++++++++++++++++++++++++++++-`
			`src/regexp/syntax/parse_test.go \| 11 +-`
			`2 files changed, 224 insertions(+), 9 deletions(-)`

			`diff --git a/src/regexp/syntax/parse.go b/src/regexp/syntax/parse.go`
			`index 7b40309..67254d6 100644`
			`--- a/src/regexp/syntax/parse.go`
			`+++ b/src/regexp/syntax/parse.go`
			`@@ -43,6 +43,7 @@ const (`
			`ErrMissingRepeatArgument ErrorCode = "missing argument to repetition operator"`
			`ErrTrailingBackslash ErrorCode = "trailing backslash at end of expression"`
			`ErrUnexpectedParen ErrorCode = "unexpected )"`
			`+ ErrNestingDepth ErrorCode = "expression nests too deeply"`
			`)`

			`func (e ErrorCode) String() string {`
			`@@ -76,13 +77,63 @@ const (`
			`opVerticalBar`
			`)`

			`+// maxHeight is the maximum height of a regexp parse tree.`
			`+// It is somewhat arbitrarily chosen, but the idea is to be large enough`
			`+// that no one will actually hit in real use but at the same time small enough`
			`+// that recursion on the Regexp tree will not hit the 1GB Go stack limit.`
			`+// The maximum amount of stack for a single recursive frame is probably`
			`+// closer to 1kB, so this could potentially be raised, but it seems unlikely`
			`+// that people have regexps nested even this deeply.`
			`+// We ran a test on Google's C++ code base and turned up only`
			`+// a single use case with depth > 100; it had depth 128.`
			`+// Using depth 1000 should be plenty of margin.`
			`+// As an optimization, we don't even bother calculating heights`
			`+// until we've allocated at least maxHeight Regexp structures.`
			`+const maxHeight = 1000`
			`+`
			`+// maxSize is the maximum size of a compiled regexp in Insts.`
			`+// It too is somewhat arbitrarily chosen, but the idea is to be large enough`
			`+// to allow significant regexps while at the same time small enough that`
			`+// the compiled form will not take up too much memory.`
			`+// 128 MB is enough for a 3.3 million Inst structures, which roughly`
			`+// corresponds to a 3.3 MB regexp.`
			`+const (`
			`+ maxSize = 128 << 20 / instSize`
			`+ instSize = 5 * 8 // byte, 2 uint32, slice is 5 64-bit words`
			`+)`
			`+`
			`+// maxRunes is the maximum number of runes allowed in a regexp tree`
			`+// counting the runes in all the nodes.`
			`+// Ignoring character classes p.numRunes is always less than the length of the regexp.`
			`+// Character classes can make it much larger: each \pL adds 1292 runes.`
			`+// 128 MB is enough for 32M runes, which is over 26k \pL instances.`
			`+// Note that repetitions do not make copies of the rune slices,`
			`+// so \pL{1000} is only one rune slice, not 1000.`
			`+// We could keep a cache of character classes we've seen,`
			`+// so that all the \pL we see use the same rune list,`
			`+// but that doesn't remove the problem entirely:`
			`+// consider something like [\pL01234][\pL01235][\pL01236]...[\pL^&*()].`
			`+// And because the Rune slice is exposed directly in the Regexp,`
			`+// there is not an opportunity to change the representation to allow`
			`+// partial sharing between different character classes.`
			`+// So the limit is the best we can do.`
			`+const (`
			`+ maxRunes = 128 << 20 / runeSize`
			`+ runeSize = 4 // rune is int32`
			`+)`
			`+`
			`type parser struct {`
			`flags Flags // parse mode flags`
			`stack []*Regexp // stack of parsed expressions`
			`free *Regexp`
			`numCap int // number of capturing groups seen`
			`wholeRegexp string`
			`- tmpClass []rune // temporary char class work space`
			`+ tmpClass []rune // temporary char class work space`
			`+ numRegexp int // number of regexps allocated`
			`+ numRunes int // number of runes in char classes`
			`+ repeats int64 // product of all repetitions seen`
			`+ height map[*Regexp]int // regexp height, for height limit check`
			`+ size map[*Regexp]int64 // regexp compiled size, for size limit check`
			`}`

			`func (p *parser) newRegexp(op Op) *Regexp {`
			`@@ -92,20 +143,155 @@ func (p *parser) newRegexp(op Op) *Regexp {`
			`*re = Regexp{}`
			`} else {`
			`re = new(Regexp)`
			`+ p.numRegexp++`
			`}`
			`re.Op = op`
			`return re`
			`}`

			`func (p *parser) reuse(re *Regexp) {`
			`+ if p.height != nil {`
			`+ delete(p.height, re)`
			`+ }`
			`re.Sub0[0] = p.free`
			`p.free = re`
			`}`

			`+func (p *parser) checkLimits(re *Regexp) {`
			`+ if p.numRunes > maxRunes {`
			`+ panic(ErrInternalError)`
			`+ }`
			`+ p.checkSize(re)`
			`+ p.checkHeight(re)`
			`+}`
			`+`
			`+func (p *parser) checkSize(re *Regexp) {`
			`+ if p.size == nil {`
			`+ // We haven't started tracking size yet.`
			`+ // Do a relatively cheap check to see if we need to start.`
			`+ // Maintain the product of all the repeats we've seen`
			`+ // and don't track if the total number of regexp nodes`
			`+ // we've seen times the repeat product is in budget.`
			`+ if p.repeats == 0 {`
			`+ p.repeats = 1`
			`+ }`
			`+ if re.Op == OpRepeat {`
			`+ n := re.Max`
			`+ if n == -1 {`
			`+ n = re.Min`
			`+ }`
			`+ if n <= 0 {`
			`+ n = 1`
			`+ }`
			`+ if int64(n) > maxSize/p.repeats {`
			`+ p.repeats = maxSize`
			`+ } else {`
			`+ p.repeats *= int64(n)`
			`+ }`
			`+ }`
			`+ if int64(p.numRegexp) < maxSize/p.repeats {`
			`+ return`
			`+ }`
			`+`
			`+ // We need to start tracking size.`
			`+ // Make the map and belatedly populate it`
			`+ // with info about everything we've constructed so far.`
			`+ p.size = make(map[*Regexp]int64)`
			`+ for _, re := range p.stack {`
			`+ p.checkSize(re)`
			`+ }`
			`+ }`
			`+`
			`+ if p.calcSize(re, true) > maxSize {`
			`+ panic(ErrInternalError)`
			`+ }`
			`+}`
			`+`
			`+func (p *parser) calcSize(re *Regexp, force bool) int64 {`
			`+ if !force {`
			`+ if size, ok := p.size[re]; ok {`
			`+ return size`
			`+ }`
			`+ }`
			`+`
			`+ var size int64`
			`+ switch re.Op {`
			`+ case OpLiteral:`
			`+ size = int64(len(re.Rune))`
			`+ case OpCapture, OpStar:`
			`+ // star can be 1+ or 2+; assume 2 pessimistically`
			`+ size = 2 + p.calcSize(re.Sub[0], false)`
			`+ case OpPlus, OpQuest:`
			`+ size = 1 + p.calcSize(re.Sub[0], false)`
			`+ case OpConcat:`
			`+ for _, sub := range re.Sub {`
			`+ size += p.calcSize(sub, false)`
			`+ }`
			`+ case OpAlternate:`
			`+ for _, sub := range re.Sub {`
			`+ size += p.calcSize(sub, false)`
			`+ }`
			`+ if len(re.Sub) > 1 {`
			`+ size += int64(len(re.Sub)) - 1`
			`+ }`
			`+ case OpRepeat:`
			`+ sub := p.calcSize(re.Sub[0], false)`
			`+ if re.Max == -1 {`
			`+ if re.Min == 0 {`
			`+ size = 2 + sub // x*`
			`+ } else {`
			`+ size = 1 + int64(re.Min)*sub // xxx+`
			`+ }`
			`+ break`
			`+ }`
			`+ // x{2,5} = xx(x(x(x)?)?)?`
			`+ size = int64(re.Max)*sub + int64(re.Max-re.Min)`
			`+ }`
			`+`
			`+ if size < 1 {`
			`+ size = 1`
			`+ }`
			`+ p.size[re] = size`
			`+ return size`
			`+}`
			`+`
			`+func (p *parser) checkHeight(re *Regexp) {`
			`+ if p.numRegexp < maxHeight {`
			`+ return`
			`+ }`
			`+ if p.height == nil {`
			`+ p.height = make(map[*Regexp]int)`
			`+ for _, re := range p.stack {`
			`+ p.checkHeight(re)`
			`+ }`
			`+ }`
			`+ if p.calcHeight(re, true) > maxHeight {`
			`+ panic(ErrNestingDepth)`
			`+ }`
			`+}`
			`+`
			`+func (p *parser) calcHeight(re *Regexp, force bool) int {`
			`+ if !force {`
			`+ if h, ok := p.height[re]; ok {`
			`+ return h`
			`+ }`
			`+ }`
			`+ h := 1`
			`+ for _, sub := range re.Sub {`
			`+ hsub := p.calcHeight(sub, false)`
			`+ if h < 1+hsub {`
			`+ h = 1 + hsub`
			`+ }`
			`+ }`
			`+ p.height[re] = h`
			`+ return h`
			`+}`
			`+`
			`// Parse stack manipulation.`

			`// push pushes the regexp re onto the parse stack and returns the regexp.`
			`func (p *parser) push(re *Regexp) *Regexp {`
			`+ p.numRunes += len(re.Rune)`
			`if re.Op == OpCharClass && len(re.Rune) == 2 && re.Rune[0] == re.Rune[1] {`
			`// Single rune.`
			`if p.maybeConcat(re.Rune[0], p.flags&^FoldCase) {`
			`@@ -137,6 +323,7 @@ func (p *parser) push(re *Regexp) *Regexp {`
			`}`

			`p.stack = append(p.stack, re)`
			`+ p.checkLimits(re)`
			`return re`
			`}`

			`@@ -246,6 +433,7 @@ func (p *parser) repeat(op Op, min, max int, before, after, lastRepeat string) (`
			`re.Sub = re.Sub0[:1]`
			`re.Sub[0] = sub`
			`p.stack[n-1] = re`
			`+ p.checkLimits(re)`

			`if op == OpRepeat && (min >= 2 \|\| max >= 2) && !repeatIsValid(re, 1000) {`
			`return "", &Error{ErrInvalidRepeatSize, before[:len(before)-len(after)]}`
			`@@ -390,12 +578,16 @@ func (p *parser) collapse(subs []*Regexp, op Op) *Regexp {`
			`// frees (passes to p.reuse) any removed *Regexps.`
			`//`
			`// For example,`
			`-// ABC\|ABD\|AEF\|BCX\|BCY`
			`+//`
			`+// ABC\|ABD\|AEF\|BCX\|BCY`
			`+//`
			`// simplifies by literal prefix extraction to`
			`-// A(B(C\|D)\|EF)\|BC(X\|Y)`
			`+//`
			`+// A(B(C\|D)\|EF)\|BC(X\|Y)`
			`+//`
			`// which simplifies by character class introduction to`
			`-// A(B[CD]\|EF)\|BC[XY]`
			`//`
			`+// A(B[CD]\|EF)\|BC[XY]`
			`func (p *parser) factor(sub []*Regexp) []*Regexp {`
			`if len(sub) < 2 {`
			`return sub`
			`@@ -449,6 +641,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp {`

			`for j := start; j < i; j++ {`
			`sub[j] = p.removeLeadingString(sub[j], len(str))`
			`+ p.checkLimits(sub[j])`
			`}`
			`suffix := p.collapse(sub[start:i], OpAlternate) // recurse`

			`@@ -506,6 +699,7 @@ func (p *parser) factor(sub []*Regexp) []*Regexp {`
			`for j := start; j < i; j++ {`
			`reuse := j != start // prefix came from sub[start]`
			`sub[j] = p.removeLeadingRegexp(sub[j], reuse)`
			`+ p.checkLimits(sub[j])`
			`}`
			`suffix := p.collapse(sub[start:i], OpAlternate) // recurse`

			`@@ -693,6 +887,23 @@ func literalRegexp(s string, flags Flags) *Regexp {`
			`// Flags, and returns a regular expression parse tree. The syntax is`
			`// described in the top-level comment.`
			`func Parse(s string, flags Flags) (*Regexp, error) {`
			`+ return parse(s, flags)`
			`+}`
			`+`
			`+func parse(s string, flags Flags) (_ *Regexp, err error) {`
			`+ defer func() {`
			`+ switch r := recover(); r {`
			`+ default:`
			`+ panic(r)`
			`+ case nil:`
			`+ // ok`
			`+ case ErrInternalError: // too big`
			`+ err = &Error{Code: ErrInternalError, Expr: s}`
			`+ case ErrNestingDepth:`
			`+ err = &Error{Code: ErrNestingDepth, Expr: s}`
			`+ }`
			`+ }()`
			`+`
			`if flags&Literal != 0 {`
			`// Trivial parser for literal string.`
			`if err := checkUTF8(s); err != nil {`
			`@@ -704,7 +915,6 @@ func Parse(s string, flags Flags) (*Regexp, error) {`
			`// Otherwise, must do real work.`
			`var (`
			`p parser`
			`- err error`
			`c rune`
			`op Op`
			`lastRepeat string`
			`@@ -1733,7 +1943,7 @@ func appendClass(r []rune, x []rune) []rune {`
			`return r`
			`}`

			`-// appendFolded returns the result of appending the case folding of the class x to the class r.`
			`+// appendFoldedClass returns the result of appending the case folding of the class x to the class r.`
			`func appendFoldedClass(r []rune, x []rune) []rune {`
			`for i := 0; i < len(x); i += 2 {`
			`r = appendFoldedRange(r, x[i], x[i+1])`
			`diff --git a/src/regexp/syntax/parse_test.go b/src/regexp/syntax/parse_test.go`
			`index 5581ba1..6044da6 100644`
			`--- a/src/regexp/syntax/parse_test.go`
			`+++ b/src/regexp/syntax/parse_test.go`
			`@@ -479,10 +479,15 @@ var invalidRegexps = []string{`
			`(?P<>a)`,
			`[a-Z]`,
			`(?i)[a-Z]`,
			- `a{100000}`,
			- `a{100000,}`,
			`- "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})",`
			`\Q\E*`,
			+ `a{100000}`, // too much repetition
			+ `a{100000,}`, // too much repetition
			`+ "((((((((((x{2}){2}){2}){2}){2}){2}){2}){2}){2}){2})", // too much repetition`
			`+ strings.Repeat("(", 1000) + strings.Repeat(")", 1000), // too deep`
			`+ strings.Repeat("(?:", 1000) + strings.Repeat(")*", 1000), // too deep`
			`+ "(" + strings.Repeat("(xx?)", 1000) + "){1000}", // too long`
			`+ strings.Repeat("(xx?){1000}", 1000), // too long`
			+ strings.Repeat(`\pL`, 27000), // too many runes
			`}`

			`var onlyPerl = []string{`
			`--`
			`2.33.0`