From 429f477455b84ede9651f79eb15e78a129a660e9 Mon Sep 17 00:00:00 2001
From: bwzhang <zhangbowei@kylinos.cn>
Date: Thu, 9 May 2024 19:25:53 +0800
Subject: [PATCH] fix CVE-2022-3064

Improve heuristics preventing CPU/memory abuse (#515)
This addresses the following items:

==== Parse time of excessively deep nested or indented documents

Parsing these documents is non-linear; limiting stack depth to 10,000 keeps parse times of pathological documents sub-second (~.25 seconds in benchmarks)

==== Alias node expansion limits

The current limit allows 10,000% expansion, which is too permissive for large documents.

Limiting to 10% expansion for larger documents allows callers to use input size as an effective way to limit resource usage. Continuing to allow larger expansion rates (up to the current 10,000% limit) for smaller documents does not unduly affect memory use.

This change bounds decode operations from alias expansion to ~400,000 operations for small documents (worst-case ~100-150MB) or 10% of the input document for large documents, whichever is greater.
liggitt authored and niemeyer committed on Oct 3, 2019
---
 vendor/gopkg.in/yaml.v2/decode.go   | 38 +++++++++++++++++++++++++++++
 vendor/gopkg.in/yaml.v2/scannerc.go | 16 ++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/vendor/gopkg.in/yaml.v2/decode.go b/vendor/gopkg.in/yaml.v2/decode.go
index e4e56e2..5310876 100644
--- a/vendor/gopkg.in/yaml.v2/decode.go
+++ b/vendor/gopkg.in/yaml.v2/decode.go
@@ -229,6 +229,10 @@ type decoder struct {
 	mapType reflect.Type
 	terrors []string
 	strict  bool
+
+	decodeCount int
+	aliasCount  int
+	aliasDepth  int
 }
 
 var (
@@ -314,7 +318,39 @@ func (d *decoder) prepare(n *node, out reflect.Value) (newout reflect.Value, unm
 	return out, false, false
 }
 
+const (
+	// 400,000 decode operations is ~500kb of dense object declarations, or ~5kb of dense object declarations with 10000% alias expansion
+	alias_ratio_range_low = 400000
+	// 4,000,000 decode operations is ~5MB of dense object declarations, or ~4.5MB of dense object declarations with 10% alias expansion
+	alias_ratio_range_high = 4000000
+	// alias_ratio_range is the range over which we scale allowed alias ratios
+	alias_ratio_range = float64(alias_ratio_range_high - alias_ratio_range_low)
+)
+
+func allowedAliasRatio(decodeCount int) float64 {
+	switch {
+	case decodeCount <= alias_ratio_range_low:
+		// allow 99% to come from alias expansion for small-to-medium documents
+		return 0.99
+	case decodeCount >= alias_ratio_range_high:
+		// allow 10% to come from alias expansion for very large documents
+		return 0.10
+	default:
+		// scale smoothly from 99% down to 10% over the range.
+		// this maps to 396,000 - 400,000 allowed alias-driven decodes over the range.
+		// 400,000 decode operations is ~100MB of allocations in worst-case scenarios (single-item maps).
+		return 0.99 - 0.89*(float64(decodeCount-alias_ratio_range_low)/alias_ratio_range)
+	}
+}
+
 func (d *decoder) unmarshal(n *node, out reflect.Value) (good bool) {
+	d.decodeCount++
+	if d.aliasDepth > 0 {
+		d.aliasCount++
+	}
+	if d.aliasCount > 100 && d.decodeCount > 1000 && float64(d.aliasCount)/float64(d.decodeCount) > allowedAliasRatio(d.decodeCount) {
+		failf("document contains excessive aliasing")
+	}
 	switch n.kind {
 	case documentNode:
 		return d.document(n, out)
@@ -353,7 +389,9 @@ func (d *decoder) alias(n *node, out reflect.Value) (good bool) {
 		failf("anchor '%s' value contains itself", n.value)
 	}
 	d.aliases[n] = true
+	d.aliasDepth++
 	good = d.unmarshal(n.alias, out)
+	d.aliasDepth--
 	delete(d.aliases, n)
 	return good
 }
diff --git a/vendor/gopkg.in/yaml.v2/scannerc.go b/vendor/gopkg.in/yaml.v2/scannerc.go
index 077fd1d..570b8ec 100644
--- a/vendor/gopkg.in/yaml.v2/scannerc.go
+++ b/vendor/gopkg.in/yaml.v2/scannerc.go
@@ -906,6 +906,9 @@ func yaml_parser_remove_simple_key(parser *yaml_parser_t) bool {
 	return true
 }
 
+// max_flow_level limits the flow_level
+const max_flow_level = 10000
+
 // Increase the flow level and resize the simple key list if needed.
 func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
 	// Reset the simple key on the next level.
@@ -913,6 +916,11 @@ func yaml_parser_increase_flow_level(parser *yaml_parser_t) bool {
 
 	// Increase the flow level.
 	parser.flow_level++
+	if parser.flow_level > max_flow_level {
+		return yaml_parser_set_scanner_error(parser,
+			"while increasing flow level", parser.simple_keys[len(parser.simple_keys)-1].mark,
+			fmt.Sprintf("exceeded max depth of %d", max_flow_level))
+	}
 	return true
 }
 
@@ -925,6 +933,9 @@ func yaml_parser_decrease_flow_level(parser *yaml_parser_t) bool {
 	return true
 }
 
+// max_indents limits the indents stack size
+const max_indents = 10000
+
 // Push the current indentation level to the stack and set the new level
 // the current column is greater than the indentation level.  In this case,
 // append or insert the specified token into the token queue.
@@ -939,6 +950,11 @@ func yaml_parser_roll_indent(parser *yaml_parser_t, column, number int, typ yaml
 		// indentation level.
 		parser.indents = append(parser.indents, parser.indent)
 		parser.indent = column
+		if len(parser.indents) > max_indents {
+			return yaml_parser_set_scanner_error(parser,
+				"while increasing indent level", parser.simple_keys[len(parser.simple_keys)-1].mark,
+				fmt.Sprintf("exceeded max depth of %d", max_indents))
+		}
 
 		// Create a token and insert it into the queue.
 		token := yaml_token_t{
-- 
2.20.1