ruby/backport-CVE-2024-39908-CVE-2024-41123-upgrade-lib-rexml-to-3.3.3.patch

373 lines
14 KiB
Diff
Raw Permalink Normal View History

From e4a067e11235a2ec7a00616d41350485e384ec05 Mon Sep 17 00:00:00 2001
From: Sutou Kouhei <kou@clear-code.com>
Date: Thu, 1 Aug 2024 11:51:33 +0900
Subject: [PATCH] Add 3.3.3 entry
---
.../lib/rexml/formatters/pretty.rb | 2 +-
.../lib/rexml/parsers/baseparser.rb | 67 ++++++++++++++-----
.../lib/rexml/parsers/sax2parser.rb | 21 +-----
.../lib/rexml/parsers/streamparser.rb | 4 +-
.bundle/gems/rexml-3.2.5/lib/rexml/rexml.rb | 2 +-
.bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 22 ++++--
.bundle/gems/rexml-3.2.5/lib/rexml/text.rb | 48 +++++++++----
7 files changed, 105 insertions(+), 61 deletions(-)
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/formatters/pretty.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/formatters/pretty.rb
index a1198b7..a838d83 100644
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/formatters/pretty.rb
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/formatters/pretty.rb
@@ -111,7 +111,7 @@ module REXML
# itself, then we don't need a carriage return... which makes this
# logic more complex.
node.children.each { |child|
- next if child == node.children[-1] and child.instance_of?(Text)
+ next if child.instance_of?(Text)
unless child == node.children[0] or child.instance_of?(Text) or
(child == node.children[1] and !node.children[0].writethis)
output << "\n"
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb
index 617a3d5..44dc658 100644
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb
@@ -124,11 +124,10 @@ module REXML
}
module Private
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
- NAME_PATTERN = /\s*#{NAME}/um
+ NAME_PATTERN = /#{NAME}/um
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
@@ -159,6 +158,7 @@ module REXML
def stream=( source )
@source = SourceFactory.create_from( source )
@closed = nil
+ @have_root = false
@document_status = nil
@tags = []
@stack = []
@@ -241,7 +241,7 @@ module REXML
if @document_status == nil
start_position = @source.position
if @source.match("<?", true)
- return process_instruction(start_position)
+ return process_instruction
elsif @source.match("<!", true)
if @source.match("--", true)
md = @source.match(/(.*?)-->/um, true)
@@ -311,7 +311,11 @@ module REXML
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
return [ :elementdecl, "<!ELEMENT" + md[1] ]
elsif @source.match("ENTITY", true)
- match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact]
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
+ unless match_data
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
+ end
+ match = [:entitydecl, *match_data.captures.compact]
ref = false
if match[1] == '%'
ref = true
@@ -343,7 +347,7 @@ module REXML
contents = md[0]
pairs = {}
- values = md[0].scan( ATTDEF_RE )
+ values = md[0].strip.scan( ATTDEF_RE )
values.each do |attdef|
unless attdef[3] == "#IMPLIED"
attdef.compact!
@@ -437,7 +441,7 @@ module REXML
raise REXML::ParseException.new( "Declarations can only occur "+
"in the doctype declaration.", @source)
elsif @source.match("?", true)
- return process_instruction(start_position)
+ return process_instruction
else
# Get the next tag
md = @source.match(Private::TAG_PATTERN, true)
@@ -462,8 +466,12 @@ module REXML
@closed = tag
@nsstack.shift
else
+ if @tags.empty? and @have_root
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
+ end
@tags.push( tag )
end
+ @have_root = true
return [ :start_element, tag, attributes ]
end
else
@@ -471,6 +479,16 @@ module REXML
if text.chomp!("<")
@source.position -= "<".bytesize
end
+ if @tags.empty?
+ unless /\A\s*\z/.match?(text)
+ if @have_root
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
+ else
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
+ end
+ end
+ return pull_event if @have_root
+ end
return [ :text, text ]
end
rescue REXML::UndefinedNamespaceException
@@ -515,7 +533,11 @@ module REXML
# Unescapes all possible entities
def unnormalize( string, entities=nil, filter=nil )
- rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
+ if string.include?("\r")
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
+ else
+ rv = string.dup
+ end
matches = rv.scan( REFERENCE_RE )
return rv if matches.size == 0
rv.gsub!( Private::CHARACTER_REFERENCES ) {
@@ -565,14 +587,14 @@ module REXML
def parse_name(base_error_message)
md = @source.match(Private::NAME_PATTERN, true)
unless md
- if @source.match(/\s*\S/um)
+ if @source.match(/\S/um)
message = "#{base_error_message}: invalid name"
else
message = "#{base_error_message}: name is missing"
end
raise REXML::ParseException.new(message, @source)
end
- md[1]
+ md[0]
end
def parse_id(base_error_message,
@@ -641,15 +663,24 @@ module REXML
end
end
- def process_instruction(start_position)
- match_data = @source.match(Private::INSTRUCTION_END, true)
- unless match_data
- message = "Invalid processing instruction node"
- @source.position = start_position
- raise REXML::ParseException.new(message, @source)
+ def process_instruction
+ name = parse_name("Malformed XML: Invalid processing instruction node")
+ if @source.match(/\s+/um, true)
+ match_data = @source.match(/(.*?)\?>/um, true)
+ unless match_data
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
+ end
+ content = match_data[1]
+ else
+ content = nil
+ unless @source.match("?>", true)
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
+ end
end
- if @document_status.nil? and match_data[1] == "xml"
- content = match_data[2]
+ if name == "xml"
+ if @document_status
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
+ end
version = VERSION.match(content)
version = version[1] unless version.nil?
encoding = ENCODING.match(content)
@@ -664,7 +695,7 @@ module REXML
standalone = standalone[1] unless standalone.nil?
return [ :xmldecl, version, encoding, standalone ]
end
- [:processing_instruction, match_data[1], match_data[2]]
+ [:processing_instruction, name, content]
end
def parse_attributes(prefixes, curr_ns)
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/sax2parser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/sax2parser.rb
index 01cb469..cec9d2f 100644
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/sax2parser.rb
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/sax2parser.rb
@@ -161,25 +161,8 @@ module REXML
end
end
when :text
- #normalized = @parser.normalize( event[1] )
- #handle( :characters, normalized )
- copy = event[1].clone
-
- esub = proc { |match|
- if @entities.has_key?($1)
- @entities[$1].gsub(Text::REFERENCE, &esub)
- else
- match
- end
- }
-
- copy.gsub!( Text::REFERENCE, &esub )
- copy.gsub!( Text::NUMERICENTITY ) {|m|
- m=$1
- m = "0#{m}" if m[0] == ?x
- [Integer(m)].pack('U*')
- }
- handle( :characters, copy )
+ unnormalized = @parser.unnormalize( event[1], @entities )
+ handle( :characters, unnormalized )
when :entitydecl
handle_entitydecl( event )
when :processing_instruction, :comment, :attlistdecl,
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/streamparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/streamparser.rb
index 9e0eb0b..fa3ac49 100644
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/streamparser.rb
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/streamparser.rb
@@ -36,8 +36,8 @@ module REXML
@listener.tag_end( event[1] )
@tag_stack.pop
when :text
- normalized = @parser.unnormalize( event[1] )
- @listener.text( normalized )
+ unnormalized = @parser.unnormalize( event[1] )
+ @listener.text( unnormalized )
when :processing_instruction
@listener.instruction( *event[1,2] )
when :start_doctype
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/rexml.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/rexml.rb
index 3af03ec..39e92a5 100644
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/rexml.rb
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/rexml.rb
@@ -31,7 +31,7 @@
module REXML
COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
DATE = "2008/019"
- VERSION = "3.3.1"
+ VERSION = "3.3.3"
REVISION = ""
Copyright = COPYRIGHT
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb
index 5715c35..ff887fc 100644
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb
@@ -204,10 +204,20 @@ module REXML
end
end
- def read(term = nil)
+ def read(term = nil, min_bytes = 1)
term = encode(term) if term
begin
- @scanner << readline(term)
+ str = readline(term)
+ @scanner << str
+ read_bytes = str.bytesize
+ begin
+ while read_bytes < min_bytes
+ str = readline(term)
+ @scanner << str
+ read_bytes += str.bytesize
+ end
+ rescue IOError
+ end
true
rescue Exception, NameError
@source = nil
@@ -237,10 +247,9 @@ module REXML
read if @scanner.eos? && @source
end
- # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
- # - ">"
- # - "XXX>" (X is any string excluding '>')
def match( pattern, cons=false )
+ # To avoid performance issue, we need to increase bytes to read per scan
+ min_bytes = 1
while true
if cons
md = @scanner.scan(pattern)
@@ -250,7 +259,8 @@ module REXML
break if md
return nil if pattern.is_a?(String)
return nil if @source.nil?
- return nil unless read
+ return nil unless read(nil, min_bytes)
+ min_bytes *= 2
end
md.nil? ? nil : @scanner
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/text.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/text.rb
index b47bad3..7e0befe 100644
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/text.rb
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/text.rb
@@ -151,25 +151,45 @@ module REXML
end
end
- # context sensitive
- string.scan(pattern) do
- if $1[-1] != ?;
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
- elsif $1[0] == ?&
- if $5 and $5[0] == ?#
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
- when *VALID_CHAR
+ pos = 0
+ while (index = string.index(/<|&/, pos))
+ if string[index] == "<"
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
+ end
+
+ unless (end_index = string.index(/[^\s];/, index + 1))
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
+ end
+
+ value = string[(index + 1)..end_index]
+ if /\s/.match?(value)
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
+ end
+
+ if value[0] == "#"
+ character_reference = value[1..-1]
+
+ unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
+ if character_reference[0] == "x" || character_reference[-1] == "x"
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
else
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
end
- # FIXME: below can't work but this needs API change.
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
- # if !doctype or !doctype.entities.has_key?($3)
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
- # end
end
+
+ case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
+ when *VALID_CHAR
+ else
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
+ end
+ elsif !(/\A#{Entity::NAME}\z/um.match?(value))
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
end
+
+ pos = end_index + 1
end
+
+ string
end
def node_type
--
2.27.0