373 lines
14 KiB
Diff
373 lines
14 KiB
Diff
From e4a067e11235a2ec7a00616d41350485e384ec05 Mon Sep 17 00:00:00 2001
|
|
From: Sutou Kouhei <kou@clear-code.com>
|
|
Date: Thu, 1 Aug 2024 11:51:33 +0900
|
|
Subject: [PATCH] Add 3.3.3 entry
|
|
|
|
---
|
|
.../lib/rexml/formatters/pretty.rb | 2 +-
|
|
.../lib/rexml/parsers/baseparser.rb | 67 ++++++++++++++-----
|
|
.../lib/rexml/parsers/sax2parser.rb | 21 +-----
|
|
.../lib/rexml/parsers/streamparser.rb | 4 +-
|
|
.bundle/gems/rexml-3.2.5/lib/rexml/rexml.rb | 2 +-
|
|
.bundle/gems/rexml-3.2.5/lib/rexml/source.rb | 22 ++++--
|
|
.bundle/gems/rexml-3.2.5/lib/rexml/text.rb | 48 +++++++++----
|
|
7 files changed, 105 insertions(+), 61 deletions(-)
|
|
|
|
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/formatters/pretty.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/formatters/pretty.rb
|
|
index a1198b7..a838d83 100644
|
|
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/formatters/pretty.rb
|
|
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/formatters/pretty.rb
|
|
@@ -111,7 +111,7 @@ module REXML
|
|
# itself, then we don't need a carriage return... which makes this
|
|
# logic more complex.
|
|
node.children.each { |child|
|
|
- next if child == node.children[-1] and child.instance_of?(Text)
|
|
+ next if child.instance_of?(Text)
|
|
unless child == node.children[0] or child.instance_of?(Text) or
|
|
(child == node.children[1] and !node.children[0].writethis)
|
|
output << "\n"
|
|
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb
|
|
index 617a3d5..44dc658 100644
|
|
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb
|
|
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb
|
|
@@ -124,11 +124,10 @@ module REXML
|
|
}
|
|
|
|
module Private
|
|
- INSTRUCTION_END = /#{NAME}(\s+.*?)?\?>/um
|
|
TAG_PATTERN = /((?>#{QNAME_STR}))\s*/um
|
|
CLOSE_PATTERN = /(#{QNAME_STR})\s*>/um
|
|
ATTLISTDECL_END = /\s+#{NAME}(?:#{ATTDEF})*\s*>/um
|
|
- NAME_PATTERN = /\s*#{NAME}/um
|
|
+ NAME_PATTERN = /#{NAME}/um
|
|
GEDECL_PATTERN = "\\s+#{NAME}\\s+#{ENTITYDEF}\\s*>"
|
|
PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>"
|
|
ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um
|
|
@@ -159,6 +158,7 @@ module REXML
|
|
def stream=( source )
|
|
@source = SourceFactory.create_from( source )
|
|
@closed = nil
|
|
+ @have_root = false
|
|
@document_status = nil
|
|
@tags = []
|
|
@stack = []
|
|
@@ -241,7 +241,7 @@ module REXML
|
|
if @document_status == nil
|
|
start_position = @source.position
|
|
if @source.match("<?", true)
|
|
- return process_instruction(start_position)
|
|
+ return process_instruction
|
|
elsif @source.match("<!", true)
|
|
if @source.match("--", true)
|
|
md = @source.match(/(.*?)-->/um, true)
|
|
@@ -311,7 +311,11 @@ module REXML
|
|
raise REXML::ParseException.new( "Bad ELEMENT declaration!", @source ) if md.nil?
|
|
return [ :elementdecl, "<!ELEMENT" + md[1] ]
|
|
elsif @source.match("ENTITY", true)
|
|
- match = [:entitydecl, *@source.match(Private::ENTITYDECL_PATTERN, true).captures.compact]
|
|
+ match_data = @source.match(Private::ENTITYDECL_PATTERN, true)
|
|
+ unless match_data
|
|
+ raise REXML::ParseException.new("Malformed entity declaration", @source)
|
|
+ end
|
|
+ match = [:entitydecl, *match_data.captures.compact]
|
|
ref = false
|
|
if match[1] == '%'
|
|
ref = true
|
|
@@ -343,7 +347,7 @@ module REXML
|
|
contents = md[0]
|
|
|
|
pairs = {}
|
|
- values = md[0].scan( ATTDEF_RE )
|
|
+ values = md[0].strip.scan( ATTDEF_RE )
|
|
values.each do |attdef|
|
|
unless attdef[3] == "#IMPLIED"
|
|
attdef.compact!
|
|
@@ -437,7 +441,7 @@ module REXML
|
|
raise REXML::ParseException.new( "Declarations can only occur "+
|
|
"in the doctype declaration.", @source)
|
|
elsif @source.match("?", true)
|
|
- return process_instruction(start_position)
|
|
+ return process_instruction
|
|
else
|
|
# Get the next tag
|
|
md = @source.match(Private::TAG_PATTERN, true)
|
|
@@ -462,8 +466,12 @@ module REXML
|
|
@closed = tag
|
|
@nsstack.shift
|
|
else
|
|
+ if @tags.empty? and @have_root
|
|
+ raise ParseException.new("Malformed XML: Extra tag at the end of the document (got '<#{tag}')", @source)
|
|
+ end
|
|
@tags.push( tag )
|
|
end
|
|
+ @have_root = true
|
|
return [ :start_element, tag, attributes ]
|
|
end
|
|
else
|
|
@@ -471,6 +479,16 @@ module REXML
|
|
if text.chomp!("<")
|
|
@source.position -= "<".bytesize
|
|
end
|
|
+ if @tags.empty?
|
|
+ unless /\A\s*\z/.match?(text)
|
|
+ if @have_root
|
|
+ raise ParseException.new("Malformed XML: Extra content at the end of the document (got '#{text}')", @source)
|
|
+ else
|
|
+ raise ParseException.new("Malformed XML: Content at the start of the document (got '#{text}')", @source)
|
|
+ end
|
|
+ end
|
|
+ return pull_event if @have_root
|
|
+ end
|
|
return [ :text, text ]
|
|
end
|
|
rescue REXML::UndefinedNamespaceException
|
|
@@ -515,7 +533,11 @@ module REXML
|
|
|
|
# Unescapes all possible entities
|
|
def unnormalize( string, entities=nil, filter=nil )
|
|
- rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
|
+ if string.include?("\r")
|
|
+ rv = string.gsub( Private::CARRIAGE_RETURN_NEWLINE_PATTERN, "\n" )
|
|
+ else
|
|
+ rv = string.dup
|
|
+ end
|
|
matches = rv.scan( REFERENCE_RE )
|
|
return rv if matches.size == 0
|
|
rv.gsub!( Private::CHARACTER_REFERENCES ) {
|
|
@@ -565,14 +587,14 @@ module REXML
|
|
def parse_name(base_error_message)
|
|
md = @source.match(Private::NAME_PATTERN, true)
|
|
unless md
|
|
- if @source.match(/\s*\S/um)
|
|
+ if @source.match(/\S/um)
|
|
message = "#{base_error_message}: invalid name"
|
|
else
|
|
message = "#{base_error_message}: name is missing"
|
|
end
|
|
raise REXML::ParseException.new(message, @source)
|
|
end
|
|
- md[1]
|
|
+ md[0]
|
|
end
|
|
|
|
def parse_id(base_error_message,
|
|
@@ -641,15 +663,24 @@ module REXML
|
|
end
|
|
end
|
|
|
|
- def process_instruction(start_position)
|
|
- match_data = @source.match(Private::INSTRUCTION_END, true)
|
|
- unless match_data
|
|
- message = "Invalid processing instruction node"
|
|
- @source.position = start_position
|
|
- raise REXML::ParseException.new(message, @source)
|
|
+ def process_instruction
|
|
+ name = parse_name("Malformed XML: Invalid processing instruction node")
|
|
+ if @source.match(/\s+/um, true)
|
|
+ match_data = @source.match(/(.*?)\?>/um, true)
|
|
+ unless match_data
|
|
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
|
+ end
|
|
+ content = match_data[1]
|
|
+ else
|
|
+ content = nil
|
|
+ unless @source.match("?>", true)
|
|
+ raise ParseException.new("Malformed XML: Unclosed processing instruction", @source)
|
|
+ end
|
|
end
|
|
- if @document_status.nil? and match_data[1] == "xml"
|
|
- content = match_data[2]
|
|
+ if name == "xml"
|
|
+ if @document_status
|
|
+ raise ParseException.new("Malformed XML: XML declaration is not at the start", @source)
|
|
+ end
|
|
version = VERSION.match(content)
|
|
version = version[1] unless version.nil?
|
|
encoding = ENCODING.match(content)
|
|
@@ -664,7 +695,7 @@ module REXML
|
|
standalone = standalone[1] unless standalone.nil?
|
|
return [ :xmldecl, version, encoding, standalone ]
|
|
end
|
|
- [:processing_instruction, match_data[1], match_data[2]]
|
|
+ [:processing_instruction, name, content]
|
|
end
|
|
|
|
def parse_attributes(prefixes, curr_ns)
|
|
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/sax2parser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/sax2parser.rb
|
|
index 01cb469..cec9d2f 100644
|
|
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/sax2parser.rb
|
|
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/sax2parser.rb
|
|
@@ -161,25 +161,8 @@ module REXML
|
|
end
|
|
end
|
|
when :text
|
|
- #normalized = @parser.normalize( event[1] )
|
|
- #handle( :characters, normalized )
|
|
- copy = event[1].clone
|
|
-
|
|
- esub = proc { |match|
|
|
- if @entities.has_key?($1)
|
|
- @entities[$1].gsub(Text::REFERENCE, &esub)
|
|
- else
|
|
- match
|
|
- end
|
|
- }
|
|
-
|
|
- copy.gsub!( Text::REFERENCE, &esub )
|
|
- copy.gsub!( Text::NUMERICENTITY ) {|m|
|
|
- m=$1
|
|
- m = "0#{m}" if m[0] == ?x
|
|
- [Integer(m)].pack('U*')
|
|
- }
|
|
- handle( :characters, copy )
|
|
+ unnormalized = @parser.unnormalize( event[1], @entities )
|
|
+ handle( :characters, unnormalized )
|
|
when :entitydecl
|
|
handle_entitydecl( event )
|
|
when :processing_instruction, :comment, :attlistdecl,
|
|
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/streamparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/streamparser.rb
|
|
index 9e0eb0b..fa3ac49 100644
|
|
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/streamparser.rb
|
|
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/streamparser.rb
|
|
@@ -36,8 +36,8 @@ module REXML
|
|
@listener.tag_end( event[1] )
|
|
@tag_stack.pop
|
|
when :text
|
|
- normalized = @parser.unnormalize( event[1] )
|
|
- @listener.text( normalized )
|
|
+ unnormalized = @parser.unnormalize( event[1] )
|
|
+ @listener.text( unnormalized )
|
|
when :processing_instruction
|
|
@listener.instruction( *event[1,2] )
|
|
when :start_doctype
|
|
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/rexml.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/rexml.rb
|
|
index 3af03ec..39e92a5 100644
|
|
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/rexml.rb
|
|
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/rexml.rb
|
|
@@ -31,7 +31,7 @@
|
|
module REXML
|
|
COPYRIGHT = "Copyright © 2001-2008 Sean Russell <ser@germane-software.com>"
|
|
DATE = "2008/019"
|
|
- VERSION = "3.3.1"
|
|
+ VERSION = "3.3.3"
|
|
REVISION = ""
|
|
|
|
Copyright = COPYRIGHT
|
|
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb
|
|
index 5715c35..ff887fc 100644
|
|
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb
|
|
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/source.rb
|
|
@@ -204,10 +204,20 @@ module REXML
|
|
end
|
|
end
|
|
|
|
- def read(term = nil)
|
|
+ def read(term = nil, min_bytes = 1)
|
|
term = encode(term) if term
|
|
begin
|
|
- @scanner << readline(term)
|
|
+ str = readline(term)
|
|
+ @scanner << str
|
|
+ read_bytes = str.bytesize
|
|
+ begin
|
|
+ while read_bytes < min_bytes
|
|
+ str = readline(term)
|
|
+ @scanner << str
|
|
+ read_bytes += str.bytesize
|
|
+ end
|
|
+ rescue IOError
|
|
+ end
|
|
true
|
|
rescue Exception, NameError
|
|
@source = nil
|
|
@@ -237,10 +247,9 @@ module REXML
|
|
read if @scanner.eos? && @source
|
|
end
|
|
|
|
- # Note: When specifying a string for 'pattern', it must not include '>' except in the following formats:
|
|
- # - ">"
|
|
- # - "XXX>" (X is any string excluding '>')
|
|
def match( pattern, cons=false )
|
|
+ # To avoid performance issue, we need to increase bytes to read per scan
|
|
+ min_bytes = 1
|
|
while true
|
|
if cons
|
|
md = @scanner.scan(pattern)
|
|
@@ -250,7 +259,8 @@ module REXML
|
|
break if md
|
|
return nil if pattern.is_a?(String)
|
|
return nil if @source.nil?
|
|
- return nil unless read
|
|
+ return nil unless read(nil, min_bytes)
|
|
+ min_bytes *= 2
|
|
end
|
|
|
|
md.nil? ? nil : @scanner
|
|
diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/text.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/text.rb
|
|
index b47bad3..7e0befe 100644
|
|
--- a/.bundle/gems/rexml-3.2.5/lib/rexml/text.rb
|
|
+++ b/.bundle/gems/rexml-3.2.5/lib/rexml/text.rb
|
|
@@ -151,25 +151,45 @@ module REXML
|
|
end
|
|
end
|
|
|
|
- # context sensitive
|
|
- string.scan(pattern) do
|
|
- if $1[-1] != ?;
|
|
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
|
|
- elsif $1[0] == ?&
|
|
- if $5 and $5[0] == ?#
|
|
- case ($5[1] == ?x ? $5[2..-1].to_i(16) : $5[1..-1].to_i)
|
|
- when *VALID_CHAR
|
|
+ pos = 0
|
|
+ while (index = string.index(/<|&/, pos))
|
|
+ if string[index] == "<"
|
|
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
|
+ end
|
|
+
|
|
+ unless (end_index = string.index(/[^\s];/, index + 1))
|
|
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
|
+ end
|
|
+
|
|
+ value = string[(index + 1)..end_index]
|
|
+ if /\s/.match?(value)
|
|
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
|
+ end
|
|
+
|
|
+ if value[0] == "#"
|
|
+ character_reference = value[1..-1]
|
|
+
|
|
+ unless (/\A(\d+|x[0-9a-fA-F]+)\z/.match?(character_reference))
|
|
+ if character_reference[0] == "x" || character_reference[-1] == "x"
|
|
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
|
else
|
|
- raise "Illegal character #{$1.inspect} in raw string #{string.inspect}"
|
|
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
|
end
|
|
- # FIXME: below can't work but this needs API change.
|
|
- # elsif @parent and $3 and !SUBSTITUTES.include?($1)
|
|
- # if !doctype or !doctype.entities.has_key?($3)
|
|
- # raise "Undeclared entity '#{$1}' in raw string \"#{string}\""
|
|
- # end
|
|
end
|
|
+
|
|
+ case (character_reference[0] == "x" ? character_reference[1..-1].to_i(16) : character_reference[0..-1].to_i)
|
|
+ when *VALID_CHAR
|
|
+ else
|
|
+ raise "Illegal character #{string.inspect} in raw string #{string.inspect}"
|
|
+ end
|
|
+ elsif !(/\A#{Entity::NAME}\z/um.match?(value))
|
|
+ raise "Illegal character \"#{string[index]}\" in raw string #{string.inspect}"
|
|
end
|
|
+
|
|
+ pos = end_index + 1
|
|
end
|
|
+
|
|
+ string
|
|
end
|
|
|
|
def node_type
|
|
--
|
|
2.27.0
|
|
|