From ce59f2eb1aeb371fe1643414f06618dbe031979f Mon Sep 17 00:00:00 2001 From: Sutou Kouhei Date: Thu, 24 Oct 2024 14:45:31 +0900 Subject: [PATCH] parser: fix a bug that &#0x...; is accepted as a character reference diff --git a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb index 7bd8adf..b4547ba 100644 --- a/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb +++ b/.bundle/gems/rexml-3.2.5/lib/rexml/parsers/baseparser.rb @@ -150,7 +150,7 @@ module REXML PEDECL_PATTERN = "\\s+(%)\\s+#{NAME}\\s+#{PEDEF}\\s*>" ENTITYDECL_PATTERN = /(?:#{GEDECL_PATTERN})|(?:#{PEDECL_PATTERN})/um CARRIAGE_RETURN_NEWLINE_PATTERN = /\r\n?/ - CHARACTER_REFERENCES = /&#0*((?:\d+)|(?:x[a-fA-F0-9]+));/ + CHARACTER_REFERENCES = /&#((?:\d+)|(?:x[a-fA-F0-9]+));/ DEFAULT_ENTITIES_PATTERNS = {} default_entities = ['gt', 'lt', 'quot', 'apos', 'amp'] default_entities.each do |term| @@ -570,8 +570,12 @@ module REXML return rv if matches.size == 0 rv.gsub!( Private::CHARACTER_REFERENCES ) { m=$1 - m = "0#{m}" if m[0] == ?x - [Integer(m)].pack('U*') + if m.start_with?("x") + code_point = Integer(m[1..-1], 16) + else + code_point = Integer(m, 10) + end + [code_point].pack('U*') } matches.collect!{|x|x[0]}.compact! if filter -- 2.27.0