From eedd8618910765b7fa92c1f632be0ce6865285d0 Mon Sep 17 00:00:00 2001 From: NAITOH Jun Date: Tue, 30 Jan 2024 07:35:01 +0900 Subject: [PATCH] Reduce calls to StringScanner.new() [Why] A single StringScanner instance, created once in initialize, can be reused across parse_attributes calls, reducing initialization costs. ## Benchmark ``` RUBYLIB= BUNDLER_ORIG_RUBYLIB= /Users/naitoh/.rbenv/versions/3.3.0/bin/ruby -v -S benchmark-driver /Users/naitoh/ghq/github.com/naitoh/rexml/benchmark/parse.yaml ruby 3.3.0 (2023-12-25 revision 5124f9ac75) [arm64-darwin22] Calculating ------------------------------------- before after before(YJIT) after(YJIT) dom 11.025 11.202 16.207 17.315 i/s - 100.000 times in 9.069926s 8.926851s 6.170348s 5.775288s sax 30.084 30.519 45.220 47.814 i/s - 100.000 times in 3.324024s 3.276648s 2.211399s 2.091429s pull 34.782 35.849 53.867 56.851 i/s - 100.000 times in 2.875069s 2.789495s 1.856439s 1.758998s stream 32.546 33.541 46.362 47.775 i/s - 100.000 times in 3.072603s 2.981465s 2.156952s 2.093130s Comparison: dom after(YJIT): 17.3 i/s before(YJIT): 16.2 i/s - 1.07x slower after: 11.2 i/s - 1.55x slower before: 11.0 i/s - 1.57x slower sax after(YJIT): 47.8 i/s before(YJIT): 45.2 i/s - 1.06x slower after: 30.5 i/s - 1.57x slower before: 30.1 i/s - 1.59x slower pull after(YJIT): 56.9 i/s before(YJIT): 53.9 i/s - 1.06x slower after: 35.8 i/s - 1.59x slower before: 34.8 i/s - 1.63x slower stream after(YJIT): 47.8 i/s before(YJIT): 46.4 i/s - 1.03x slower after: 33.5 i/s - 1.42x slower before: 32.5 i/s - 1.47x slower ``` - YJIT=ON : 1.03x - 1.07x faster - YJIT=OFF : 1.01x - 1.03x faster --- lib/rexml/parsers/baseparser.rb | 43 +++++++++++++++++---------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/lib/rexml/parsers/baseparser.rb b/lib/rexml/parsers/baseparser.rb index 7126a12d..0fbb94ee 100644 --- a/lib/rexml/parsers/baseparser.rb +++ b/lib/rexml/parsers/baseparser.rb @@ -115,6 +115,7 @@ class BaseParser def initialize( source ) self.stream = source @listeners = 
[] + @attributes_scanner = StringScanner.new('') end def add_listener( listener ) @@ -601,36 +602,36 @@ def parse_attributes(prefixes, curr_ns) return attributes, closed if raw_attributes.nil? return attributes, closed if raw_attributes.empty? - scanner = StringScanner.new(raw_attributes) - until scanner.eos? - if scanner.scan(/\s+/) - break if scanner.eos? + @attributes_scanner.string = raw_attributes + until @attributes_scanner.eos? + if @attributes_scanner.scan(/\s+/) + break if @attributes_scanner.eos? end - pos = scanner.pos + pos = @attributes_scanner.pos loop do - break if scanner.scan(ATTRIBUTE_PATTERN) - unless scanner.scan(QNAME) - message = "Invalid attribute name: <#{scanner.rest}>" + break if @attributes_scanner.scan(ATTRIBUTE_PATTERN) + unless @attributes_scanner.scan(QNAME) + message = "Invalid attribute name: <#{@attributes_scanner.rest}>" raise REXML::ParseException.new(message, @source) end - name = scanner[0] - unless scanner.scan(/\s*=\s*/um) + name = @attributes_scanner[0] + unless @attributes_scanner.scan(/\s*=\s*/um) message = "Missing attribute equal: <#{name}>" raise REXML::ParseException.new(message, @source) end - quote = scanner.scan(/['"]/) + quote = @attributes_scanner.scan(/['"]/) unless quote message = "Missing attribute value start quote: <#{name}>" raise REXML::ParseException.new(message, @source) end - unless scanner.scan(/.*#{Regexp.escape(quote)}/um) + unless @attributes_scanner.scan(/.*#{Regexp.escape(quote)}/um) match_data = @source.match(/^(.*?)(\/)?>/um, true) if match_data - scanner << "/" if closed - scanner << ">" - scanner << match_data[1] - scanner.pos = pos + @attributes_scanner << "/" if closed + @attributes_scanner << ">" + @attributes_scanner << match_data[1] + @attributes_scanner.pos = pos closed = !match_data[2].nil? 
next end @@ -639,11 +640,11 @@ def parse_attributes(prefixes, curr_ns) raise REXML::ParseException.new(message, @source) end end - name = scanner[1] - prefix = scanner[2] - local_part = scanner[3] - # quote = scanner[4] - value = scanner[5] + name = @attributes_scanner[1] + prefix = @attributes_scanner[2] + local_part = @attributes_scanner[3] + # quote = @attributes_scanner[4] + value = @attributes_scanner[5] if prefix == "xmlns" if local_part == "xml" if value != "http://www.w3.org/XML/1998/namespace"