Skip to content

Commit 323951b

Browse files
authored Jan 10, 2025
Merge pull request #1844 from Shopify/fast-expression-parse
Faster Expression parser / Tokenizer with StringScanner
2 parents a5b91e8 + 10114b3 commit 323951b

30 files changed

+690
-231
lines changed
 

‎Gemfile

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ group :benchmark, :test do
1313
gem 'benchmark-ips'
1414
gem 'memory_profiler'
1515
gem 'terminal-table'
16+
gem "lru_redux"
1617

1718
install_if -> { RUBY_PLATFORM !~ /mingw|mswin|java/ && RUBY_ENGINE != 'truffleruby' } do
1819
gem 'stackprof'

‎Rakefile

+24-5
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,7 @@ end
7171

7272
namespace :benchmark do
7373
desc "Run the liquid benchmark with lax parsing"
74-
task :run do
74+
task :lax do
7575
ruby "./performance/benchmark.rb lax"
7676
end
7777

@@ -80,11 +80,30 @@ namespace :benchmark do
8080
ruby "./performance/benchmark.rb strict"
8181
end
8282

83+
desc "Run the liquid benchmark with both lax and strict parsing"
84+
task run: [:lax, :strict]
85+
8386
desc "Run unit benchmarks"
84-
task :unit do
85-
Dir["./performance/unit/*_benchmark.rb"].each do |file|
86-
puts "🧪 Running #{file}"
87-
ruby file
87+
namespace :unit do
88+
task :all do
89+
Dir["./performance/unit/*_benchmark.rb"].each do |file|
90+
puts "🧪 Running #{file}"
91+
ruby file
92+
end
93+
end
94+
95+
task :lexer do
96+
Dir["./performance/unit/lexer_benchmark.rb"].each do |file|
97+
puts "🧪 Running #{file}"
98+
ruby file
99+
end
100+
end
101+
102+
task :expression do
103+
Dir["./performance/unit/expression_benchmark.rb"].each do |file|
104+
puts "🧪 Running #{file}"
105+
ruby file
106+
end
88107
end
89108
end
90109
end

‎lib/liquid.rb

+4-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@
2121
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
2222
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
2323

24+
require "strscan"
25+
2426
module Liquid
2527
FilterSeparator = /\|/
2628
ArgumentSeparator = ','
@@ -44,6 +46,7 @@ module Liquid
4446
VariableParser = /\[(?>[^\[\]]+|\g<0>)*\]|#{VariableSegment}+\??/o
4547

4648
RAISE_EXCEPTION_LAMBDA = ->(_e) { raise }
49+
HAS_STRING_SCANNER_SCAN_BYTE = StringScanner.instance_methods.include?(:scan_byte)
4750
end
4851

4952
require "liquid/version"
@@ -68,7 +71,6 @@ module Liquid
6871
require 'liquid/errors'
6972
require 'liquid/interrupts'
7073
require 'liquid/strainer_template'
71-
require 'liquid/expression'
7274
require 'liquid/context'
7375
require 'liquid/tag'
7476
require 'liquid/block_body'
@@ -77,6 +79,7 @@ module Liquid
7779
require 'liquid/variable_lookup'
7880
require 'liquid/range_lookup'
7981
require 'liquid/resource_limits'
82+
require 'liquid/expression'
8083
require 'liquid/template'
8184
require 'liquid/condition'
8285
require 'liquid/utils'

‎lib/liquid/context.rb

+8-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# frozen_string_literal: true
22

3+
require "lru_redux"
4+
35
module Liquid
46
# Context keeps the variable stack and resolves variables, as well as keywords
57
#
@@ -39,6 +41,11 @@ def initialize(environments = {}, outer_scope = {}, registers = {}, rethrow_erro
3941
@filters = []
4042
@global_filter = nil
4143
@disabled_tags = {}
44+
@expression_cache = LruRedux::ThreadSafeCache.new(1000)
45+
46+
# Instead of constructing new StringScanner objects for each Expression parse,
47+
# we recycle the same one.
48+
@string_scanner = StringScanner.new("")
4249

4350
@registers.static[:cached_partials] ||= {}
4451
@registers.static[:file_system] ||= environment.file_system
@@ -176,7 +183,7 @@ def []=(key, value)
176183
# Example:
177184
# products == empty #=> products.empty?
178185
def [](expression)
179-
evaluate(Expression.parse(expression))
186+
evaluate(Expression.parse(expression, @string_scanner, @expression_cache))
180187
end
181188

182189
def key?(key)

‎lib/liquid/expression.rb

+92-21
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# frozen_string_literal: true
22

3+
require "lru_redux"
4+
35
module Liquid
46
class Expression
57
LITERALS = {
@@ -10,37 +12,106 @@ class Expression
1012
'true' => true,
1113
'false' => false,
1214
'blank' => '',
13-
'empty' => ''
15+
'empty' => '',
16+
# In lax mode, a lone minus sign can be a VariableLookup.
17+
# For simplicity and performance, we treat it like a literal.
18+
'-' => VariableLookup.parse("-", nil).freeze,
1419
}.freeze
1520

16-
INTEGERS_REGEX = /\A(-?\d+)\z/
17-
FLOATS_REGEX = /\A(-?\d[\d\.]+)\z/
21+
DOT = ".".ord
22+
ZERO = "0".ord
23+
NINE = "9".ord
24+
DASH = "-".ord
1825

1926
# Use an atomic group (?>...) to avoid pathological backtracking from
2027
# malicious input as described in https://github.com/Shopify/liquid/issues/1357
21-
RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/
28+
RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/
29+
INTEGER_REGEX = /\A(-?\d+)\z/
30+
FLOAT_REGEX = /\A(-?\d+)\.\d+\z/
31+
32+
class << self
33+
def parse(markup, ss = StringScanner.new(""), cache = nil)
34+
return unless markup
35+
36+
markup = markup.strip # markup can be a frozen string
2237

23-
def self.parse(markup)
24-
return nil unless markup
38+
if (markup.start_with?('"') && markup.end_with?('"')) ||
39+
(markup.start_with?("'") && markup.end_with?("'"))
40+
return markup[1..-2]
41+
elsif LITERALS.key?(markup)
42+
return LITERALS[markup]
43+
end
44+
45+
# Cache only exists during parsing
46+
if cache
47+
return cache[markup] if cache.key?(markup)
2548

26-
markup = markup.strip
27-
if (markup.start_with?('"') && markup.end_with?('"')) ||
28-
(markup.start_with?("'") && markup.end_with?("'"))
29-
return markup[1..-2]
49+
cache[markup] = inner_parse(markup, ss, cache).freeze
50+
else
51+
inner_parse(markup, ss, nil).freeze
52+
end
3053
end
3154

32-
case markup
33-
when INTEGERS_REGEX
34-
Regexp.last_match(1).to_i
35-
when RANGES_REGEX
36-
RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2))
37-
when FLOATS_REGEX
38-
Regexp.last_match(1).to_f
39-
else
40-
if LITERALS.key?(markup)
41-
LITERALS[markup]
55+
def inner_parse(markup, ss, cache)
56+
if (markup.start_with?("(") && markup.end_with?(")")) && markup =~ RANGES_REGEX
57+
return RangeLookup.parse(
58+
Regexp.last_match(1),
59+
Regexp.last_match(2),
60+
ss,
61+
cache,
62+
)
63+
end
64+
65+
if (num = parse_number(markup, ss))
66+
num
67+
else
68+
VariableLookup.parse(markup, ss, cache)
69+
end
70+
end
71+
72+
def parse_number(markup, ss)
73+
# check if the markup is a simple integer or float
74+
case markup
75+
when INTEGER_REGEX
76+
return Integer(markup, 10)
77+
when FLOAT_REGEX
78+
return markup.to_f
79+
end
80+
81+
ss.string = markup
82+
# the first byte must be a digit, a period, or a dash
83+
byte = ss.scan_byte
84+
85+
return false if byte != DASH && byte != DOT && (byte < ZERO || byte > NINE)
86+
87+
# The markup could be a float with multiple dots
88+
first_dot_pos = nil
89+
num_end_pos = nil
90+
91+
while (byte = ss.scan_byte)
92+
return false if byte != DOT && (byte < ZERO || byte > NINE)
93+
94+
# we found our number and now we are just scanning the rest of the string
95+
next if num_end_pos
96+
97+
if byte == DOT
98+
if first_dot_pos.nil?
99+
first_dot_pos = ss.pos
100+
else
101+
# we found another dot, so we know that the number ends here
102+
num_end_pos = ss.pos - 1
103+
end
104+
end
105+
end
106+
107+
num_end_pos = markup.length if ss.eos?
108+
109+
if num_end_pos
110+
# number ends with a number "123.123"
111+
markup.byteslice(0, num_end_pos).to_f
42112
else
43-
VariableLookup.parse(markup)
113+
# number ends with a dot "123."
114+
markup.byteslice(0, first_dot_pos).to_f
44115
end
45116
end
46117
end

0 commit comments

Comments
 (0)
Failed to load comments.