Skip to content

Commit

Permalink
Merge pull request #1844 from Shopify/fast-expression-parse
Browse files Browse the repository at this point in the history
Faster Expression parser / Tokenizer with StringScanner
  • Loading branch information
ggmichaelgo authored Jan 10, 2025
2 parents a5b91e8 + 10114b3 commit 323951b
Show file tree
Hide file tree
Showing 30 changed files with 690 additions and 231 deletions.
1 change: 1 addition & 0 deletions Gemfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ group :benchmark, :test do
gem 'benchmark-ips'
gem 'memory_profiler'
gem 'terminal-table'
gem "lru_redux"

install_if -> { RUBY_PLATFORM !~ /mingw|mswin|java/ && RUBY_ENGINE != 'truffleruby' } do
gem 'stackprof'
Expand Down
29 changes: 24 additions & 5 deletions Rakefile
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ end

namespace :benchmark do
desc "Run the liquid benchmark with lax parsing"
task :run do
task :lax do
ruby "./performance/benchmark.rb lax"
end

Expand All @@ -80,11 +80,30 @@ namespace :benchmark do
ruby "./performance/benchmark.rb strict"
end

desc "Run the liquid benchmark with both lax and strict parsing"
task run: [:lax, :strict]

desc "Run unit benchmarks"
task :unit do
Dir["./performance/unit/*_benchmark.rb"].each do |file|
puts "🧪 Running #{file}"
ruby file
namespace :unit do
task :all do
Dir["./performance/unit/*_benchmark.rb"].each do |file|
puts "🧪 Running #{file}"
ruby file
end
end

task :lexer do
Dir["./performance/unit/lexer_benchmark.rb"].each do |file|
puts "🧪 Running #{file}"
ruby file
end
end

task :expression do
Dir["./performance/unit/expression_benchmark.rb"].each do |file|
puts "🧪 Running #{file}"
ruby file
end
end
end
end
Expand Down
5 changes: 4 additions & 1 deletion lib/liquid.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

require "strscan"

module Liquid
FilterSeparator = /\|/
ArgumentSeparator = ','
Expand All @@ -44,6 +46,7 @@ module Liquid
VariableParser = /\[(?>[^\[\]]+|\g<0>)*\]|#{VariableSegment}+\??/o

RAISE_EXCEPTION_LAMBDA = ->(_e) { raise }
HAS_STRING_SCANNER_SCAN_BYTE = StringScanner.instance_methods.include?(:scan_byte)
end

require "liquid/version"
Expand All @@ -68,7 +71,6 @@ module Liquid
require 'liquid/errors'
require 'liquid/interrupts'
require 'liquid/strainer_template'
require 'liquid/expression'
require 'liquid/context'
require 'liquid/tag'
require 'liquid/block_body'
Expand All @@ -77,6 +79,7 @@ module Liquid
require 'liquid/variable_lookup'
require 'liquid/range_lookup'
require 'liquid/resource_limits'
require 'liquid/expression'
require 'liquid/template'
require 'liquid/condition'
require 'liquid/utils'
Expand Down
9 changes: 8 additions & 1 deletion lib/liquid/context.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

require "lru_redux"

module Liquid
# Context keeps the variable stack and resolves variables, as well as keywords
#
Expand Down Expand Up @@ -39,6 +41,11 @@ def initialize(environments = {}, outer_scope = {}, registers = {}, rethrow_erro
@filters = []
@global_filter = nil
@disabled_tags = {}
@expression_cache = LruRedux::ThreadSafeCache.new(1000)

# Instead of constructing new StringScanner objects for each Expression parse,
# we recycle the same one.
@string_scanner = StringScanner.new("")

@registers.static[:cached_partials] ||= {}
@registers.static[:file_system] ||= environment.file_system
Expand Down Expand Up @@ -176,7 +183,7 @@ def []=(key, value)
# Example:
# products == empty #=> products.empty?
def [](expression)
evaluate(Expression.parse(expression))
evaluate(Expression.parse(expression, @string_scanner, @expression_cache))
end

def key?(key)
Expand Down
113 changes: 92 additions & 21 deletions lib/liquid/expression.rb
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# frozen_string_literal: true

require "lru_redux"

module Liquid
class Expression
LITERALS = {
Expand All @@ -10,37 +12,106 @@ class Expression
'true' => true,
'false' => false,
'blank' => '',
'empty' => ''
'empty' => '',
# in lax mode, minus sign can be a VariableLookup
# For simplicity and performance, we treat it like a literal
'-' => VariableLookup.parse("-", nil).freeze,
}.freeze

INTEGERS_REGEX = /\A(-?\d+)\z/
FLOATS_REGEX = /\A(-?\d[\d\.]+)\z/
DOT = ".".ord
ZERO = "0".ord
NINE = "9".ord
DASH = "-".ord

# Use an atomic group (?>...) to avoid pathological backtracking from
# malicious input as described in https://github.com/Shopify/liquid/issues/1357
RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/
RANGES_REGEX = /\A\(\s*(?>(\S+)\s*\.\.)\s*(\S+)\s*\)\z/
INTEGER_REGEX = /\A(-?\d+)\z/
FLOAT_REGEX = /\A(-?\d+)\.\d+\z/

class << self
def parse(markup, ss = StringScanner.new(""), cache = nil)
return unless markup

markup = markup.strip # markup can be a frozen string

def self.parse(markup)
return nil unless markup
if (markup.start_with?('"') && markup.end_with?('"')) ||
(markup.start_with?("'") && markup.end_with?("'"))
return markup[1..-2]
elsif LITERALS.key?(markup)
return LITERALS[markup]
end

# Cache only exists during parsing
if cache
return cache[markup] if cache.key?(markup)

markup = markup.strip
if (markup.start_with?('"') && markup.end_with?('"')) ||
(markup.start_with?("'") && markup.end_with?("'"))
return markup[1..-2]
cache[markup] = inner_parse(markup, ss, cache).freeze
else
inner_parse(markup, ss, nil).freeze
end
end

case markup
when INTEGERS_REGEX
Regexp.last_match(1).to_i
when RANGES_REGEX
RangeLookup.parse(Regexp.last_match(1), Regexp.last_match(2))
when FLOATS_REGEX
Regexp.last_match(1).to_f
else
if LITERALS.key?(markup)
LITERALS[markup]
# Parses markup that is neither a quoted string nor a known literal.
#
# Tries, in order:
#   1. a parenthesised range such as "(1..5)", handed to RangeLookup.parse
#   2. a number, via parse_number
#   3. anything else, handed to VariableLookup.parse
#
# ss and cache are passed through unchanged to the nested parsers.
def inner_parse(markup, ss, cache)
  # Cheap prefix/suffix check first so the regex only runs on "(...)" input.
  parenthesised = markup.start_with?("(") && markup.end_with?(")")
  if parenthesised && (range_match = RANGES_REGEX.match(markup))
    return RangeLookup.parse(range_match[1], range_match[2], ss, cache)
  end

  # parse_number yields a falsy value for non-numeric markup, in which case
  # the markup is treated as a variable lookup.
  parse_number(markup, ss) || VariableLookup.parse(markup, ss, cache)
end

# Tries to interpret markup as a number.
#
# markup - the String to inspect.
# ss     - a StringScanner; its string is replaced with markup so the same
#          scanner object can be reused across calls.
#
# Returns an Integer when markup matches INTEGER_REGEX, a Float when it
# matches FLOAT_REGEX or passes the byte scan below, and false as soon as
# the scan meets a byte that cannot belong to a number.
def parse_number(markup, ss)
# Fast path: check whether the markup is a simple integer or float
case markup
when INTEGER_REGEX
return Integer(markup, 10)
when FLOAT_REGEX
return markup.to_f
end

ss.string = markup
# the first byte must be a digit, a period, or a dash
# NOTE(review): for an empty markup, scan_byte presumably returns nil and the
# `<` comparison below would raise; confirm callers never pass "" here.
byte = ss.scan_byte

return false if byte != DASH && byte != DOT && (byte < ZERO || byte > NINE)

# The markup could be a float with multiple dots
first_dot_pos = nil
num_end_pos = nil

while (byte = ss.scan_byte)
# after the first byte only digits and dots are accepted
return false if byte != DOT && (byte < ZERO || byte > NINE)

# we found our number and now we are just scanning the rest of the string
next if num_end_pos

if byte == DOT
if first_dot_pos.nil?
# ss.pos is read after the dot was consumed, so this points just past it
first_dot_pos = ss.pos
else
# we found another dot, so we know that the number ends here
num_end_pos = ss.pos - 1
end
end
end

# Every byte was checked to be a dash, dot or ASCII digit, so the character
# length used here equals the byte length that byteslice expects.
# NOTE(review): the loop above only falls through once scan_byte returns a
# falsy value (presumably at end of string), so ss.eos? looks always true
# here and the else branch below appears unreachable; confirm.
num_end_pos = markup.length if ss.eos?

if num_end_pos
# number ends with a number "123.123"
markup.byteslice(0, num_end_pos).to_f
else
VariableLookup.parse(markup)
# number ends with a dot "123."
markup.byteslice(0, first_dot_pos).to_f
end
end
end
Expand Down
Loading

0 comments on commit 323951b

Please sign in to comment.