Skip to content

Commit da4afd4

Browse files
authored Feb 13, 2025
Merge pull request #1905 from Shopify/catlee/invalid_utf8
Raise SyntaxError on invalid UTF8 strings in lexer/tokenizer
2 parents 1bb3091 + 550135c commit da4afd4

File tree

5 files changed

+35
-0
lines changed

5 files changed

+35
-0
lines changed
 

‎History.md

+2
Original file line number | Diff line number | Diff line change
@@ -2,6 +2,8 @@
22

33
## 5.7.3 (unreleased)
44

5+
* Raise Liquid::SyntaxError when parsing invalidly encoded strings
6+
57
## 5.7.2 2025-01-31
68

79
* Fix array filters to not support nested properties

‎lib/liquid/lexer.rb

+6
Original file line number | Diff line number | Diff line change
@@ -161,6 +161,12 @@ def tokenize(ss)
161161
end
162162
# rubocop:enable Metrics/BlockNesting
163163
output << EOS
164+
rescue ::ArgumentError => e
165+
if e.message == "invalid byte sequence in #{ss.string.encoding}"
166+
raise SyntaxError, "Invalid byte sequence in #{ss.string.encoding}"
167+
else
168+
raise
169+
end
164170
end
165171

166172
def raise_syntax_error(start_pos, ss)

‎lib/liquid/tokenizer.rb

+6
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,12 @@ def next_text_token
103103

104104
pos = @ss.pos -= 2
105105
@source.byteslice(start, pos - start)
106+
rescue ::ArgumentError => e
107+
if e.message == "invalid byte sequence in #{@ss.string.encoding}"
108+
raise SyntaxError, "Invalid byte sequence in #{@ss.string.encoding}"
109+
else
110+
raise
111+
end
106112
end
107113

108114
def next_variable_token

‎test/unit/lexer_unit_test.rb

+10
Original file line number | Diff line number | Diff line change
@@ -131,6 +131,16 @@ def test_tokenize_incomplete_expression
131131
assert_equal([[:id, "false"], [:number, "1"], [:end_of_string]], tokenize("false 1"))
132132
end
133133

134+
def test_error_with_invalid_utf8
135+
error = assert_raises(SyntaxError) do
136+
tokenize("\x00\xff")
137+
end
138+
assert_equal(
139+
'Liquid syntax error: Invalid byte sequence in UTF-8',
140+
error.message,
141+
)
142+
end
143+
134144
private
135145

136146
def tokenize(input)

‎test/unit/template_unit_test.rb

+11
Original file line number | Diff line number | Diff line change
@@ -35,4 +35,15 @@ class TemplateSubclass < Liquid::Template
3535
def test_template_inheritance
3636
assert_equal("foo", TemplateSubclass.parse("foo").render)
3737
end
38+
39+
def test_invalid_utf8
40+
input = "\xff\x00"
41+
error = assert_raises(SyntaxError) do
42+
Liquid::Tokenizer.new(source: input, string_scanner: StringScanner.new(input))
43+
end
44+
assert_equal(
45+
'Liquid syntax error: Invalid byte sequence in UTF-8',
46+
error.message,
47+
)
48+
end
3849
end

0 commit comments

Comments
 (0)
Failed to load comments.