Skip to content

Commit e3d3727

Browse files
authored
toml: fix 7 escape tests (#12017)
1 parent c2f535f commit e3d3727

File tree

3 files changed

+98
-25
lines changed

3 files changed

+98
-25
lines changed

vlib/toml/checker/checker.v

Lines changed: 63 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ import toml.ast.walker
99
import toml.token
1010
import toml.scanner
1111

12+
pub const allowed_basic_escape_chars = [`u`, `U`, `b`, `t`, `n`, `f`, `r`, `"`, `\\`]
13+
1214
// Checker checks a tree of TOML `ast.Value`'s for common errors.
1315
pub struct Checker {
1416
scanner &scanner.Scanner
@@ -172,12 +174,68 @@ fn (c Checker) check_boolean(b ast.Bool) ? {
172174
' boolean values like "$lit" can only be `true` or `false` literals, not `$lit` in ...${c.excerpt(b.pos)}...')
173175
}
174176

175-
fn (c Checker) check_quoted(b ast.Quoted) ? {
176-
lit := b.text
177-
quote := b.quote.ascii_str()
177+
fn (c Checker) check_quoted(q ast.Quoted) ? {
178+
lit := q.text
179+
quote := q.quote.ascii_str()
178180
triple_quote := quote + quote + quote
179-
if b.is_multiline && lit.ends_with(triple_quote) {
181+
if q.is_multiline && lit.ends_with(triple_quote) {
180182
return error(@MOD + '.' + @STRUCT + '.' + @FN +
181-
' string values like "$lit" is has unbalanced quote literals `b.quote` in ...${c.excerpt(b.pos)}...')
183+
' string values like "$lit" is has unbalanced quote literals `q.quote` in ...${c.excerpt(q.pos)}...')
184+
}
185+
c.check_quoted_escapes(q) ?
186+
}
187+
188+
// check_quoted_escapes returns an error for any disallowed escape sequences.
189+
// Delimiters in TOML have significant meaning:
190+
// '/''' delimits *literal* strings (WYSIWYG / What-you-see-is-what-you-get)
191+
// "/""" delimits *basic* strings
192+
// Allowed escapes in *basic* strings are:
193+
// \b - backspace (U+0008)
194+
// \t - tab (U+0009)
195+
// \n - linefeed (U+000A)
196+
// \f - form feed (U+000C)
197+
// \r - carriage return (U+000D)
198+
// \" - quote (U+0022)
199+
// \\ - backslash (U+005C)
200+
// \uXXXX - unicode (U+XXXX)
201+
// \UXXXXXXXX - unicode (U+XXXXXXXX)
202+
fn (c Checker) check_quoted_escapes(q ast.Quoted) ? {
203+
// Set up a scanner in stack memory for easier navigation.
204+
mut s := scanner.new_simple(q.text) ?
205+
206+
is_basic := q.quote == `\"`
207+
for {
208+
ch := s.next()
209+
if ch == -1 {
210+
break
211+
}
212+
ch_byte := byte(ch)
213+
if ch == `\\` {
214+
next_ch := byte(s.at())
215+
216+
if next_ch == `\\` {
217+
s.next()
218+
continue
219+
}
220+
escape := ch_byte.ascii_str() + next_ch.ascii_str()
221+
if is_basic {
222+
if q.is_multiline {
223+
if next_ch == byte(32) && s.peek(1) == byte(92) {
224+
st := s.state()
225+
return error(@MOD + '.' + @STRUCT + '.' + @FN +
226+
' can not escape whitespaces before escapes in multi-line strings (`\\ \\`) at `$escape` ($st.line_nr,$st.col) in ...${c.excerpt(q.pos)}...')
227+
}
228+
if next_ch in [`\t`, `\n`, ` `] {
229+
s.next()
230+
continue
231+
}
232+
}
233+
if next_ch !in checker.allowed_basic_escape_chars {
234+
st := s.state()
235+
return error(@MOD + '.' + @STRUCT + '.' + @FN +
236+
' unknown basic string escape character `$next_ch.ascii_str()` in `$escape` ($st.line_nr,$st.col) in ...${c.excerpt(q.pos)}...')
237+
}
238+
}
239+
}
182240
}
183241
}

vlib/toml/scanner/scanner.v

Lines changed: 31 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,16 @@ mut:
2525
mode Mode // sub-mode of the scanner
2626
}
2727

28+
// State is a read-only copy of the scanner's internal state.
29+
// See also `Scanner.state()`.
30+
pub struct State {
31+
pub:
32+
col int // current column number (x coordinate)
33+
line_nr int = 1 // current line number (y coordinate)
34+
pos int // current flat/index position in the `text` field
35+
mode Mode // sub-mode of the scanner
36+
}
37+
2838
enum Mode {
2939
normal
3040
inside_string
@@ -426,6 +436,8 @@ fn (mut s Scanner) extract_multiline_string() ?string {
426436
}
427437

428438
c := s.at()
439+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'c: `$c.ascii_str()` / $c (quote type: $quote/$quote.ascii_str())')
440+
429441
if c == `\n` {
430442
s.inc_line_number()
431443
lit += c.ascii_str()
@@ -443,8 +455,6 @@ fn (mut s Scanner) extract_multiline_string() ?string {
443455
}
444456
}
445457

446-
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'c: `$c.ascii_str()` / $c')
447-
448458
if c == quote {
449459
if s.peek(1) == quote && s.peek(2) == quote {
450460
if s.peek(3) == -1 {
@@ -469,14 +479,16 @@ fn (mut s Scanner) extract_multiline_string() ?string {
469479
return lit
470480
}
471481

472-
// handle_escapes
482+
// handle_escapes returns any escape character sequence.
483+
// For escape sequence validation see `Checker.check_quoted_escapes`.
473484
fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) {
474485
c := s.at()
475486
mut lit := c.ascii_str()
476-
if s.peek(1) == byte(92) {
477-
lit += lit
478-
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
479-
return lit, 1
487+
if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit()
488+
&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() {
489+
lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
490+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped unicode `$lit`')
491+
return lit, 4
480492
} else if s.peek(1) == quote {
481493
if (!is_multiline && s.peek(2) == `\n`)
482494
|| (is_multiline && s.peek(2) == quote && s.peek(3) == quote && s.peek(4) == `\n`) {
@@ -486,13 +498,9 @@ fn (mut s Scanner) handle_escapes(quote byte, is_multiline bool) (string, int) {
486498
lit += quote.ascii_str()
487499
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
488500
return lit, 1
489-
} else if s.peek(1) == `u` && byte(s.peek(2)).is_hex_digit() && byte(s.peek(3)).is_hex_digit()
490-
&& byte(s.peek(4)).is_hex_digit() && byte(s.peek(5)).is_hex_digit() {
491-
lit += s.text[s.pos + 1..s.pos + 6] //.ascii_str()
492-
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'gulp escaped `$lit`')
493-
return lit, 4
494501
}
495-
return '', 0
502+
lit += byte(s.peek(1)).ascii_str()
503+
return lit, 1
496504
}
497505

498506
// extract_number collects and returns a string containing
@@ -542,3 +550,13 @@ pub fn (s Scanner) excerpt(pos int, margin int) string {
542550
end := if pos + margin < s.text.len { pos + margin } else { s.text.len }
543551
return s.text[start..end].replace('\n', r'\n')
544552
}
553+
554+
// state returns a read-only view of the scanner's internal state.
555+
pub fn (s Scanner) state() State {
556+
return State{
557+
col: s.col
558+
line_nr: s.line_nr
559+
pos: s.pos
560+
mode: s.mode
561+
}
562+
}

vlib/toml/tests/burntsushi.toml-test_test.v

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -15,19 +15,12 @@ const (
1515
invalid_exceptions = [
1616
// String
1717
'string/basic-multiline-out-of-range-unicode-escape-1.toml',
18-
'string/basic-byte-escapes.toml',
19-
'string/multiline-escape-space.toml',
2018
'string/bad-codepoint.toml',
2119
'string/basic-multiline-out-of-range-unicode-escape-2.toml',
22-
'string/bad-slash-escape.toml',
2320
'string/basic-out-of-range-unicode-escape-1.toml',
2421
'string/basic-out-of-range-unicode-escape-2.toml',
2522
'string/bad-uni-esc.toml',
26-
'string/bad-escape.toml',
27-
'string/basic-multiline-unknown-escape.toml',
2823
'string/missing-quotes.toml',
29-
'string/bad-byte-escape.toml',
30-
'string/basic-unknown-escape.toml',
3124
// Integer
3225
'integer/capital-bin.toml',
3326
'integer/invalid-bin.toml',
@@ -155,6 +148,10 @@ fn test_burnt_sushi_tomltest() {
155148
if relative !in invalid_exceptions {
156149
println('OK [$i/$invalid_test_files.len] "$invalid_test_file"...')
157150
if toml_doc := toml.parse_file(invalid_test_file) {
151+
content_that_should_have_failed := os.read_file(invalid_test_file) or {
152+
panic(err)
153+
}
154+
println(' This TOML should have failed:\n${'-'.repeat(40)}\n$content_that_should_have_failed\n${'-'.repeat(40)}')
158155
assert false
159156
} else {
160157
println(' $err.msg')

0 commit comments

Comments
 (0)