Skip to content

Commit 1a54817

Browse files
authored
toml: parse formatting (#12374)
1 parent 2b41549 commit 1a54817

File tree

6 files changed

+199
-60
lines changed

6 files changed

+199
-60
lines changed

vlib/toml/parser/parser.v

Lines changed: 146 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,11 @@ import toml.util
99
import toml.token
1010
import toml.scanner
1111

12+
pub const (
13+
all_formatting = [token.Kind.whitespace, .tab, .nl]
14+
space_formatting = [token.Kind.whitespace, .tab]
15+
)
16+
1217
// Parser contains the necessary fields for keeping the state of the parsing process.
1318
pub struct Parser {
1419
pub:
@@ -127,7 +132,8 @@ fn (mut p Parser) check(check_token token.Kind) ? {
127132
}
128133
}
129134

130-
// check_one_of returns true if the current token's `Kind` is equal that of `expected_token`.
135+
// check_one_of forwards the parser to the next token if the current
136+
// token's `Kind` can be found in `tokens`. Otherwise it returns an error.
131137
fn (mut p Parser) check_one_of(tokens []token.Kind) ? {
132138
if p.tok.kind in tokens {
133139
p.next() ?
@@ -137,6 +143,45 @@ fn (mut p Parser) check_one_of(tokens []token.Kind) ? {
137143
}
138144
}
139145

146+
// ignore_while forwards the parser to the next token as long as the current
147+
// token's `Kind` can be found in `tokens`. This is helpful for ignoring
148+
// a stream of formatting tokens.
149+
fn (mut p Parser) ignore_while(tokens []token.Kind) {
150+
if p.tok.kind in tokens {
151+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'ignoring "$p.tok.kind" ...')
152+
p.next() or { return }
153+
p.ignore_while(tokens)
154+
}
155+
}
156+
157+
// ignore_while_peek forwards the parser to the next token as long as `peek_tok`
158+
// token's `Kind` can be found in `tokens`. This is helpful for ignoring
159+
// a stream of formatting tokens.
160+
// In contrast to `ignore_while`, `ignore_while_peek` compares on `peek_tok`; this is
161+
// sometimes necessary since not all parser calls forward using the `next()` call.
162+
fn (mut p Parser) ignore_while_peek(tokens []token.Kind) {
163+
for p.peek_tok.kind in tokens {
164+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'eating "$p.tok.kind" ...')
165+
p.next() or { return }
166+
}
167+
}
168+
169+
// peek_over peeks ahead from token starting at `i` skipping over
170+
// any `token.Kind`s found in `tokens`. `peek_over` returns the next token *not*
171+
// found in `tokens`.
172+
fn (mut p Parser) peek_over(i int, tokens []token.Kind) ?token.Token {
173+
mut peek_tok := p.peek_tok
174+
175+
// Peek ahead as far as we can from token at `i` while the peeked
176+
// token is found in `tokens`.
177+
mut peek_i := i
178+
for peek_tok.kind in tokens {
179+
peek_tok = p.peek(peek_i) ?
180+
peek_i++
181+
}
182+
return peek_tok
183+
}
184+
140185
// is_at returns true if the token kind is equal to `expected_token`.
141186
fn (mut p Parser) is_at(expected_token token.Kind) bool {
142187
return p.tok.kind == expected_token
@@ -251,12 +296,15 @@ pub fn (mut p Parser) find_in_table(mut table map[string]ast.Value, key string)
251296
pub fn (mut p Parser) sub_key() ?string {
252297
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing nested key...')
253298
key := p.key() ?
299+
p.ignore_while_peek(parser.space_formatting)
254300
mut text := key.str()
255301
for p.peek_tok.kind == .period {
256302
p.next() ? // .
257303
p.check(.period) ?
304+
p.ignore_while(parser.space_formatting)
258305
next_key := p.key() ?
259306
text += '.' + next_key.text
307+
p.ignore_while_peek(parser.space_formatting)
260308
}
261309
p.next() ?
262310
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed nested key `$text` now at "$p.tok.kind" "$p.tok.lit"')
@@ -282,23 +330,22 @@ pub fn (mut p Parser) root_table() ? {
282330
p.ast_root.comments << c
283331
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "$c.text"')
284332
}
285-
//.whitespace, .tab, .nl {
286-
// util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "$p.tok.kind "$p.tok.lit"')
287-
//}
333+
.whitespace, .tab, .nl, .cr {
334+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping formatting "$p.tok.kind" "$p.tok.lit"')
335+
continue
336+
}
288337
.bare, .quoted, .boolean, .number, .underscore { // NOTE .boolean allows for use of "true" and "false" as table keys
289-
if p.peek_tok.kind == .assign
290-
|| (p.tok.kind == .number && p.peek_tok.kind == .minus) {
291-
key, val := p.key_value() ?
338+
mut peek_tok := p.peek_tok
292339

293-
t := p.find_table() ?
294-
unsafe {
295-
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting "$key.str()" = $val.to_json() in table ${ptr_str(t)}')
296-
t[key.str()] = val
297-
}
298-
} else if p.peek_tok.kind == .period {
299-
subkey := p.sub_key() ?
340+
// Peek forward as far as we can skipping over space formatting tokens.
341+
peek_tok = p.peek_over(1, parser.space_formatting) ?
300342

343+
if peek_tok.kind == .period {
344+
p.ignore_while(parser.space_formatting)
345+
subkey := p.sub_key() ?
346+
p.ignore_while(parser.space_formatting)
301347
p.check(.assign) ?
348+
p.ignore_while(parser.space_formatting)
302349
val := p.value() ?
303350

304351
sub_table, key := p.sub_table_key(subkey)
@@ -309,19 +356,32 @@ pub fn (mut p Parser) root_table() ? {
309356
t[key] = val
310357
}
311358
} else {
312-
return error(@MOD + '.' + @STRUCT + '.' + @FN +
313-
' dead end at "$p.tok.kind" "$p.tok.lit"')
359+
p.ignore_while(parser.space_formatting)
360+
key, val := p.key_value() ?
361+
362+
t := p.find_table() ?
363+
unsafe {
364+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting "$key.str()" = $val.to_json() in table ${ptr_str(t)}')
365+
t[key.str()] = val
366+
}
314367
}
315368
}
316369
.lsbr {
317370
p.check(.lsbr) ? // '[' bracket
371+
p.ignore_while(parser.space_formatting)
372+
373+
mut peek_tok := p.peek_tok
374+
// Peek forward as far as we can skipping over space formatting tokens.
375+
peek_tok = p.peek_over(1, parser.space_formatting) ?
318376

319377
if p.tok.kind == .lsbr {
320378
p.array_of_tables(mut &p.root_map) ?
321379
p.skip_next = true // skip calling p.next() in coming iteration
322-
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'leaving double bracket at "$p.tok.kind "$p.tok.lit". NEXT is "$p.peek_tok.kind "$p.peek_tok.lit"')
323-
} else if p.peek_tok.kind == .period {
380+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'leaving double bracket at "$p.tok.kind" "$p.tok.lit". NEXT is "$p.peek_tok.kind "$p.peek_tok.lit"')
381+
} else if peek_tok.kind == .period {
382+
p.ignore_while(parser.space_formatting)
324383
p.root_map_key = p.sub_key() ?
384+
p.ignore_while(parser.space_formatting)
325385
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'setting root map key to `$p.root_map_key` at "$p.tok.kind" "$p.tok.lit"')
326386
p.expect(.rsbr) ?
327387
} else {
@@ -359,17 +419,25 @@ pub fn (mut p Parser) inline_table(mut tbl map[string]ast.Value) ? {
359419
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind"')
360420

361421
if previous_token_was_value {
422+
p.ignore_while(parser.space_formatting)
362423
if p.tok.kind != .rcbr {
363424
p.expect(.comma) ?
364425
}
365426
previous_token_was_value = false
366427
}
367428

368429
match p.tok.kind {
369-
//.whitespace, .tab, .nl {
370-
// util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "$p.tok.kind "$p.tok.lit"')
371-
//}
430+
.whitespace, .tab {
431+
/*
432+
if !p.scanner.config.tokenize_formatting {
433+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping "$p.tok.kind" "$p.tok.lit"')
434+
continue
435+
}*/
436+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping formatting "$p.tok.kind" "$p.tok.lit"')
437+
continue
438+
}
372439
.comma {
440+
p.ignore_while_peek(parser.space_formatting)
373441
if p.peek_tok.kind == .rcbr {
374442
p.next() ? // Forward to the peek_tok
375443
return error(@MOD + '.' + @STRUCT + '.' + @FN +
@@ -388,13 +456,16 @@ pub fn (mut p Parser) inline_table(mut tbl map[string]ast.Value) ? {
388456
return
389457
}
390458
.bare, .quoted, .boolean, .number, .underscore {
391-
if p.peek_tok.kind == .assign {
392-
key, val := p.key_value() ?
393-
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @5 "$key.str()" = $val.to_json() into ${ptr_str(tbl)}')
394-
tbl[key.str()] = val
395-
} else if p.peek_tok.kind == .period {
459+
mut peek_tok := p.peek_tok
460+
// Peek forward as far as we can skipping over space formatting tokens.
461+
peek_tok = p.peek_over(1, parser.space_formatting) ?
462+
463+
if peek_tok.kind == .period {
464+
p.ignore_while(parser.space_formatting)
396465
subkey := p.sub_key() ?
466+
p.ignore_while(parser.space_formatting)
397467
p.check(.assign) ?
468+
p.ignore_while(parser.space_formatting)
398469
val := p.value() ?
399470

400471
sub_table, key := p.sub_table_key(subkey)
@@ -405,8 +476,10 @@ pub fn (mut p Parser) inline_table(mut tbl map[string]ast.Value) ? {
405476
t[key] = val
406477
}
407478
} else {
408-
return error(@MOD + '.' + @STRUCT + '.' + @FN +
409-
' dead end at "$p.tok.kind" "$p.tok.lit"')
479+
p.ignore_while(parser.space_formatting)
480+
key, val := p.key_value() ?
481+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @5 "$key.str()" = $val.to_json() into ${ptr_str(tbl)}')
482+
tbl[key.str()] = val
410483
}
411484
previous_token_was_value = true
412485
}
@@ -438,6 +511,8 @@ pub fn (mut p Parser) array_of_tables(mut table map[string]ast.Value) ? {
438511
p.check(.rsbr) ?
439512
p.check(.rsbr) ?
440513

514+
p.ignore_while(parser.all_formatting)
515+
441516
key_str := key.str()
442517
unsafe {
443518
if key_str in table.keys() {
@@ -448,15 +523,15 @@ pub fn (mut p Parser) array_of_tables(mut table map[string]ast.Value) ? {
448523
{
449524
if val is []ast.Value {
450525
arr := &(table[key_str] as []ast.Value)
451-
arr << p.double_bracket_array() ?
526+
arr << p.array_of_tables_contents() ?
452527
table[key_str] = arr
453528
} else {
454529
return error(@MOD + '.' + @STRUCT + '.' + @FN +
455530
' table[$key_str] is not an array. (excerpt): "...${p.excerpt()}..."')
456531
}
457532
}
458533
} else {
459-
table[key_str] = p.double_bracket_array() ?
534+
table[key_str] = p.array_of_tables_contents() ?
460535
}
461536
}
462537
p.last_aot = key_str
@@ -475,6 +550,7 @@ pub fn (mut p Parser) double_array_of_tables(mut table map[string]ast.Value) ? {
475550
next_key := p.key() ?
476551
key_str += '.' + next_key.text
477552
}
553+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed nested key `$key_str` now at "$p.tok.kind" "$p.tok.lit"')
478554

479555
p.next() ?
480556
p.check(.rsbr) ?
@@ -501,7 +577,10 @@ pub fn (mut p Parser) double_array_of_tables(mut table map[string]ast.Value) ? {
501577
}
502578

503579
mut t_arr := &(table[p.last_aot] as []ast.Value)
504-
mut t_map := t_arr[p.last_aot_index]
580+
mut t_map := ast.Value(map[string]ast.Value{})
581+
if t_arr.len > 0 {
582+
t_map = t_arr[p.last_aot_index]
583+
}
505584
mut t := &(t_map as map[string]ast.Value)
506585

507586
if last in t.keys() {
@@ -512,29 +591,51 @@ pub fn (mut p Parser) double_array_of_tables(mut table map[string]ast.Value) ? {
512591
{
513592
if val is []ast.Value {
514593
arr := &(val as []ast.Value)
515-
arr << p.double_bracket_array() ?
594+
arr << p.array_of_tables_contents() ?
516595
t[last] = arr
517596
} else {
518597
return error(@MOD + '.' + @STRUCT + '.' + @FN +
519598
' t[$last] is not an array. (excerpt): "...${p.excerpt()}..."')
520599
}
521600
}
522601
} else {
523-
t[last] = p.double_bracket_array() ?
602+
t[last] = p.array_of_tables_contents() ?
603+
}
604+
if t_arr.len == 0 {
605+
t_arr << t
606+
p.last_aot_index = 0
524607
}
525608
}
526609
}
527610

528611
// array_of_tables_contents parses next tokens into an array of `ast.Value`s.
529-
pub fn (mut p Parser) double_bracket_array() ?[]ast.Value {
530-
mut arr := []ast.Value{}
531-
for p.tok.kind in [.bare, .quoted, .boolean, .number] && p.peek_tok.kind == .assign {
532-
mut tbl := map[string]ast.Value{}
533-
key, val := p.key_value() ?
534-
tbl[key.str()] = val
535-
arr << tbl
612+
pub fn (mut p Parser) array_of_tables_contents() ?[]ast.Value {
613+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing array of tables contents from "$p.tok.kind" "$p.tok.lit"')
614+
mut tbl := map[string]ast.Value{}
615+
for p.tok.kind in [.bare, .quoted, .boolean, .number] {
616+
if p.peek_tok.kind == .period {
617+
subkey := p.sub_key() ?
618+
p.check(.assign) ?
619+
val := p.value() ?
620+
621+
sub_table, key := p.sub_table_key(subkey)
622+
623+
mut t := p.find_in_table(mut tbl, sub_table) ?
624+
unsafe {
625+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'inserting @6 "$key" = $val.to_json() into ${ptr_str(t)}')
626+
t[key] = val
627+
}
628+
} else {
629+
key, val := p.key_value() ?
630+
tbl[key.str()] = val
631+
}
536632
p.next() ?
633+
p.ignore_while(parser.all_formatting)
537634
}
635+
mut arr := []ast.Value{}
636+
arr << tbl
637+
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing array of tables ${arr.str().replace('\n',
638+
' ')}. leaving at "$p.tok.kind" "$p.tok.lit"')
538639
return arr
539640
}
540641

@@ -549,6 +650,7 @@ pub fn (mut p Parser) array() ?[]ast.Value {
549650
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing token "$p.tok.kind" "$p.tok.lit"')
550651

551652
if previous_token_was_value {
653+
p.ignore_while(parser.all_formatting)
552654
if p.tok.kind != .rsbr && p.tok.kind != .hash {
553655
p.expect(.comma) ?
554656
}
@@ -581,6 +683,7 @@ pub fn (mut p Parser) array() ?[]ast.Value {
581683
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'skipping comment "$c.text"')
582684
}
583685
.lcbr {
686+
p.ignore_while(parser.space_formatting)
584687
mut t := map[string]ast.Value{}
585688
p.inline_table(mut t) ?
586689
arr << ast.Value(t)
@@ -680,7 +783,9 @@ pub fn (mut p Parser) key_value() ?(ast.Key, ast.Value) {
680783
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsing key value pair...')
681784
key := p.key() ?
682785
p.next() ?
786+
p.ignore_while(parser.space_formatting)
683787
p.check(.assign) ? // Assignment operator
788+
p.ignore_while(parser.space_formatting)
684789
value := p.value() ?
685790
util.printdbg(@MOD + '.' + @STRUCT + '.' + @FN, 'parsed key value pair. "$key" = $value.to_json()')
686791
return key, value
@@ -711,6 +816,7 @@ pub fn (mut p Parser) value() ?ast.Value {
711816
ast.Value(p.array() ?)
712817
}
713818
.lcbr {
819+
p.ignore_while(parser.space_formatting)
714820
mut t := map[string]ast.Value{}
715821
p.inline_table(mut t) ?
716822
// table[key_str] = ast.Value(t)

0 commit comments

Comments
 (0)