Skip to content

Commit 106da40

Browse files
authored
scanner: refactor string interpolation (fix #24198) (#25053)
1 parent 9fef7ca commit 106da40

File tree

2 files changed

+40
-84
lines changed

2 files changed

+40
-84
lines changed

vlib/v/scanner/scanner.v

Lines changed: 35 additions & 84 deletions
Original file line number | Diff line number | Diff line change
@@ -33,18 +33,13 @@ pub mut:
3333
is_nested_string bool // '${'abc':-12s}'
3434
is_inter_start bool // for hacky string interpolation TODO simplify
3535
is_inter_end bool
36-
is_enclosed_inter bool
37-
is_nested_enclosed_inter bool
38-
string_count int
39-
str_dollar_needs_rcbr []bool = []
36+
str_helper_tokens []u8 // ', ", 0 (string interpolation with lcbr), { (block)
4037
line_comment string
4138
last_lt int = -1 // position of latest <
4239
is_print_line_on_error bool
4340
is_print_colored_error bool
4441
is_print_rel_paths_on_error bool
45-
quote u8 // which quote is used to denote current string: ' or "
46-
inter_quote u8
47-
just_closed_inter bool // if is_enclosed_inter was set to false on the previous character: `}`
42+
quote u8 // which quote is used to denote current string: ' or "
4843
nr_lines int // total number of lines in the source file that were scanned
4944
is_vh bool // Keep newlines
5045
is_fmt bool // Used for v fmt.
@@ -54,7 +49,6 @@ pub mut:
5449
tidx int
5550
eofs int
5651
max_eofs int = 50
57-
inter_cbr_count int
5852
pref &pref.Preferences
5953
error_details []string
6054
errors []errors.Error
@@ -671,8 +665,9 @@ pub fn (mut s Scanner) text_scan() token.Token {
671665
}
672666
// End of $var, start next string
673667
if s.is_inter_end {
674-
if s.text[s.pos] == s.quote || (s.text[s.pos] == s.inter_quote && s.is_enclosed_inter) {
668+
if s.text[s.pos] == s.quote {
675669
s.is_inter_end = false
670+
s.str_helper_tokens.delete_last()
676671
return s.new_token(.string, '', 1)
677672
}
678673
s.is_inter_end = false
@@ -799,12 +794,7 @@ pub fn (mut s Scanner) text_scan() token.Token {
799794
return s.new_token(.question, '?', 1)
800795
}
801796
single_quote, double_quote {
802-
if s.string_count == 1 && s.str_dollar_needs_rcbr.len == 0 {
803-
s.string_count = 0
804-
return s.new_token(.string, '', 1)
805-
} else {
806-
s.string_count++
807-
}
797+
s.str_helper_tokens << c
808798
start_line := s.line_nr
809799
ident_string := s.ident_string()
810800
return s.new_multiline_token(.string, ident_string, ident_string.len + 2,
@@ -829,21 +819,18 @@ pub fn (mut s Scanner) text_scan() token.Token {
829819
}
830820
`{` {
831821
// Skip { in `${` in strings
832-
if s.is_inside_string || s.is_enclosed_inter {
833-
if s.text[s.pos - 1] == `$` {
834-
s.str_dollar_needs_rcbr << true
835-
continue
836-
} else {
837-
s.str_dollar_needs_rcbr << false
838-
s.inter_cbr_count++
839-
}
822+
if _ := s.str_quote() {
823+
s.str_helper_tokens << 0
840824
} else {
841-
s.str_dollar_needs_rcbr << false
825+
s.str_helper_tokens << c
826+
}
827+
if s.is_inside_string && s.text[s.pos - 1] == `$` {
828+
continue
842829
}
843830
return s.new_token(.lcbr, '', 1)
844831
}
845832
`$` {
846-
if s.is_inside_string || s.is_enclosed_inter {
833+
if s.is_inside_string {
847834
return s.new_token(.str_dollar, '', 1)
848835
} else {
849836
return s.new_token(.dollar, '', 1)
@@ -852,48 +839,23 @@ pub fn (mut s Scanner) text_scan() token.Token {
852839
`}` {
853840
// s = `hello $name !`
854841
// s = `hello ${name} !`
855-
if ((s.is_enclosed_inter || s.is_nested_enclosed_inter) && s.inter_cbr_count == 0)
856-
|| (s.all_tokens.last().kind != .string && s.str_dollar_needs_rcbr.len > 0
857-
&& s.str_dollar_needs_rcbr.last()) {
858-
if s.str_dollar_needs_rcbr.len > 0 {
859-
s.str_dollar_needs_rcbr.delete_last()
860-
}
842+
if s.str_helper_tokens.len > 0 {
843+
s.str_helper_tokens.delete_last()
844+
}
845+
if quote := s.str_quote() {
861846
if s.pos < s.text.len - 1 {
862847
s.pos++
863848
} else {
864849
s.error('unfinished string literal')
865850
}
866-
if s.text[s.pos] == s.quote
867-
|| (s.text[s.pos] == s.inter_quote && s.is_nested_enclosed_inter) {
851+
if s.text[s.pos] == quote {
868852
s.is_inside_string = false
869-
if s.is_nested_enclosed_inter {
870-
s.is_nested_enclosed_inter = false
871-
} else {
872-
s.is_enclosed_inter = false
873-
}
874-
s.string_count--
853+
s.str_helper_tokens.delete_last()
875854
return s.new_token(.string, '', 1)
876855
}
877-
if s.is_nested_enclosed_inter {
878-
s.is_nested_enclosed_inter = false
879-
} else {
880-
s.is_enclosed_inter = false
881-
}
882-
s.just_closed_inter = true
883856
ident_string := s.ident_string()
884857
return s.new_token(.string, ident_string, ident_string.len + 2) // + two quotes
885858
} else {
886-
if s.str_dollar_needs_rcbr.len > 0 {
887-
if s.str_dollar_needs_rcbr.last() {
888-
s.str_dollar_needs_rcbr.delete_last()
889-
s.pos++
890-
return s.new_token(.string, '', 1)
891-
}
892-
s.str_dollar_needs_rcbr.delete_last()
893-
}
894-
if s.inter_cbr_count > 0 {
895-
s.inter_cbr_count--
896-
}
897859
return s.new_token(.rcbr, '', 1)
898860
}
899861
}
@@ -1225,6 +1187,8 @@ fn (s &Scanner) count_symbol_before(p int, sym u8) int {
12251187
// escapes in them (except in the r'strings' where the content is returned verbatim)
12261188
@[direct_array_access]
12271189
pub fn (mut s Scanner) ident_string() string {
1190+
quote := s.str_quote() or { return '' }
1191+
s.quote = quote
12281192
// determines if it is a nested string
12291193
if s.is_inside_string {
12301194
s.is_nested_string = true
@@ -1240,21 +1204,10 @@ pub fn (mut s Scanner) ident_string() string {
12401204
is_quote := q in [single_quote, double_quote]
12411205
is_raw := is_quote && s.pos > 0 && s.text[s.pos - 1] == `r` && !s.is_inside_string
12421206
is_cstr := is_quote && s.pos > 0 && s.text[s.pos - 1] == `c` && !s.is_inside_string
1243-
// don't interpret quote as "start of string" quote when a string interpolation has
1244-
// just ended on the previous character meaning it's not the start of a new string
1245-
if is_quote && !s.just_closed_inter {
1246-
if s.is_inside_string || s.is_enclosed_inter || s.is_inter_start {
1247-
s.inter_quote = q
1248-
} else {
1249-
s.quote = q
1250-
}
1251-
}
1252-
s.just_closed_inter = false
12531207
mut n_cr_chars := 0
12541208
mut start := s.pos
12551209
start_char := s.text[start]
1256-
if start_char == s.quote
1257-
|| (start_char == s.inter_quote && (s.is_inter_start || s.is_enclosed_inter)) {
1210+
if start_char == s.quote {
12581211
start++
12591212
} else if start_char == b_lf {
12601213
s.inc_line_number()
@@ -1281,14 +1234,6 @@ pub fn (mut s Scanner) ident_string() string {
12811234
// end of string
12821235
if c == s.quote && (is_raw || backslash_count & 1 == 0) {
12831236
// handle '123\\' backslash at the end
1284-
s.string_count--
1285-
break
1286-
}
1287-
if c == s.inter_quote && (s.is_inter_start || s.is_enclosed_inter) {
1288-
s.string_count--
1289-
break
1290-
}
1291-
if c == s.quote && s.string_count == 0 {
12921237
break
12931238
}
12941239
if c == b_cr {
@@ -1340,11 +1285,6 @@ pub fn (mut s Scanner) ident_string() string {
13401285
if prevc == `$` && c == `{` && !is_raw
13411286
&& s.count_symbol_before(s.pos - 2, backslash) & 1 == 0 {
13421287
s.is_inside_string = true
1343-
if s.is_enclosed_inter {
1344-
s.is_nested_enclosed_inter = true
1345-
} else {
1346-
s.is_enclosed_inter = true
1347-
}
13481288
// so that s.pos points to $ at the next step
13491289
s.pos -= 2
13501290
break
@@ -1416,6 +1356,9 @@ pub fn (mut s Scanner) ident_string() string {
14161356
lit = string_so_far
14171357
}
14181358
}
1359+
if s.text[end] == quote {
1360+
s.str_helper_tokens.delete_last()
1361+
}
14191362
return lit
14201363
}
14211364

@@ -1863,11 +1806,8 @@ pub fn (mut s Scanner) prepare_for_new_text(text string) {
18631806
s.is_nested_string = false
18641807
s.is_inter_start = false
18651808
s.is_inter_end = false
1866-
s.is_enclosed_inter = false
1867-
s.is_nested_enclosed_inter = false
18681809
s.last_lt = 0
18691810
s.quote = 0
1870-
s.inter_quote = 0
18711811
}
18721812

18731813
// new_silent_scanner returns a new scanner instance, setup to just set internal flags and append errors
@@ -1880,3 +1820,14 @@ pub fn new_silent_scanner() &Scanner {
18801820
pref: p
18811821
}
18821822
}
1823+
1824+
pub fn (s Scanner) str_quote() ?u8 {
1825+
if s.str_helper_tokens.len == 0 {
1826+
return none
1827+
}
1828+
c := s.str_helper_tokens.last()
1829+
if c in [`'`, `"`] {
1830+
return c
1831+
}
1832+
return none
1833+
}
Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,5 @@
1+
import term
2+
3+
fn test_main() {
4+
println('\'hello\'${term.blue(", 'world'")} !')
5+
}

0 commit comments

Comments
 (0)