Skip to content

Commit ab642ca

Browse files
authored
scanner, cgen: improve support for escape codes in backticks/runes (#13127)
1 parent ea66031 commit ab642ca

12 files changed

+249
-48
lines changed

vlib/v/checker/check_types.v

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -617,8 +617,10 @@ pub fn (mut c Checker) string_inter_lit(mut node ast.StringInterLiteral) ast.Typ
617617
return ast.string_type
618618
}
619619

620-
const hex_lit_overflow_message = 'hex character literal overflows string'
620+
const unicode_lit_overflow_message = 'unicode character exceeds max allowed value of 0x10ffff, consider using a unicode literal (\\u####)'
621621

622+
// unicode character literals are limited to a maximum value of 0x10ffff
623+
// https://stackoverflow.com/questions/52203351/why-unicode-is-restricted-to-0x10ffff
622624
pub fn (mut c Checker) string_lit(mut node ast.StringLiteral) ast.Type {
623625
mut idx := 0
624626
for idx < node.val.len {
@@ -631,7 +633,7 @@ pub fn (mut c Checker) string_lit(mut node ast.StringLiteral) ast.Type {
631633
start_idx := idx
632634
idx++
633635
next_ch := node.val[idx] or { return ast.string_type }
634-
if next_ch == `x` {
636+
if next_ch == `u` {
635637
idx++
636638
mut ch := node.val[idx] or { return ast.string_type }
637639
mut hex_char_count := 0
@@ -647,13 +649,13 @@ pub fn (mut c Checker) string_lit(mut node ast.StringLiteral) ast.Type {
647649
first_digit := node.val[idx - 5] - 48
648650
second_digit := node.val[idx - 4] - 48
649651
if first_digit > 1 {
650-
c.error(checker.hex_lit_overflow_message, end_pos)
652+
c.error(checker.unicode_lit_overflow_message, end_pos)
651653
} else if first_digit == 1 && second_digit > 0 {
652-
c.error(checker.hex_lit_overflow_message, end_pos)
654+
c.error(checker.unicode_lit_overflow_message, end_pos)
653655
}
654656
}
655657
else {
656-
c.error(checker.hex_lit_overflow_message, end_pos)
658+
c.error(checker.unicode_lit_overflow_message, end_pos)
657659
}
658660
}
659661
idx++
Lines changed: 0 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +0,0 @@
1-
vlib/v/checker/tests/hex_literal_overflow.vv:1:7: error: hex character literal overflows string
2-
1 | a := '\x11ffff'
3-
| ~~~~~~~~
4-
2 | b := '\x20ffff'
5-
3 | c := '\x10fffff'
6-
vlib/v/checker/tests/hex_literal_overflow.vv:2:7: error: hex character literal overflows string
7-
1 | a := '\x11ffff'
8-
2 | b := '\x20ffff'
9-
| ~~~~~~~~
10-
3 | c := '\x10fffff'
11-
4 | println(a)
12-
vlib/v/checker/tests/hex_literal_overflow.vv:3:7: error: hex character literal overflows string
13-
1 | a := '\x11ffff'
14-
2 | b := '\x20ffff'
15-
3 | c := '\x10fffff'
16-
| ~~~~~~~~~
17-
4 | println(a)
18-
5 | println(b)
Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
vlib/v/checker/tests/import_mod_sub_as_sub_err.vv:1:25: error: import alias `encoding.utf8 as utf8` is redundant
2-
1 | import encoding.utf8 as utf8
3-
| ~~~~
4-
2 |
5-
3 | fn main() {
1+
vlib/v/checker/tests/import_mod_sub_as_sub_err.vv:1:25: error: import alias `encoding.utf8 as utf8` is redundant
2+
1 | import encoding.utf8 as utf8
3+
| ~~~~
4+
2 |
5+
3 | fn main() {

vlib/v/checker/tests/minus_op_wrong_type_err.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
vlib/v/checker/tests/minus_op_wrong_type_err.vv:10:10: error: mismatched types `Aaa` and `int literal`
2-
8 |
2+
8 |
33
9 | fn main() {
44
10 | println(Aaa{} - 10)
55
| ~~~~~~~~~~
Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
1-
vlib/v/checker/tests/mut_array_get_element_address_err.vv:3:20: error: cannot take the address of mutable array elements outside unsafe blocks
2-
1 | fn main() {
3-
2 | mut arr_int := [int(23), 45, 7, 8]
4-
3 | ele := &arr_int[1]
5-
| ~~~
6-
4 | println(ele)
7-
5 | }
1+
vlib/v/checker/tests/mut_array_get_element_address_err.vv:3:20: error: cannot take the address of mutable array elements outside unsafe blocks
2+
1 | fn main() {
3+
2 | mut arr_int := [int(23), 45, 7, 8]
4+
3 | ele := &arr_int[1]
5+
| ~~~
6+
4 | println(ele)
7+
5 | }

vlib/v/checker/tests/string_escape_x_err_a.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
vlib/v/checker/tests/string_escape_x_err_a.vv:2:15: error: `\x` used with no following hex digits
1+
vlib/v/checker/tests/string_escape_x_err_a.vv:2:15: error: `\x` used without two following hex digits
22
1 | fn main() {
33
2 | println('\x')
44
| ^

vlib/v/checker/tests/string_escape_x_err_b.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
vlib/v/checker/tests/string_escape_x_err_b.vv:2:15: error: `\x` used with no following hex digits
1+
vlib/v/checker/tests/string_escape_x_err_b.vv:2:15: error: `\x` used without two following hex digits
22
1 | fn main() {
33
2 | println('\xhh')
44
| ^

vlib/v/checker/tests/unknown_function.out

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
vlib/v/checker/tests/unknown_function.vv:4:15: error: unknown function: math.max_i64
2-
2 |
2+
2 |
33
3 | fn main() {
44
4 | println(math.max_i64())
55
| ~~~~~~~~~

vlib/v/gen/c/cgen.v

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2510,14 +2510,27 @@ fn (mut g Gen) expr_with_cast(expr ast.Expr, got_type_raw ast.Type, expected_typ
25102510
g.expr(expr)
25112511
}
25122512

2513+
fn cescape_nonascii(original string) string {
2514+
mut b := strings.new_builder(original.len)
2515+
for c in original {
2516+
if c < 32 || c > 126 {
2517+
b.write_string('\\${c:03o}')
2518+
continue
2519+
}
2520+
b.write_b(c)
2521+
}
2522+
res := b.str()
2523+
return res
2524+
}
2525+
25132526
// cestring returns a V string, properly escaped for embedding in a C string literal.
25142527
fn cestring(s string) string {
25152528
return s.replace('\\', '\\\\').replace('"', "'")
25162529
}
25172530

25182531
// ctoslit returns a '_SLIT("$s")' call, where s is properly escaped.
25192532
fn ctoslit(s string) string {
2520-
return '_SLIT("' + cestring(s) + '")'
2533+
return '_SLIT("' + cescape_nonascii(cestring(s)) + '")'
25212534
}
25222535

25232536
fn (mut g Gen) gen_attrs(attrs []ast.Attr) {

vlib/v/gen/c/str.v

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@ import v.ast
66
import v.util
77

88
fn (mut g Gen) string_literal(node ast.StringLiteral) {
9-
escaped_val := util.smart_quote(node.val, node.is_raw)
9+
escaped_val := cescape_nonascii(util.smart_quote(node.val, node.is_raw))
1010
if node.language == .c {
1111
g.write('"$escaped_val"')
1212
} else {
@@ -25,7 +25,7 @@ fn (mut g Gen) string_inter_literal_sb_optimized(call_expr ast.CallExpr) {
2525
is_nl := call_expr.name == 'writeln'
2626
// println('optimize sb $call_expr.name')
2727
for i, val in node.vals {
28-
escaped_val := util.smart_quote(val, false)
28+
escaped_val := cescape_nonascii(util.smart_quote(val, false))
2929
// if val == '' {
3030
// break
3131
// continue

0 commit comments

Comments
 (0)