Skip to content

Commit ebde7bc

Browse files
authored
native: improve string support (#24600)
1 parent 399454f commit ebde7bc

File tree

10 files changed

+253
-23
lines changed

10 files changed

+253
-23
lines changed

vlib/v/gen/native/amd64.v

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -290,6 +290,9 @@ fn (mut c Amd64) cmp_zero(reg Register) {
290290
}
291291

292292
fn (mut c Amd64) cmp_var_reg(var Var, reg Register, config VarConfig) {
293+
if reg as Amd64Register != .rax {
294+
c.g.n_error('${@LOCATION} register ${reg} is not supported')
295+
}
293296
match var {
294297
ast.Ident {
295298
var_object := c.g.get_var_from_ident(var)
@@ -1668,6 +1671,12 @@ fn (mut c Amd64) sub_reg(a Amd64Register, b Amd64Register) {
16681671
c.g.println('sub ${a}, ${b}')
16691672
}
16701673

1674+
// add_reg2 is the CodeGen-interface variant of add_reg: it accepts the generic
// `Register` type, casts both operands to `Amd64Register` and forwards them,
// in the same order, to add_reg.
// NOTE(review): the `as` casts presumably abort at runtime if a non-amd64
// register is passed - confirm against Register's definition.
fn (mut c Amd64) add_reg2(a_reg Register, b_reg Register) {
1675+
a := a_reg as Amd64Register
1676+
b := b_reg as Amd64Register
1677+
c.add_reg(a, b)
1678+
}
1679+
16711680
fn (mut c Amd64) add_reg(a Amd64Register, b Amd64Register) {
16721681
if i32(a) <= i32(Amd64Register.r15) && i32(b) <= i32(Amd64Register.r15) {
16731682
c.g.write8(0x48 + if i32(a) >= i32(Amd64Register.r8) { i32(1) } else { i32(0) } +
@@ -1757,7 +1766,7 @@ fn (mut c Amd64) sar8(r Amd64Register, val u8) {
17571766
pub fn (mut c Amd64) call_fn(node ast.CallExpr) {
17581767
name := node.name
17591768
mut n := name
1760-
if !n.contains('.') && n !in c.g.fn_addr.keys() { // if the name is in keys, it is a function from builtin
1769+
if !n.contains('.') && n !in c.g.fn_names { // if the name is in fn_names, it is a function from builtin
17611770
n = 'main.${n}'
17621771
}
17631772
if node.is_method {
@@ -1847,7 +1856,8 @@ pub fn (mut c Amd64) call_fn(node ast.CallExpr) {
18471856
continue
18481857
}
18491858
c.g.expr(args[i].expr)
1850-
if c.g.table.sym(args[i].typ).kind == .struct && !args[i].typ.is_ptr() {
1859+
if (c.g.table.sym(args[i].typ).kind == .struct || args[i].typ.is_string())
1860+
&& !args[i].typ.is_ptr() {
18511861
match args_size[i] {
18521862
1...8 {
18531863
c.mov_deref(Amd64Register.rax, Amd64Register.rax, ast.i64_type_idx)
@@ -2193,6 +2203,14 @@ fn (mut c Amd64) mov_float_xmm0_var(reg Amd64Register, var_type ast.Type) {
21932203
}
21942204
}
21952205

2206+
// create_string_struct allocates a local variable called `name`, sized for `typ`,
// and fills it in as a string value for the literal `str`:
//   1. the variable is allocated with an initial value of 0,
//   2. the address of the allocated string data is loaded into rsi and stored
//      at the start of the variable as a u64,
//   3. `str.len` is stored as an i32 at the offset of the `len` field of `typ`.
// Note that rsi is overwritten by this function.
fn (mut c Amd64) create_string_struct(typ ast.Type, name string, str string) {
2207+
dest := c.allocate_var(name, c.g.get_type_size(typ), i64(0))
2208+
// NOTE(review): the meaning of the literal `3` passed to allocate_string is not visible here - confirm
c.learel(Amd64Register.rsi, c.g.allocate_string(str, 3, .rel32))
2209+
c.mov_reg_to_var(LocalVar{dest, ast.u64_type_idx, name}, Amd64Register.rsi)
2210+
offset := c.g.get_field_offset(typ, 'len')
2211+
c.mov_int_to_var(LocalVar{dest, ast.i32_type_idx, name}, i32(str.len), offset: offset)
2212+
}
2213+
21962214
fn (mut c Amd64) assign_ident_right_expr(node ast.AssignStmt, i i32, right ast.Expr, name string, ident ast.Ident) {
21972215
match right {
21982216
ast.IntegerLiteral {
@@ -2222,11 +2240,9 @@ fn (mut c Amd64) assign_ident_right_expr(node ast.AssignStmt, i i32, right ast.E
22222240
c.assign_float(node, i, right, ident)
22232241
}
22242242
ast.StringLiteral {
2225-
dest := c.allocate_var(name, 8, i64(0))
22262243
ie := right as ast.StringLiteral
22272244
str := c.g.eval_str_lit_escape_codes(ie)
2228-
c.learel(Amd64Register.rsi, c.g.allocate_string(str, 3, .rel32))
2229-
c.mov_reg_to_var(LocalVar{dest, ast.u64_type_idx, name}, Amd64Register.rsi)
2245+
c.create_string_struct(node.right_types[0], name, str)
22302246
}
22312247
ast.StructInit {
22322248
match node.op {
@@ -3394,7 +3410,7 @@ fn (mut c Amd64) fn_decl(node ast.FnDecl) {
33943410
}
33953411
}
33963412

3397-
params << node.params
3413+
params << node.params // also the var of the method
33983414

33993415
args_size := params.map(c.g.get_type_size(it.typ))
34003416
is_floats := params.map(it.typ.is_pure_float())

vlib/v/gen/native/arm64.v

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -531,6 +531,14 @@ pub fn (mut c Arm64) add(r Register, val i32) {
531531
panic('Arm64.add() not implemented')
532532
}
533533

534+
// add_reg2 is a placeholder that satisfies the CodeGen interface; the Arm64
// backend does not implement it yet, so calling it always panics.
pub fn (mut c Arm64) add_reg2(r Register, r2 Register) {
535+
panic('Arm64.add_reg2() not implemented')
536+
}
537+
538+
// create_string_struct is a placeholder that satisfies the CodeGen interface;
// the Arm64 backend does not implement it yet, so calling it always panics.
// The panic message names this function (it previously reported `add_reg2`,
// a copy-paste slip from the stub above).
fn (mut c Arm64) create_string_struct(typ ast.Type, name string, str string) {
539+
panic('Arm64.create_string_struct() not implemented')
540+
}
541+
534542
fn (mut c Arm64) mov_deref(reg Register, regptr Register, typ ast.Type) {
535543
panic('Arm64.mov_deref() not implemented')
536544
}

vlib/v/gen/native/blacklist.v

Lines changed: 18 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -29,19 +29,24 @@ already compiling functions:
2929
// false: whitelist function
3030
// true: blacklist function
3131
const whitelist = {
32-
'main.main': false
33-
'exit': false
34-
'gc_is_enabled': false
35-
'int_max': false
36-
'int_min': false
37-
'u8.is_alnum': false
38-
'u8.is_bin_digit': false
39-
'u8.is_capital': false
40-
'u8.is_digit': false
41-
'u8.is_hex_digit': false
42-
'u8.is_letter': false
43-
'u8.is_oct_digit': false
44-
'u8.is_space': false
32+
'main.main': false
33+
'c_error_number_str': false
34+
'exit': false
35+
'gc_is_enabled': false
36+
'int_max': false
37+
'int_min': false
38+
'u8.is_alnum': false
39+
'u8.is_bin_digit': false
40+
'u8.is_capital': false
41+
'u8.is_digit': false
42+
'u8.is_hex_digit': false
43+
'u8.is_letter': false
44+
'u8.is_oct_digit': false
45+
'u8.is_space': false
46+
'string.is_capital': false
47+
'string.is_ascii': false
48+
'string.is_identifier': false
49+
// 'string.is_blank': false // TODO: enable once `for ... in` loops are supported
4550
}
4651

4752
fn (g &Gen) is_blacklisted(name string, is_builtin bool) bool {

vlib/v/gen/native/elf.v

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -615,6 +615,7 @@ fn (mut g Gen) gen_section_data(sections []Section) {
615615
for rela in data {
616616
g.write64(rela.offset)
617617
g.fn_addr[rela.name] = rela.offset
618+
g.fn_names << rela.name
618619
g.write64(rela.info)
619620
g.write64(rela.addend)
620621
g.println('; SHT_RELA `${rela.name}` (${rela.offset}, ${rela.info}, ${rela.addend})')

vlib/v/gen/native/expr.v

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ fn (mut g Gen) expr(node ast.Expr) {
7878
}
7979
ast.StringLiteral {
8080
str := g.eval_str_lit_escape_codes(node)
81-
g.allocate_string(str, 3, .rel32)
81+
g.code_gen.create_string_struct(ast.string_type_idx, 'string_lit', str)
8282
}
8383
ast.CharLiteral {
8484
bytes := g.eval_escape_codes(node.val)
@@ -128,6 +128,19 @@ fn (mut g Gen) expr(node ast.Expr) {
128128
ast.SizeOf {
129129
g.gen_sizeof_expr(node)
130130
}
131+
ast.IndexExpr {
132+
if node.left_type.is_string() {
133+
g.expr(node.index)
134+
g.code_gen.mov_var_to_reg(Amd64Register.rdx, node.left as ast.Ident) // load address of string
135+
g.code_gen.add_reg2(Amd64Register.rdx, Amd64Register.rax) // add the offset to the address
136+
g.code_gen.mov_deref(Amd64Register.rax, Amd64Register.rdx, ast.u8_type_idx)
137+
} else if node.left_type.is_pointer() {
138+
dump(node)
139+
g.n_error('${@LOCATION} expr: unhandled node type: Index expr is not applied on string')
140+
} else {
141+
g.n_error('${@LOCATION} expr: unhandled node type: Index expr is not applied on string')
142+
}
143+
}
131144
else {
132145
g.n_error('${@LOCATION} expr: unhandled node type: ${node.type_name()}')
133146
}

vlib/v/gen/native/gen.v

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ mut:
3737
linker_libs []string
3838
extern_fn_calls map[i64]string
3939
fn_addr map[string]i64
40+
fn_names []string
4041
var_offset map[string]i32 // local var stack offset
4142
var_alloc_size map[string]i32 // local var allocation size
4243
stack_var_pos i32
@@ -80,6 +81,7 @@ interface CodeGen {
8081
mut:
8182
g &Gen
8283
add(r Register, val i32)
84+
add_reg2(r Register, r2 Register)
8385
address_size() i32
8486
adr(r Arm64Register, delta i32) // Note: Temporary!
8587
allocate_var(name string, size i32, initial_val Number) i32
@@ -97,6 +99,7 @@ mut:
9799
convert_bool_to_string(r Register)
98100
convert_int_to_string(a Register, b Register)
99101
convert_rune_to_string(r Register, buffer i32, var Var, config VarConfig)
102+
create_string_struct(typ ast.Type, name string, str string)
100103
dec_var(var Var, config VarConfig)
101104
fn_decl(node ast.FnDecl)
102105
gen_asm_stmt(asm_node ast.AsmStmt)
@@ -1126,6 +1129,7 @@ fn (mut g Gen) fn_decl(node ast.FnDecl) {
11261129
g.stack_var_pos = 0
11271130
g.stack_depth = 0
11281131
g.register_function_address(name)
1132+
g.fn_names << name
11291133
g.labels = &LabelTable{}
11301134
g.defer_stmts.clear()
11311135
g.return_type = node.return_type

vlib/v/gen/native/stmt.c.v

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,16 @@ fn (mut g Gen) gen_forc_stmt(node ast.ForCStmt) {
133133
ast.InfixExpr {
134134
match cond.left {
135135
ast.Ident {
136-
lit := cond.right as ast.IntegerLiteral
137-
g.code_gen.cmp_var(cond.left as ast.Ident, i32(lit.val.int()))
136+
match cond.right {
137+
ast.IntegerLiteral {
138+
lit := cond.right as ast.IntegerLiteral
139+
g.code_gen.cmp_var(cond.left as ast.Ident, i32(lit.val.int()))
140+
}
141+
else {
142+
g.expr(cond.right)
143+
g.code_gen.cmp_var_reg(cond.left as ast.Ident, Amd64Register.rax)
144+
}
145+
}
138146
match cond.op {
139147
.gt {
140148
jump_addr = g.code_gen.cjmp(.jle)

vlib/v/gen/native/tests/builtin.vv

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,118 @@
1+
mut a := ''
2+
assert a.is_ascii()
3+
a = ' Aö~'
4+
assert !a.is_ascii()
5+
6+
7+
assert ''.is_ascii() == true
8+
assert ' '.is_ascii() == true
9+
assert '~~'.is_ascii() == true
10+
assert ' Az~'.is_ascii() == true
11+
assert ' Aö~'.is_ascii() == false
12+
assert '👋'.is_ascii() == false
13+
assert 'a👋bc'.is_ascii() == false
14+
assert ''.is_identifier() == false
15+
assert ' '.is_identifier() == false
16+
assert '~~'.is_identifier() == false
17+
assert '_Az~'.is_identifier() == false
18+
assert '_Aö~'.is_identifier() == false
19+
assert '👋'.is_identifier() == false
20+
assert 'a👋bc'.is_identifier() == false
21+
assert '9'.is_identifier() == false
22+
assert '_9'.is_identifier() == true
23+
assert 'a 9'.is_identifier() == false
24+
assert 't'.is_identifier() == true
25+
/*
26+
assert ''.is_blank()
27+
assert ' '.is_blank()
28+
assert ' \t'.is_blank()
29+
assert ' \t
30+
31+
'.is_blank()
32+
assert ' \t\r'.is_blank()
33+
assert ' \t\r
34+
35+
'.is_blank()
36+
assert 'abc'.indent_width() == 0
37+
assert ' abc'.indent_width() == 1
38+
assert ' abc'.indent_width() == 2
39+
assert '\tabc'.indent_width() == 1
40+
assert '\t abc'.indent_width() == 2
41+
assert '\t\tabc'.indent_width() == 2
42+
assert '\t\t abc'.indent_width() == 3
43+
assert 'abcabca'.index_u8(`a`) == 0
44+
assert 'abcabca'.index_u8(`b`) == 1
45+
assert 'abcabca'.index_u8(`c`) == 2
46+
47+
assert 'abc'.index_u8(`d`) == -1
48+
assert 'abc'.index_u8(`A`) == -1
49+
assert 'abc'.index_u8(`B`) == -1
50+
assert 'abc'.index_u8(`C`) == -1
51+
assert 'abcabca'.last_index('ca')? == 5
52+
assert 'abcabca'.last_index('ab')? == 3
53+
assert 'abcabca'.last_index('b')? == 4
54+
assert 'Zabcabca'.last_index('Z')? == 0
55+
x := 'Zabcabca'.last_index('Y')
56+
assert x == none
57+
// TODO: `assert 'Zabcabca'.index_last('Y') == none` is a cgen error, 2023/12/04
58+
assert 'abcabca'.last_index_u8(`a`) == 6
59+
assert 'abcabca'.last_index_u8(`c`) == 5
60+
assert 'abcabca'.last_index_u8(`b`) == 4
61+
assert 'Zabcabca'.last_index_u8(`Z`) == 0
62+
//
63+
assert 'abc'.last_index_u8(`d`) == -1
64+
assert 'abc'.last_index_u8(`A`) == -1
65+
assert 'abc'.last_index_u8(`B`) == -1
66+
assert 'abc'.last_index_u8(`C`) == -1
67+
assert 'abc abca'.contains_u8(`a`)
68+
assert 'abc abca'.contains_u8(`b`)
69+
assert 'abc abca'.contains_u8(`c`)
70+
assert 'abc abca'.contains_u8(` `)
71+
assert !'abc abca'.contains_u8(`A`)
72+
assert 'Abcd'.camel_to_snake() == 'abcd'
73+
assert 'aBcd'.camel_to_snake() == 'a_bcd'
74+
assert 'AAbb'.camel_to_snake() == 'aa_bb'
75+
assert 'aaBB'.camel_to_snake() == 'aa_bb'
76+
assert 'aaBbCcDD'.camel_to_snake() == 'aa_bb_cc_dd'
77+
assert 'AAbbCC'.camel_to_snake() == 'aa_bb_cc'
78+
assert 'aaBBcc'.camel_to_snake() == 'aa_bb_cc'
79+
assert 'aa_BB'.camel_to_snake() == 'aa_bb'
80+
assert 'aa__BB'.camel_to_snake() == 'aa__bb'
81+
assert 'JVM_PUBLIC_ACC'.camel_to_snake() == 'jvm_public_acc'
82+
assert '_ISspace'.camel_to_snake() == '_is_space'
83+
assert '_aBcd'.camel_to_snake() == '_a_bcd'
84+
assert '_a_Bcd'.camel_to_snake() == '_a_bcd'
85+
assert '_AbCDe_'.camel_to_snake() == '_ab_cd_e_'
86+
assert 'abcd'.snake_to_camel() == 'Abcd'
87+
assert 'ab_cd'.snake_to_camel() == 'AbCd'
88+
assert 'ab_cd_efg'.snake_to_camel() == 'AbCdEfg'
89+
assert '_abcd'.snake_to_camel() == 'Abcd'
90+
assert '_abcd_'.snake_to_camel() == 'Abcd'
91+
assert 'Hello World'.wrap(width: 10) == 'Hello\nWorld'
92+
assert 'Hello World'.wrap(width: 10, end: '<linea-break>') == 'Hello<linea-break>World'
93+
assert 'The V programming language'.wrap(width: 20, end: '|') == 'The V programming|language'
94+
assert 'Hello, my name is Carl and I am a delivery'.wrap(width: 20) == 'Hello, my name is\nCarl and I am a\ndelivery'
95+
96+
assert 'Hello World!'.hex() == '48656c6c6f20576f726c6421'
97+
assert 'VLANG'.hex() == '564c414e47'
98+
assert 'VLANG'.hex() == 'VLANG'.bytes().hex()
99+
for c in u8(0) .. 255 {
100+
assert c.ascii_str().hex() == [c].hex()
101+
}
102+
103+
*/
104+
105+
106+
107+
108+
/// ABOVE ARE TESTS FROM VLIB
109+
110+
111+
112+
// TODO: not working
113+
// println(c_error_number_str(0))
114+
// println(c_error_number_str(1))
115+
1116
/* uncomment when floats are supported
2117
println(f32_abs(32.32))
3118
println(f32_abs(-32.32))
@@ -26,6 +141,10 @@ println(int_min(-32, 32))
26141

27142
// print_character(`a`) enable when C vars will work
28143

144+
// assert u8(`a`).ascii_str() == 'a' when ptr index will work
145+
// assert u8(`b`).ascii_str() != 'a'
146+
// assert u8(0x4F).hex() == '4F' when fixed array index will work
147+
// assert u8(0x42).hex() != '4F'
29148
assert u8(`a`).is_alnum()
30149
assert !u8(`_`).is_alnum()
31150
assert u8(`0`).is_bin_digit()
@@ -43,4 +162,7 @@ assert !u8(`8`).is_oct_digit()
43162
assert u8(` `).is_space()
44163
assert !u8(`_`).is_space()
45164

165+
assert 'Hello'.is_capital() == true
166+
assert 'HelloWorld'.is_capital() == false
167+
46168
exit(0)

0 commit comments

Comments
 (0)