Skip to content

Commit ad2a629

Browse files
committed
v2: fix arm64 self-host, improve transformer type handling, and harden cleanc backend
ARM64 backend:
- Fix null pointer dereference when returning none (zero-fill return area)
- Add branch trampoline for CBNZ/CBZ 19-bit range overflow
- Fix large struct truthiness check in conditional branches
- Fix logical NOT to use (x == 0) instead of (x XOR 1)

SSA builder:
- Register sum types as {_tag, _data} struct pairs
- Resolve Option/Result types to their base type instead of i64 fallback
- Handle pointer type names (Type*), address-of non-addressable values
- Fix array struct field indices for offset field (data=0, offset=1, len=2)
- Prefer SSA struct field type over type environment for selectors

Transformer:
- Add post-transform type propagation pass (new type_propagation.v)
- Fix sumtype option returns to check _data field instead of raw truthiness
- Execute or-block side effects in else path, not unconditionally
- Skip uninstantiated generic functions (unchecked bodies)
- Fix smartcast: check variant has method before applying, disable when passing sumtype args to sumtype params
- Use _data directly for native backend smartcasts (no union variants)
- Fix map index assignment scope-escape with prefix temp variables
- Lower `return none` to `return 0` for native backends
- Resolve enum shorthands in non-sumtype match expressions

Parser:
- Add in_top_level flag to prevent parsing declarations inside fn bodies

Cleanc:
- Fix &T(x) pointer type in assignments
- Use shallow clone (depth 0) to avoid type misidentification by size
- Implement BlockStmt recursion instead of TODO placeholder

Type checker:
- Check struct field defaults and enum values after all declarations
- Add comptime flags: little_endian, big_endian, debug, native, user-defined
- Check assert extras, comptime stmts, label stmts

Misc:
- Flush libc stdio before write() syscall to prevent output reordering
- Fix tcc fallback to check compiler binary, not full command string
1 parent 8b4ff32 commit ad2a629

18 files changed

Lines changed: 1488 additions & 167 deletions

File tree

vlib/builtin/printing.c.v

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,9 @@ fn _write_buf_to_fd(fd int, buf &u8, buf_len int) {
179179
mut remaining_bytes := isize(buf_len)
180180
mut x := isize(0)
181181
$if freestanding || vinix || builtin_write_buf_to_fd_should_use_c_write ? {
182+
// Flush any pending libc stdio output (from C.puts, C.putchar, etc.)
183+
// before writing directly via write() syscall to prevent output reordering.
184+
C.fflush(unsafe { nil })
182185
unsafe {
183186
for remaining_bytes > 0 {
184187
x = C.write(fd, ptr, remaining_bytes)

vlib/v2/builder/builder.v

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -547,11 +547,14 @@ fn run_cc_cmd_or_exit(cmd string, stage string, show_cc bool) {
547547
result := os.execute(cmd)
548548
if result.exit_code != 0 {
549549
// If tcc failed, fall back to cc.
550-
if cmd.contains('tcc') {
550+
// Check only the compiler binary (before the first space), not the full
551+
// command string which contains tcc in include/library flag paths.
552+
cc_binary := cmd.all_before(' ')
553+
if cc_binary.contains('tcc') {
551554
eprintln('Failed to compile with tcc, falling back to cc')
552555
eprintln('tcc cmd: ${cmd}')
553556
eprintln(result.output)
554-
fallback_cmd := cmd.replace_once(cmd.all_before(' '), 'cc')
557+
fallback_cmd := cmd.replace_once(cc_binary, 'cc')
555558
run_cc_cmd_or_exit(fallback_cmd, stage, show_cc)
556559
return
557560
}

vlib/v2/gen/arm64/arm64.v

Lines changed: 84 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,35 +1401,47 @@ fn (mut g Gen) gen_instr(val_id int) {
14011401
g.emit_ldr_reg_offset(8, 29, g.x8_save_offset)
14021402
}
14031403

1404-
// string_literal values need to be materialized on the stack
1405-
// before we can copy them to the return pointer.
1406-
if ret_val.kind == .string_literal {
1407-
g.load_val_to_reg(9, ret_val_id)
1408-
}
1404+
// Check if returning a zero/none value (e.g., `return 0` from `return none`).
1405+
// In this case, zero-fill the return area instead of trying to copy
1406+
// from address 0 (which would be a null pointer dereference).
1407+
is_zero_const := ret_val.kind == .constant && ret_val.name == '0'
1408+
if is_zero_const {
1409+
num_fields := (fn_ret_size + 7) / 8
1410+
for i in 0 .. num_fields {
1411+
// STR xzr, [x8, #i*8]
1412+
g.emit(asm_str_imm(Reg(31), Reg(8), u32(i)))
1413+
}
1414+
} else {
1415+
// string_literal values need to be materialized on the stack
1416+
// before we can copy them to the return pointer.
1417+
if ret_val.kind == .string_literal {
1418+
g.load_val_to_reg(9, ret_val_id)
1419+
}
14091420

1410-
// Get the source address of the struct
1411-
if is_indirect_struct_return {
1412-
// Return value is a pointer to struct - use it as source
1413-
g.load_val_to_reg(9, ret_val_id)
1414-
} else if ret_offset := g.stack_map[ret_val_id] {
1415-
if g.large_struct_stack_value_is_pointer(ret_val_id) {
1416-
// Some large-struct temporaries are represented as pointers in stack slots.
1417-
g.emit_ldr_reg_offset(9, 29, ret_offset)
1421+
// Get the source address of the struct
1422+
if is_indirect_struct_return {
1423+
// Return value is a pointer to struct - use it as source
1424+
g.load_val_to_reg(9, ret_val_id)
1425+
} else if ret_offset := g.stack_map[ret_val_id] {
1426+
if g.large_struct_stack_value_is_pointer(ret_val_id) {
1427+
// Some large-struct temporaries are represented as pointers in stack slots.
1428+
g.emit_ldr_reg_offset(9, 29, ret_offset)
1429+
} else {
1430+
// Struct is materialized by value on stack.
1431+
g.emit_add_fp_imm(9, ret_offset)
1432+
}
14181433
} else {
1419-
// Struct is materialized by value on stack.
1420-
g.emit_add_fp_imm(9, ret_offset)
1434+
// Fallback
1435+
g.load_val_to_reg(9, ret_val_id)
1436+
}
1437+
// Copy struct from [x9] to [x8] (x8 was restored from saved location)
1438+
num_fields := (fn_ret_size + 7) / 8
1439+
for i in 0 .. num_fields {
1440+
// LDR x10, [x9, #i*8]
1441+
g.emit(asm_ldr_imm(Reg(10), Reg(9), u32(i)))
1442+
// STR x10, [x8, #i*8]
1443+
g.emit(asm_str_imm(Reg(10), Reg(8), u32(i)))
14211444
}
1422-
} else {
1423-
// Fallback
1424-
g.load_val_to_reg(9, ret_val_id)
1425-
}
1426-
// Copy struct from [x9] to [x8] (x8 was restored from saved location)
1427-
num_fields := (fn_ret_size + 7) / 8
1428-
for i in 0 .. num_fields {
1429-
// LDR x10, [x9, #i*8]
1430-
g.emit(asm_ldr_imm(Reg(10), Reg(9), u32(i)))
1431-
// STR x10, [x8, #i*8]
1432-
g.emit(asm_str_imm(Reg(10), Reg(8), u32(i)))
14331445
}
14341446
} else if (ret_typ.kind == .struct_t && ret_typ.fields.len > 1)
14351447
|| is_indirect_struct_return {
@@ -1454,6 +1466,15 @@ fn (mut g Gen) gen_instr(val_id int) {
14541466
} else {
14551467
g.load_val_to_reg(0, ret_val_id)
14561468
}
1469+
} else if fn_ret_typ.kind == .struct_t && ret_val.kind == .constant
1470+
&& ret_val.name == '0' {
1471+
// Returning zero/none from a function that returns a small struct.
1472+
// Zero all return registers for the struct to avoid garbage in x1+.
1473+
for i in 0 .. fn_ret_typ.fields.len {
1474+
if i < 8 {
1475+
g.emit_mov_reg(i, 31) // xN = xzr (zero)
1476+
}
1477+
}
14571478
} else {
14581479
g.load_val_to_reg(0, ret_val_id)
14591480
}
@@ -1502,7 +1523,21 @@ fn (mut g Gen) gen_instr(val_id int) {
15021523
}
15031524
}
15041525
.br {
1505-
g.load_val_to_reg(8, instr.operands[0])
1526+
// Load condition value into x8 for branch.
1527+
// For large structs (> 16 bytes), load_val_to_reg returns the *address*
1528+
// (which is always non-zero). For truthiness checks on option struct returns,
1529+
// we need to load the first word of the struct instead.
1530+
cond_val := g.mod.values[instr.operands[0]]
1531+
cond_is_large_struct := cond_val.typ > 0 && cond_val.typ < g.mod.type_store.types.len
1532+
&& g.mod.type_store.types[cond_val.typ].kind == .struct_t
1533+
&& g.type_size(cond_val.typ) > 16
1534+
if cond_is_large_struct {
1535+
// Large struct: load the address, then dereference first word
1536+
g.load_val_to_reg(8, instr.operands[0])
1537+
g.emit(asm_ldr_imm(Reg(8), Reg(8), 0)) // x8 = [x8] (first word)
1538+
} else {
1539+
g.load_val_to_reg(8, instr.operands[0])
1540+
}
15061541

15071542
true_blk := g.mod.values[instr.operands[1]].index
15081543
false_blk := g.mod.values[instr.operands[2]].index
@@ -1512,10 +1547,20 @@ fn (mut g Gen) gen_instr(val_id int) {
15121547

15131548
if off := g.block_offsets[true_blk] {
15141549
rel := (off - (g.macho.text_data.len - g.curr_offset)) / 4
1515-
g.emit(asm_cbnz(Reg(8), rel))
1550+
if rel >= -262144 && rel < 262144 {
1551+
g.emit(asm_cbnz(Reg(8), rel))
1552+
} else {
1553+
// Branch target too far for CBNZ (19-bit range).
1554+
// Use trampoline: CBZ skip; B target; skip:
1555+
g.emit(asm_cbz(Reg(8), 2)) // skip over next B instruction
1556+
g.emit(asm_b(rel - 1)) // adjust for the extra CBZ instruction
1557+
}
15161558
} else {
1559+
// Forward reference: use trampoline pattern to avoid 19-bit overflow.
1560+
// CBZ x8, skip; B target; skip:
1561+
g.emit(asm_cbz(Reg(8), 2)) // skip over next B instruction
15171562
g.record_pending_label(true_blk)
1518-
g.emit(asm_cbnz(Reg(8), 0))
1563+
g.emit(asm_b(0))
15191564
}
15201565

15211566
if false_blk == g.next_blk {
@@ -1545,10 +1590,18 @@ fn (mut g Gen) gen_instr(val_id int) {
15451590

15461591
if off := g.block_offsets[target_blk_idx] {
15471592
rel := (off - (g.macho.text_data.len - g.curr_offset)) / 4
1548-
g.emit(asm_b_cond(cond_eq, rel))
1593+
if rel >= -262144 && rel < 262144 {
1594+
g.emit(asm_b_cond(cond_eq, rel))
1595+
} else {
1596+
// Trampoline: b.ne skip; B target; skip:
1597+
g.emit(asm_b_cond(cond_ne, 2)) // skip over next B
1598+
g.emit(asm_b(rel - 1))
1599+
}
15491600
} else {
1601+
// Forward reference: use trampoline for safety
1602+
g.emit(asm_b_cond(cond_ne, 2)) // skip over next B
15501603
g.record_pending_label(target_blk_idx)
1551-
g.emit(asm_b_cond(cond_eq, 0))
1604+
g.emit(asm_b(0))
15521605
}
15531606
}
15541607

vlib/v2/gen/cleanc/assign.v

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -221,6 +221,14 @@ fn (mut g Gen) gen_assign_stmt(node ast.AssignStmt) {
221221
}
222222
}
223223
mut typ := g.get_expr_type(rhs)
224+
// Fix: &T(x) pattern - the checker may assign only the inner type T instead of T*.
225+
// Derive the pointer type directly from the expression structure.
226+
if rhs is ast.PrefixExpr && rhs.op == .amp && rhs.expr is ast.CastExpr {
227+
target_type := g.expr_type_to_c(rhs.expr.typ)
228+
if target_type != '' {
229+
typ = target_type + '*'
230+
}
231+
}
224232
mut elem_type_from_array := false
225233
if rhs is ast.CallExpr {
226234
if rhs.lhs is ast.Ident

vlib/v2/gen/cleanc/fn.v

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,11 +1412,13 @@ fn (mut g Gen) call_expr(lhs ast.Expr, args []ast.Expr) {
14121412
g.sb.write_string(', 3)')
14131413
return
14141414
}
1415-
// array__clone → array__clone_to_depth with automatic depth for deep clone
1415+
// array__clone → array__clone_to_depth with depth 0 (shallow memcpy clone).
1416+
// Depth > 0 uses element_size heuristics that misidentify non-string types
1417+
// of the same size (e.g. tagged unions like ast.Expr are 16 bytes == sizeof(string)).
14161418
if name == 'array__clone' && call_args.len == 1 {
14171419
g.sb.write_string('array__clone_to_depth(')
14181420
g.gen_call_arg(name, 0, call_args[0])
1419-
g.sb.write_string(', 3)')
1421+
g.sb.write_string(', 0)')
14201422
return
14211423
}
14221424
// array__insert with array arg → array__insert_many

vlib/v2/gen/cleanc/stmt.v

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -301,8 +301,9 @@ fn (mut g Gen) gen_stmt(node ast.Stmt) {
301301
panic('bug in v2 compiler: ComptimeStmt should have been handled in v2.transformer')
302302
}
303303
ast.BlockStmt {
304-
g.write_indent()
305-
g.sb.writeln('/* [TODO] BlockStmt */')
304+
for bs in node.stmts {
305+
g.gen_stmt(bs)
306+
}
306307
}
307308
ast.LabelStmt {
308309
g.write_indent()

vlib/v2/parser/parser.v

Lines changed: 57 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,9 @@ mut:
1717
file &token.File = &token.File{}
1818
scanner &scanner.Scanner
1919
// track state
20-
exp_lcbr bool // expecting `{` parsing `x` in `for|if|match x {` etc
21-
exp_pt bool // expecting (p)ossible (t)ype from `p.expr()`
20+
exp_lcbr bool // expecting `{` parsing `x` in `for|if|match x {` etc
21+
exp_pt bool // expecting (p)ossible (t)ype from `p.expr()`
22+
in_top_level bool // inside top-level context (file scope / top-level comptime block)
2223
// token info : start
2324
line int
2425
lit string
@@ -120,13 +121,15 @@ pub fn (mut p Parser) parse_file(filename string, mut file_set token.FileSet) as
120121
imports << import_stmt
121122
top_stmts << import_stmt
122123
}
124+
p.in_top_level = true
123125
for p.tok != .eof {
124126
top_stmt := p.top_stmt()
125127
// if top_stmt is ast.Decl {
126128
// decls << top_stmt
127129
// }
128130
top_stmts << top_stmt
129131
}
132+
p.in_top_level = false
130133
if p.pref.verbose {
131134
parse_time := sw.elapsed()
132135
println('scan & parse ${filename} (${p.file.line_count()} LOC): ${parse_time.milliseconds()}ms (${parse_time.microseconds()}µs)')
@@ -209,54 +212,59 @@ fn (mut p Parser) top_stmt() ast.Stmt {
209212

210213
fn (mut p Parser) stmt() ast.Stmt {
211214
// p.log('STMT: ${p.tok} - ${p.file.name}:${p.line}')
215+
// Top-level declarations that can appear inside comptime $if blocks at
216+
// file scope. Only parsed when in_top_level is set (file-level context),
217+
// not inside regular function bodies where they would be invalid.
218+
if p.in_top_level {
219+
match p.tok {
220+
.key_const {
221+
return p.const_decl(false)
222+
}
223+
.key_enum {
224+
return p.enum_decl(false, [])
225+
}
226+
.key_fn {
227+
// `fn name(...)` or `fn C.name(...)` is a declaration;
228+
// `fn (recv Type) method(...)` is a method declaration.
229+
next := p.peek()
230+
if next == .name || next == .lpar {
231+
return p.fn_decl(false, [])
232+
}
233+
}
234+
.key_global {
235+
return p.global_decl([])
236+
}
237+
.key_interface {
238+
return p.interface_decl(false, [])
239+
}
240+
.key_pub {
241+
p.next()
242+
match p.tok {
243+
.key_const { return p.const_decl(true) }
244+
.key_enum { return p.enum_decl(true, []) }
245+
.key_fn { return p.fn_decl(true, []) }
246+
.key_interface { return p.interface_decl(true, []) }
247+
.key_struct, .key_union { return p.struct_decl(true, []) }
248+
.key_type { return p.type_decl(true) }
249+
else { p.error('not implemented: pub ${p.tok}') }
250+
}
251+
}
252+
.key_struct, .key_union {
253+
return p.struct_decl(false, [])
254+
}
255+
.key_type {
256+
return p.type_decl(false)
257+
}
258+
else {}
259+
}
260+
}
212261
match p.tok {
213262
.dollar {
214263
return p.comptime_stmt()
215264
}
216265
.hash {
217266
return p.directive()
218267
}
219-
// Top-level declarations that can appear inside comptime $if blocks.
220-
.key_const {
221-
return p.const_decl(false)
222-
}
223-
.key_enum {
224-
return p.enum_decl(false, [])
225-
}
226-
.key_fn {
227-
// `fn name(...)` or `fn C.name(...)` is a declaration;
228-
// `fn (...)` or `fn [captures](...)` is a literal (handled by else/expr).
229-
if p.peek() == .name {
230-
return p.fn_decl(false, [])
231-
}
232-
// fall through to expression (fn literal)
233-
expr := p.expr(.lowest)
234-
return p.complete_simple_stmt(expr, false)
235-
}
236-
.key_global {
237-
return p.global_decl([])
238-
}
239-
.key_interface {
240-
return p.interface_decl(false, [])
241-
}
242-
.key_pub {
243-
p.next()
244-
match p.tok {
245-
.key_const { return p.const_decl(true) }
246-
.key_enum { return p.enum_decl(true, []) }
247-
.key_fn { return p.fn_decl(true, []) }
248-
.key_interface { return p.interface_decl(true, []) }
249-
.key_struct, .key_union { return p.struct_decl(true, []) }
250-
.key_type { return p.type_decl(true) }
251-
else { p.error('not implemented: pub ${p.tok}') }
252-
}
253-
}
254-
.key_struct, .key_union {
255-
return p.struct_decl(false, [])
256-
}
257-
.key_type {
258-
return p.type_decl(false)
259-
}
260268
.key_asm {
261269
return p.asm_stmt()
262270
}
@@ -1960,11 +1968,14 @@ fn (mut p Parser) fn_decl(is_public bool, attributes []ast.Attribute) ast.FnDecl
19601968
if p.tok != .lcbr {
19611969
return_type = p.expect_type()
19621970
}
1971+
prev_top_level := p.in_top_level
1972+
p.in_top_level = false
19631973
stmts := if p.tok == .lcbr {
19641974
p.block()
19651975
} else {
19661976
[]ast.Stmt{}
19671977
}
1978+
p.in_top_level = prev_top_level
19681979
p.expect(.semicolon)
19691980
return ast.FnDecl{
19701981
attributes: attributes
@@ -2010,11 +2021,14 @@ fn (mut p Parser) fn_decl(is_public bool, attributes []ast.Attribute) ast.FnDecl
20102021
typ := p.fn_type()
20112022
// p.log('ast.FnDecl: ${name} ${p.lit} - ${p.tok} (${p.lit}) - ${p.tok_next_}')
20122023
// also check line for better error detection
2024+
prev_top_level := p.in_top_level
2025+
p.in_top_level = false
20132026
stmts := if p.tok == .lcbr {
20142027
p.block()
20152028
} else {
20162029
[]ast.Stmt{}
20172030
}
2031+
p.in_top_level = prev_top_level
20182032
p.expect(.semicolon)
20192033
return ast.FnDecl{
20202034
attributes: attributes

0 commit comments

Comments
 (0)