Skip to content

Commit 14e53cd

Browse files
authored
v2: flat-AST migration phase 5
1 parent 8216926 commit 14e53cd

8 files changed

Lines changed: 2932 additions & 237 deletions

File tree

vlib/v2/ast/ast.v

Lines changed: 14 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -977,19 +977,25 @@ pub:
977977
// Type Nodes
978978
pub struct ArrayType {
979979
pub:
980-
elem_type Expr
980+
elem_type Expr = empty_expr
981981
}
982982

983983
pub struct ArrayFixedType {
984984
pub:
985-
len Expr
986-
elem_type Expr
985+
len Expr = empty_expr
986+
elem_type Expr = empty_expr
987987
}
988988

989989
pub struct ChannelType {
990990
pub:
991-
cap Expr
992-
elem_type Expr
991+
// s255: cap defaults to empty_expr (like ThreadType/OptionType/etc.). The
992+
// parser omits cap for a bare `chan T` (type.v: `ChannelType{elem_type: ...}`),
993+
// which previously left it a zero-valued Expr (invalid sum-type tag, null
994+
// payload). Encoding that via FlatBuilder.add_expr crashes the arm64 self-host
995+
// — an exhaustive match on the unmatched tag falls into the first arm
996+
// (ArrayInitExpr) and derefs the null payload (same class as s251).
997+
cap Expr = empty_expr
998+
elem_type Expr = empty_expr
993999
}
9941000

9951001
pub struct ThreadType {
@@ -1050,14 +1056,14 @@ pub:
10501056

10511057
pub struct GenericType {
10521058
pub:
1053-
name Expr
1059+
name Expr = empty_expr
10541060
params []Expr
10551061
}
10561062

10571063
pub struct MapType {
10581064
pub:
1059-
key_type Expr
1060-
value_type Expr
1065+
key_type Expr = empty_expr
1066+
value_type Expr = empty_expr
10611067
}
10621068

10631069
pub struct NilType {}

vlib/v2/ast/flat.v

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1714,7 +1714,20 @@ fn (mut b FlatBuilder) add_stmt(stmt Stmt) FlatNodeId {
17141714
flags |= flag_is_static
17151715
}
17161716
mut edges := []FlatEdge{}
1717-
b.push_edge(mut edges, b.add_parameter(stmt.receiver))
1717+
if stmt.is_method {
1718+
b.push_edge(mut edges, b.add_parameter(stmt.receiver))
1719+
} else {
1720+
// s251: a non-method FnDecl keeps the parser's zero `ast.Parameter{}`
1721+
// receiver, whose `typ` is a zero-valued Expr (invalid sum-type tag,
1722+
// null payload). Routing that through add_expr is unsafe on the arm64
1723+
// self-host: an exhaustive match on an unmatched tag falls into the
1724+
// first arm (ArrayInitExpr) and derefs the null payload. The receiver
1725+
// edge is only read for methods, so emit a clean empty receiver
1726+
// (typ = empty_expr, a valid EmptyExpr) for non-methods.
1727+
b.push_edge(mut edges, b.add_parameter(Parameter{
1728+
typ: empty_expr
1729+
}))
1730+
}
17181731
b.push_edge(mut edges, b.add_type(Type(stmt.typ)))
17191732
b.push_edge(mut edges, b.make_list_attribute(stmt.attributes))
17201733
b.push_edge(mut edges, b.make_list_stmt(stmt.stmts))

vlib/v2/ast/flat_reader.v

Lines changed: 90 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -185,8 +185,7 @@ pub fn (flat &FlatAst) decode_fn_decl_signature(id FlatNodeId) FnDecl {
185185
recv_id := r.edge(n, 0)
186186
typ_id := r.edge(n, 1)
187187
attrs_id := r.edge(n, 2)
188-
typ_node := r.read_type(typ_id)
189-
fn_typ := if typ_node is FnType { typ_node } else { FnType{} }
188+
fn_typ := r.read_fn_type(typ_id)
190189
return FnDecl{
191190
attributes: r.read_attr_list(attrs_id)
192191
is_public: (n.flags & flag_is_public) != 0
@@ -365,6 +364,77 @@ fn (r &FlatReader) read_parameter_list(id FlatNodeId) []Parameter {
365364
return out
366365
}
367366

367+
// read_fn_type (s252) decodes a `.typ_fn` node straight into a FnType, without
368+
// the read_type→`Type(FnType{...})`→`is FnType` round-trip. Boxing a large
369+
// struct into the Type sum type and then unboxing it via smartcast corrupts the
370+
// FnType's slice headers (generic_params/params) on the arm64 self-host (the
371+
// documented chained-access/smartcast bug). This path is only ever exercised by
372+
// the flat decode, so the default self-host never hit it. Edge layout matches
373+
// the encoder's FnType arm and read_type's `.typ_fn`: 0=generics, 1=params,
374+
// 2=return_type. Returns an empty FnType for ids that are out of range or don't point at a `.typ_fn`.
375+
fn (r &FlatReader) read_fn_type(id FlatNodeId) FnType {
376+
if id < 0 || id >= r.flat.nodes.len {
377+
return FnType{}
378+
}
379+
n := r.node(id)
380+
if n.kind != .typ_fn {
381+
return FnType{}
382+
}
383+
return FnType{
384+
generic_params: r.read_expr_list(r.edge(n, 0))
385+
params: r.read_parameter_list(r.edge(n, 1))
386+
return_type: r.read_expr(r.edge(n, 2))
387+
}
388+
}
389+
390+
// read_ident (s254) decodes an expr_ident node straight into an Ident, avoiding
391+
// the read_expr→Expr→`is Ident` smartcast-unbox. Copying a struct out of the Expr
392+
// sum type via smartcast corrupts its fields (here the `name` string header) on
393+
// the arm64 self-host (same chained-access/smartcast bug as the FnType unbox in
394+
// s252). Returns an empty Ident for ids that are out of range or don't point at an expr_ident.
395+
fn (r &FlatReader) read_ident(id FlatNodeId) Ident {
396+
if id < 0 || id >= r.flat.nodes.len {
397+
return Ident{}
398+
}
399+
n := r.node(id)
400+
if n.kind != .expr_ident {
401+
return Ident{}
402+
}
403+
return Ident{
404+
pos: n.pos
405+
name: r.get_str(n.name_id)
406+
}
407+
}
408+
409+
// read_assign_stmt (s254) decodes a stmt_assign node straight into an AssignStmt,
410+
// avoiding the read_stmt→Stmt→`is AssignStmt` unbox (which corrupts the lhs/rhs
411+
// slice headers on arm64). Mirrors the `.stmt_assign` arm of read_stmt. Returns
412+
// an empty AssignStmt for ids that are out of range or don't point at a stmt_assign.
413+
fn (r &FlatReader) read_assign_stmt(id FlatNodeId) AssignStmt {
414+
if id < 0 || id >= r.flat.nodes.len {
415+
return AssignStmt{}
416+
}
417+
n := r.node(id)
418+
if n.kind != .stmt_assign {
419+
return AssignStmt{}
420+
}
421+
lhs_len := n.extra // edges [0, extra) are the lhs exprs; edges [extra, edge_count) are the rhs exprs
422+
mut lhs := []Expr{cap: lhs_len}
423+
for i in 0 .. lhs_len {
424+
lhs << r.read_expr(r.edge(n, i))
425+
}
426+
mut rhs := []Expr{cap: n.edge_count - lhs_len}
427+
for i in lhs_len .. n.edge_count {
428+
rhs << r.read_expr(r.edge(n, i))
429+
}
430+
return AssignStmt{
431+
op: unsafe { token.Token(int(n.aux)) } // the operator is rebuilt from the integer stored in the node's aux field
432+
lhs: lhs
433+
rhs: rhs
434+
pos: n.pos
435+
}
436+
}
437+
368438
fn (r &FlatReader) read_string_list(id FlatNodeId) []string {
369439
if id < 0 {
370440
return []string{}
@@ -499,8 +569,7 @@ fn (r &FlatReader) read_stmt(id FlatNodeId) Stmt {
499569
typ_id := r.edge(n, 1)
500570
attrs_id := r.edge(n, 2)
501571
stmts_id := r.edge(n, 3)
502-
typ_node := r.read_type(typ_id)
503-
fn_typ := if typ_node is FnType { typ_node } else { FnType{} }
572+
fn_typ := r.read_fn_type(typ_id)
504573
return Stmt(FnDecl{
505574
attributes: r.read_attr_list(attrs_id)
506575
is_public: (n.flags & flag_is_public) != 0
@@ -737,8 +806,7 @@ fn (r &FlatReader) read_expr(id FlatNodeId) Expr {
737806
}
738807
.expr_fn_literal {
739808
fn_typ_id := r.edge(n, 0)
740-
typ_node := r.read_type(fn_typ_id)
741-
fn_typ := if typ_node is FnType { typ_node } else { FnType{} }
809+
fn_typ := r.read_fn_type(fn_typ_id)
742810
cap_len := n.extra
743811
mut captured := []Expr{cap: cap_len}
744812
for i in 0 .. cap_len {
@@ -795,10 +863,11 @@ fn (r &FlatReader) read_expr(id FlatNodeId) Expr {
795863
})
796864
}
797865
.expr_if_guard {
798-
child := r.read_stmt(r.edge(n, 0))
799-
assign := if child is AssignStmt { child } else { AssignStmt{} }
866+
// s254: read the assign straight into an AssignStmt; the old
867+
// `read_stmt → if child is AssignStmt { child }` unbox corrupted its
868+
// lhs/rhs slice headers on arm64.
800869
return Expr(IfGuardExpr{
801-
stmt: assign
870+
stmt: r.read_assign_stmt(r.edge(n, 0))
802871
pos: n.pos
803872
})
804873
}
@@ -850,9 +919,12 @@ fn (r &FlatReader) read_expr(id FlatNodeId) Expr {
850919
expr := r.read_expr(r.edge(n, 0))
851920
mut args := []Ident{cap: n.edge_count - 1}
852921
for i in 1 .. n.edge_count {
853-
e := r.read_expr(r.edge(n, i))
854-
if e is Ident {
855-
args << e
922+
// s254: read each arg straight into an Ident (the old
923+
// `read_expr → if e is Ident { args << e }` unbox corrupted the
924+
// Ident's `name` on arm64).
925+
arg_id := r.edge(n, i)
926+
if arg_id >= 0 && arg_id < r.flat.nodes.len && r.node(arg_id).kind == .expr_ident {
927+
args << r.read_ident(arg_id)
856928
}
857929
}
858930
return Expr(LambdaExpr{
@@ -982,12 +1054,13 @@ fn (r &FlatReader) read_expr(id FlatNodeId) Expr {
9821054
})
9831055
}
9841056
.expr_selector {
985-
lhs := r.read_expr(r.edge(n, 0))
986-
rhs := r.read_expr(r.edge(n, 1))
987-
ident := if rhs is Ident { rhs } else { Ident{} }
1057+
// s254: read the rhs straight into an Ident; the previous
1058+
// `read_expr → if rhs is Ident { rhs }` unbox corrupted the Ident's
1059+
// `name` on the arm64 self-host, yielding garbage `missing X.<name>`
1060+
// checker errors for selector types like `strings.Builder`.
9881061
return Expr(SelectorExpr{
989-
lhs: lhs
990-
rhs: ident
1062+
lhs: r.read_expr(r.edge(n, 0))
1063+
rhs: r.read_ident(r.edge(n, 1))
9911064
pos: n.pos
9921065
})
9931066
}

vlib/v2/builder/builder.v

Lines changed: 23 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,7 @@ mut:
4949
flat_roundtrip_enabled bool // V2_FLAT_ROUNDTRIP=1: route parses through streaming + to_files()
5050
flat_check_enabled bool // V2_CHECK_FLAT=1: route type-check through Checker.check_flat
5151
markused_flat_enabled bool // V2_MARKUSED_FLAT=1: route markused through mark_used_flat shim
52+
flat_ssa_enabled bool // V2_FLAT_SSA=1: route the (sequential) native SSA build through build_all_from_flat on the post-transform b.flat. Requires V2_CHECK_FLAT + V2_MARKUSED_FLAT so b.flat is post-transform-populated. Default off.
5253
// flat caches the FlatAst representation of b.files. When
5354
// flat_check_enabled is set, parse_batch streams directly into
5455
// flat_builder so b.flat is built incrementally during parsing rather
@@ -73,6 +74,7 @@ pub fn new_builder(prefs &pref.Preferences) &Builder {
7374
flat_roundtrip_enabled: os.getenv('V2_FLAT_ROUNDTRIP') != ''
7475
flat_check_enabled: os.getenv('V2_CHECK_FLAT') != ''
7576
markused_flat_enabled: os.getenv('V2_MARKUSED_FLAT') != ''
77+
flat_ssa_enabled: os.getenv('V2_FLAT_SSA') != ''
7678
}
7779
}
7880
}
@@ -329,10 +331,14 @@ pub fn (mut b Builder) build(files []string) {
329331
}
330332
mark_used_time := time.Duration(sw.elapsed() - mark_used_start)
331333
print_time('Mark Used', mark_used_time)
332-
// b.flat is unused by the codegen path; drop the arenas so a GC build
333-
// can reclaim them. Under -gc none this is a no-op for peak memory,
334+
// b.flat is unused by the legacy codegen path; drop the arenas so a GC
335+
// build can reclaim them. Under -gc none this is a no-op for peak memory,
334336
// but it documents the lifetime correctly for the eventual GC switch.
335-
b.flat = ast.FlatAst{}
337+
// When V2_FLAT_SSA is on, the native SSA build consumes b.flat directly
338+
// (build_all_from_flat), so keep it alive through codegen.
339+
if !b.flat_ssa_enabled {
340+
b.flat = ast.FlatAst{}
341+
}
336342
print_rss('after markused')
337343
print_heap('after markused')
338344
}
@@ -2496,7 +2502,20 @@ fn (mut b Builder) build_native_mir_from_files(files []ast.File, arch pref.Arch,
24962502
}
24972503

24982504
mut stage_start := native_sw.elapsed()
2499-
if b.native_mir_build_sequential(label) {
2505+
// V2_FLAT_SSA: route the whole SSA build through the cursor-native
2506+
// build_all_from_flat on the post-transform b.flat (kept alive above).
2507+
// Sequential only (build_all_from_flat builds fn bodies in-phase). Default off.
2508+
//
2509+
// b.flat is only POST-TRANSFORM when V2_MARKUSED_FLAT is also on: that path
2510+
// routes transform through transform_files_to_flat, which re-flattens the
2511+
// transformed files back into b.flat. With V2_CHECK_FLAT but NOT
2512+
// V2_MARKUSED_FLAT, b.flat stays the parse-time (pre-transform) flat while the
2513+
// transformer only updates b.files, so feeding it to build_all_from_flat would
2514+
// skip every transformer rewrite. Require both flags here; otherwise fall back
2515+
// to the legacy build_all(files), which uses the post-transform b.files.
2516+
if b.flat_ssa_enabled && b.markused_flat_enabled && b.flat_check_enabled && b.flat.files.len > 0 {
2517+
ssa_builder.build_all_from_flat(&b.flat)
2518+
} else if b.native_mir_build_sequential(label) {
25002519
ssa_builder.build_all(files)
25012520
} else {
25022521
// Phases 1-3 sequential, Phase 4 parallel, Phase 5 sequential

0 commit comments

Comments
 (0)