Skip to content

Commit 3dd96de

Browse files
authored
v2: finish flat AST migration (#27372)
1 parent d364167 commit 3dd96de

23 files changed

Lines changed: 1465 additions & 216 deletions

vlib/v2/README.md

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -4,8 +4,10 @@
44

55
## Flat AST
66

7-
`v2.ast` now includes an index-based flat AST graph for tooling and profiling:
7+
`v2.ast` is the default index-based AST graph used by the v2 builder for
8+
parsing, type checking, markused, and native SSA input:
89

910
- `ast.flatten_files(files)` builds `ast.FlatAst` with contiguous `nodes` and `edges`.
1011
- `ast.legacy_ast_stats(files)` estimates memory/shape metrics for recursive AST files.
1112
- `flat.stats()` and `flat.count_reachable_nodes()` report flat graph size and reachability.
13+
- `V2_LEGACY_AST=1` keeps the old recursive-AST pipeline available for comparison.

vlib/v2/ast/cursor.v

Lines changed: 33 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -73,6 +73,39 @@ pub fn (c Cursor) name_id() int {
7373
return c.flat.nodes[c.id].name_id
7474
}
7575

76+
// ident reads an expr_ident cursor directly into an Ident.
77+
pub fn (c Cursor) ident() Ident {
78+
if !c.is_valid() || c.kind() != .expr_ident {
79+
return Ident{}
80+
}
81+
return Ident{
82+
pos: c.pos()
83+
name: c.name()
84+
}
85+
}
86+
87+
// import_stmt reads a stmt_import cursor directly into an ImportStmt.
88+
pub fn (c Cursor) import_stmt() ImportStmt {
89+
if !c.is_valid() || c.kind() != .stmt_import {
90+
return ImportStmt{}
91+
}
92+
mut symbols := []Expr{cap: c.edge_count()}
93+
for i in 0 .. c.edge_count() {
94+
sym := c.edge(i)
95+
if sym.kind() == .expr_ident {
96+
symbols << Expr(sym.ident())
97+
} else if sym.is_valid() {
98+
symbols << c.flat.decode_expr(sym.id)
99+
}
100+
}
101+
return ImportStmt{
102+
name: c.name()
103+
alias: c.extra_str()
104+
is_aliased: c.flag(flag_is_aliased)
105+
symbols: symbols
106+
}
107+
}
108+
76109
@[inline]
77110
pub fn (c Cursor) edge_count() int {
78111
return c.flat.nodes[c.id].edge_count

vlib/v2/ast/flat_reader.v

Lines changed: 12 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -93,9 +93,12 @@ fn (r &FlatReader) read_file(ff FlatFile) File {
9393
}
9494
mut imports := []ImportStmt{}
9595
for cid in r.list_children(imports_id) {
96-
s := r.read_stmt(cid)
97-
if s is ImportStmt {
98-
imports << s
96+
c := Cursor{
97+
flat: r.flat
98+
id: cid
99+
}
100+
if c.kind() == .stmt_import {
101+
imports << c.import_stmt()
99102
}
100103
}
101104
mut stmts := []Stmt{}
@@ -123,9 +126,12 @@ pub fn (flat &FlatAst) read_file_imports(ff FlatFile) []ImportStmt {
123126
imports_id := r.edge(n, 1)
124127
mut imports := []ImportStmt{}
125128
for cid in r.list_children(imports_id) {
126-
s := r.read_stmt(cid)
127-
if s is ImportStmt {
128-
imports << s
129+
c := Cursor{
130+
flat: r.flat
131+
id: cid
132+
}
133+
if c.kind() == .stmt_import {
134+
imports << c.import_stmt()
129135
}
130136
}
131137
return imports

vlib/v2/builder/builder.v

Lines changed: 86 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -48,10 +48,10 @@ mut:
4848
used_vh_for_parse bool
4949
used_import_vh_for_parse bool
5050
used_virtual_vh_for_parse bool
51-
flat_roundtrip_enabled bool // V2_FLAT_ROUNDTRIP=1: route parses through streaming + to_files()
52-
flat_check_enabled bool // V2_CHECK_FLAT=1: route type-check through Checker.check_flat
53-
markused_flat_enabled bool // V2_MARKUSED_FLAT=1: route markused through mark_used_flat shim
54-
flat_ssa_enabled bool // V2_FLAT_SSA=1: route the (sequential) native SSA build through build_all_from_flat on the post-transform b.flat. Requires V2_CHECK_FLAT + V2_MARKUSED_FLAT so b.flat is post-transform-populated. Default off.
51+
flat_roundtrip_enabled bool // V2_FLAT_ROUNDTRIP=1: legacy comparison mode; route parses through streaming + to_files().
52+
flat_check_enabled bool // Default on: stream parse/type-check through FlatAst. V2_LEGACY_AST=1 disables it unless V2_CHECK_FLAT=1 is set.
53+
markused_flat_enabled bool // Default on: route markused through mark_used_flat.
54+
flat_ssa_enabled bool // Default on: route SSA codegen through build_all_from_flat on the post-transform b.flat.
5555
// flat caches the FlatAst representation of b.files. When
5656
// flat_check_enabled is set, parse_batch streams directly into
5757
// flat_builder so b.flat is built incrementally during parsing rather
@@ -71,15 +71,17 @@ mut:
7171
}
7272

7373
pub fn new_builder(prefs &pref.Preferences) &Builder {
74+
legacy_ast_enabled := os.getenv('V2_LEGACY_AST') != ''
75+
flat_default_enabled := !legacy_ast_enabled
7476
unsafe {
7577
return &Builder{
7678
pref: prefs
7779
used_fn_keys: map[string]bool{}
7880
cached_called_fn_names: map[string]bool{}
7981
flat_roundtrip_enabled: os.getenv('V2_FLAT_ROUNDTRIP') != ''
80-
flat_check_enabled: os.getenv('V2_CHECK_FLAT') != ''
81-
markused_flat_enabled: os.getenv('V2_MARKUSED_FLAT') != ''
82-
flat_ssa_enabled: os.getenv('V2_FLAT_SSA') != ''
82+
flat_check_enabled: flat_default_enabled || os.getenv('V2_CHECK_FLAT') != ''
83+
markused_flat_enabled: flat_default_enabled || os.getenv('V2_MARKUSED_FLAT') != ''
84+
flat_ssa_enabled: flat_default_enabled || os.getenv('V2_FLAT_SSA') != ''
8385
}
8486
}
8587
}
@@ -108,6 +110,20 @@ fn (b &Builder) backend_uses_markused_pruning() bool {
108110
return b.pref.backend != .arm64
109111
}
110112

113+
fn (b &Builder) should_build_ssa_from_flat() bool {
114+
return b.flat.files.len > 0 && b.flat_ssa_enabled && b.markused_flat_enabled
115+
&& b.flat_check_enabled
116+
}
117+
118+
fn (b &Builder) should_keep_flat_for_codegen() bool {
119+
flat_ssa_codegen := b.flat_ssa_enabled && b.markused_flat_enabled && b.flat_check_enabled
120+
return match b.pref.backend {
121+
.c { flat_ssa_codegen }
122+
.x64, .arm64 { flat_ssa_codegen || b.native_flat_pipeline_enabled }
123+
else { false }
124+
}
125+
}
126+
111127
fn (b &Builder) can_compile_cleanc_locally() bool {
112128
if b.pref == unsafe { nil } {
113129
return true
@@ -263,20 +279,26 @@ pub fn (mut b Builder) build(files []string) {
263279
// transform_files_from_flat, parallel streams per-worker via
264280
// to_files_range. No up-front full rehydration needed in either case.
265281
//
266-
// When V2_MARKUSED_FLAT is also enabled, both transform paths route
267-
// through their *_to_flat wedge so the post-transform flatten lives
268-
// inside the transformer call (one round-trip), avoiding a separate
269-
// flatten_files() pass before mark_used_flat.
282+
// Flat markused routes transform through flat-output wedges. Backends that
283+
// can consume flat codegen use the direct flat-output path and drop the
284+
// transformed []ast.File result; legacy backends keep the compatibility
285+
// wedge that returns both flat and files.
270286
mut flat_populated_by_transform := false
287+
transform_flat_only := b.should_keep_flat_for_codegen()
271288
if sequential_transform {
272289
if use_native_flat_pipeline && !b.flat_check_enabled {
273290
b.flat = trans.transform_files_to_flat_direct(b.files)
274291
b.files = []ast.File{}
275292
flat_populated_by_transform = true
276293
} else if use_flat_markused {
277-
new_flat, files_out := trans.transform_files_to_flat(&b.flat, b.files)
278-
b.flat = new_flat
279-
b.files = files_out
294+
if transform_flat_only {
295+
b.flat = trans.transform_flat_to_flat_direct(&b.flat, b.files)
296+
b.files = []ast.File{}
297+
} else {
298+
new_flat, files_out := trans.transform_files_to_flat(&b.flat, b.files)
299+
b.flat = new_flat
300+
b.files = files_out
301+
}
280302
flat_populated_by_transform = true
281303
} else if b.flat_check_enabled {
282304
b.files = trans.transform_files_from_flat(&b.flat, b.files)
@@ -289,9 +311,14 @@ pub fn (mut b Builder) build(files []string) {
289311
b.files = []ast.File{}
290312
flat_populated_by_transform = true
291313
} else if use_flat_markused {
292-
new_flat, files_out := b.transform_files_parallel_to_flat(mut trans)
293-
b.flat = new_flat
294-
b.files = files_out
314+
if transform_flat_only {
315+
b.flat = trans.transform_flat_to_flat_direct(&b.flat, b.files)
316+
b.files = []ast.File{}
317+
} else {
318+
new_flat, files_out := b.transform_files_parallel_to_flat(mut trans)
319+
b.flat = new_flat
320+
b.files = files_out
321+
}
295322
flat_populated_by_transform = true
296323
} else if b.flat_check_enabled {
297324
b.files = b.transform_files_parallel_from_flat(mut trans)
@@ -308,17 +335,15 @@ pub fn (mut b Builder) build(files []string) {
308335
b.used_fn_keys = map[string]bool{}
309336
} else {
310337
mark_used_start := sw.elapsed()
311-
// V2_MARKUSED_FLAT only takes effect when V2_CHECK_FLAT is also on,
312-
// since b.flat is only populated when flat_check_enabled streams
313-
// parses into flat_builder. Without that, b.flat is empty and the
314-
// shim would walk nothing.
338+
// Flat markused consumes the post-transform FlatAst. Legacy comparison
339+
// mode (`V2_LEGACY_AST=1`) can still reach the AST walker unless one of
340+
// the flat env flags explicitly re-enables this path.
315341
//
316-
// The transformer mutates b.files but does not write back into
317-
// b.flat. Both sequential and parallel paths now populate b.flat
318-
// as part of their *_to_flat wedge when V2_MARKUSED_FLAT is on,
319-
// so the separate flatten_files() pass is gone. The branch below
320-
// remains as a defensive fallback for any future code path that
321-
// reaches markused without having set flat_populated_by_transform.
342+
// The transformer mutates b.files but does not write back into b.flat.
343+
// Both sequential and parallel paths now populate b.flat as part of
344+
// their *_to_flat wedge, so the separate flatten_files() pass is gone.
345+
// The branch below remains as a defensive fallback for any future code
346+
// path that reaches markused without setting flat_populated_by_transform.
322347
if use_flat_markused && !flat_populated_by_transform {
323348
b.flat = ast.flatten_files(b.files)
324349
}
@@ -340,16 +365,11 @@ pub fn (mut b Builder) build(files []string) {
340365
}
341366
mark_used_time := time.Duration(sw.elapsed() - mark_used_start)
342367
print_time('Mark Used', mark_used_time)
343-
// b.flat is unused by the legacy codegen path. Under -gc none this is a
344-
// no-op for peak memory, but keep the lifetime explicit for readers.
345-
// When V2_FLAT_SSA or the native flat pipeline is on, the native SSA
346-
// build consumes b.flat directly (build_all_from_flat), so keep it alive
347-
// through codegen.
348-
if !b.flat_ssa_enabled && !b.native_flat_pipeline_enabled {
349-
b.flat = ast.FlatAst{}
350-
}
351368
print_rss('after markused')
352369
}
370+
if b.flat_check_enabled && !b.should_keep_flat_for_codegen() {
371+
b.flat = ast.FlatAst{}
372+
}
353373

354374
// Generate output based on backend
355375
match b.pref.backend {
@@ -601,7 +621,13 @@ fn (mut b Builder) gen_ssa_c() {
601621
ssa_builder.target_os = b.pref.target_os_or_host()
602622

603623
mut stage_start := sw.elapsed()
604-
ssa_builder.build_all(b.files)
624+
mut built_from_flat := false
625+
if b.should_build_ssa_from_flat() {
626+
ssa_builder.build_all_from_flat(&b.flat)
627+
built_from_flat = true
628+
} else {
629+
ssa_builder.build_all(b.files)
630+
}
605631
print_time('SSA Build', time.Duration(sw.elapsed() - stage_start))
606632

607633
// TODO: re-enable SSA optimization once the new builder is mature
@@ -618,6 +644,9 @@ fn (mut b Builder) gen_ssa_c() {
618644
}
619645

620646
if output_name.ends_with('.c') {
647+
if built_from_flat {
648+
b.flat = ast.FlatAst{}
649+
}
621650
stage_start = sw.elapsed()
622651
mut gen := c.new_gen(mod)
623652
c_source := gen.gen()
@@ -633,6 +662,12 @@ fn (mut b Builder) gen_ssa_c() {
633662

634663
cc := if b.pref.ccompiler.len > 0 { b.pref.ccompiler } else { configured_cc(b.pref.vroot) }
635664
directive_flags := b.collect_cflags_from_sources()
665+
if built_from_flat {
666+
// SSA has copied the program into MIR, and directive scanning has read
667+
// source names from the FlatAst. Keep the later C generator/compiler
668+
// working sets clear of the transformed FlatAst.
669+
b.flat = ast.FlatAst{}
670+
}
636671
mut cc_flag_parts := []string{}
637672
env_flags := configured_cflags()
638673
if env_flags.trim_space() != '' {
@@ -1684,7 +1719,7 @@ fn (mut b Builder) prepare_macos_tiny_candidate_source_files() {
16841719
if !b.uses_macos_x64_tiny_object(.x64) {
16851720
return
16861721
}
1687-
b.macos_tiny_candidate_source_files = if b.flat_check_enabled {
1722+
b.macos_tiny_candidate_source_files = if b.flat_check_enabled && b.flat.files.len > 0 {
16881723
b.flat.to_files()
16891724
} else {
16901725
b.files.clone()
@@ -1998,6 +2033,12 @@ fn (b &Builder) collect_cflags_from_sources() string {
19982033
scan_paths << file.name
19992034
}
20002035
}
2036+
for ff in b.flat.files {
2037+
name := b.flat.file_name(ff)
2038+
if name != '' {
2039+
scan_paths << name
2040+
}
2041+
}
20012042
cflags_target_os := b.cflags_target_os_for_local_compile()
20022043
if !b.pref.skip_builtin {
20032044
target_os := cflags_target_os
@@ -2567,18 +2608,15 @@ fn (mut b Builder) build_native_mir_from_files(files []ast.File, arch pref.Arch,
25672608
}
25682609

25692610
mut stage_start := native_sw.elapsed()
2570-
// V2_FLAT_SSA: route the whole SSA build through the cursor-native
2571-
// build_all_from_flat on the post-transform b.flat (kept alive above).
2572-
// Sequential only (build_all_from_flat builds fn bodies in-phase). Default off.
2611+
// Route the whole SSA build through the cursor-native build_all_from_flat
2612+
// on the post-transform b.flat (kept alive above). Sequential only
2613+
// (build_all_from_flat builds fn bodies in-phase).
25732614
//
2574-
// b.flat is only POST-TRANSFORM when either V2_MARKUSED_FLAT has routed
2575-
// transform through transform_files_to_flat, or the native flat pipeline has
2576-
// emitted transform output directly into FlatAst. With V2_CHECK_FLAT but NOT
2577-
// V2_MARKUSED_FLAT, b.flat stays parse-time, so feeding it here would skip
2578-
// transformer rewrites.
2579-
build_from_flat := b.flat.files.len > 0
2580-
&& ((b.flat_ssa_enabled && b.markused_flat_enabled && b.flat_check_enabled)
2581-
|| (b.native_flat_pipeline_enabled && label == ''))
2615+
// b.flat is only POST-TRANSFORM when flat markused has routed transform
2616+
// through transform_files_to_flat, or the direct native flat pipeline has
2617+
// emitted transform output directly into FlatAst.
2618+
build_from_flat := b.should_build_ssa_from_flat()
2619+
|| (b.flat.files.len > 0 && b.native_flat_pipeline_enabled && label == '')
25822620
if build_from_flat {
25832621
ssa_builder.build_all_from_flat(&b.flat)
25842622
// SSA has copied the program into MIR; keep the FlatAst lifetime out of

vlib/v2/builder/flag_target_test.v

Lines changed: 30 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -22,6 +22,26 @@ fn collect_cflags_for_test_source(source string, mut prefs pref.Preferences) str
2222
return b.collect_cflags_from_sources()
2323
}
2424

25+
fn collect_cflags_for_flat_only_test_source(source string, mut prefs pref.Preferences) string {
26+
tmp_dir := os.join_path(os.vtmp_dir(), 'v2_builder_flag_target_flat_${os.getpid()}')
27+
os.mkdir_all(tmp_dir) or { panic(err) }
28+
defer {
29+
os.rmdir_all(tmp_dir) or {}
30+
}
31+
source_path := os.join_path(tmp_dir, 'main.v')
32+
os.write_file(source_path, source) or { panic(err) }
33+
prefs.skip_builtin = true
34+
mut b := new_builder(&prefs)
35+
b.flat = ast.flatten_files([
36+
ast.File{
37+
name: source_path
38+
mod: 'main'
39+
},
40+
])
41+
b.files = []ast.File{}
42+
return b.collect_cflags_from_sources()
43+
}
44+
2545
fn freestanding_test_call_name(name string, user_defines []string) string {
2646
return freestanding_test_call_name_with_hooks(name, user_defines, [])
2747
}
@@ -304,6 +324,16 @@ $if linux {
304324
assert free_explicit_flags.contains('-DOPTIONAL_FREE_BLOCK')
305325
}
306326

327+
fn test_collect_cflags_from_sources_reads_flat_file_names() {
328+
source := 'module main
329+
330+
#flag -DFLAT_ONLY_FLAG
331+
'
332+
mut prefs := pref.new_preferences()
333+
flags := collect_cflags_for_flat_only_test_source(source, mut prefs)
334+
assert flags.contains('-DFLAT_ONLY_FLAG')
335+
}
336+
307337
fn test_collect_cflags_from_sources_keeps_optional_os_flags_custom_only() {
308338
source := 'module main
309339

vlib/v2/builder/flat_streaming_test.v

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -114,7 +114,7 @@ fn test_streaming_real_source_file() {
114114
assert flat_batch.signature() == flat_stream.signature(), 'real-source streaming mismatch'
115115
}
116116

117-
fn test_check_flat_default_parallel_transform_keeps_user_files() {
117+
fn test_default_flat_parallel_transform_keeps_user_files() {
118118
path := write_tmp_file('parallel_transform_main', 'module main
119119
120120
fn main() {
@@ -126,7 +126,7 @@ fn main() {
126126
os.rm(path) or {}
127127
os.rm(out_path) or {}
128128
}
129-
cmd := 'V2_CHECK_FLAT=1 ${os.quoted_path(@VEXE)} -v2 -nocache -o ${os.quoted_path(out_path)} ${os.quoted_path(path)} 2>&1'
129+
cmd := '${os.quoted_path(@VEXE)} -v2 -nocache -o ${os.quoted_path(out_path)} ${os.quoted_path(path)} 2>&1'
130130
res := os.execute(cmd)
131131
assert res.exit_code == 0, res.output
132132
run_res := os.execute(os.quoted_path(out_path))

0 commit comments

Comments
 (0)