diff --git a/CLAUDE.md b/CLAUDE.md index c160502..83571c7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -5,33 +5,29 @@ session and captures the operational state of the project plus the rules that must never be violated. The full specification lives in the claude.ai knowledge base — see § Quick links spec. -> **Status:** Phase −1 — S2 closed (code + validation), PR pending +> **Status:** Phase −1 — S3 closed (code + bench verdict GO), PR pending > -> S2 fully validated on three target machines: -> - **Win11 25H2 + RTX 4080 Super (60 Hz, DWM)** : median 16.663 ms / p95 17.606 ms / max 33.590 ms — at the 60 Hz vsync floor. -> - **Fedora 44 + Intel UHD 630 (Mesa ANV, 144 Hz, GNOME Wayland)** : median 6.939 ms / p95 7.358 ms / max 39.996 ms (single warmup outlier). -> - **Fedora 44 + GTX 1660 Ti (NVIDIA prop. 595.71.05, 144 Hz, GNOME Wayland)** : median 6.934 ms / p95 7.252 ms / max 21.008 ms. -> -> Two real bugs caught and fixed by hardware validation that CI would -> never have surfaced: `loadInstance` strict on NULL dispatch slots -> (commit `8a377f6`); `recreateSwapchain` not threading -> `oldSwapchain` into `VkSwapchainCreateInfoKHR` (commit `7c2fe91`). -> Plus a build-system gap: `bindgen-vk`/`bindgen-wayland` now chain -> `zig fmt` (commit `8282d0f`). Validation report at -> `validation/s2-go-nogo.md` ✅ GO on all three rows. -> PR `Phase -1 / Platform / Native Window + Vulkan Triangle` opens -> next; tag `v0.0.3-S2-window-vulkan-triangle` posted by Guy after -> squash-merge. +> S3 closed: lexer + parser + tabular SoA AST + minimal type-checker on +> the 5-construct subset (`component`, `resource`, `rule`, `when`, basic +> arithmetic expressions). Bench verdict on dev machine (Apple Silicon, +> macOS, ReleaseSafe, 1000 iterations + 50 warmups): worst median +> 0.019 ms, worst p99 0.028 ms, worst max 0.042 ms across 30 corpus +> files — well under the 5 ms / 15 ms / 25 ms gates respectively. +> Official verdict will be re-confirmed on the S2 reference machines by +> Guy. Validation: `zig build`, `zig build test` (debug + ReleaseSafe), +> `zig fmt --check` all green. PR `Phase -1 / Etch / S3 parser on +> subset` opens next; tag `v0.0.4-S3-etch-parser-subset` posted by Guy +> after squash-merge. ## Current state | Field | Value | |---|---| | Phase | −1 (Spikes) | -| Current milestone | S2 — Window + Vulkan triangle (CLOSED, PR pending) | -| Last released tag | `v0.0.2-S1-mini-ecs` | -| Active branch | `phase-pre-0/platform/window-vulkan-triangle` | -| Next planned milestone | S3 — Etch grammar EBNF v0.5 | +| Current milestone | S3 — Etch parser on subset (CLOSED, PR pending) | +| Last released tag | `v0.0.3-S2-window-vulkan-triangle` | +| Active branch | `phase--1/etch/parser-subset` | +| Next planned milestone | S4 — Etch tree-walking interpreter | ## Tags @@ -39,6 +35,8 @@ knowledge base — see § Quick links spec. |---|---|---|---| | `v0.0.1-S0-bootstrap` | 2026-05-08 | S0 — Bootstrap repo and CI | First milestone. Build infra, CI on `{ubuntu-24.04, windows-2025} × {Debug, ReleaseSafe}`, lefthook, `CLAUDE.md`. Tag posted by Guy after merge of PR #1. | | `v0.0.2-S1-mini-ecs` | 2026-05-09 (planned) | S1 — Mini-ECS Zig | Comptime SoA archetype + Chase-Lev work-stealing scheduler. Validates the comptime + work-stealing hypothesis (100k entities iterated in 54.5 µs median ReleaseSafe on M4 Pro reference, gate 1 ms). Tag posted by Guy after squash-merge of PR `Phase -1 / Core / Mini-ECS Zig`. | +| `v0.0.3-S2-window-vulkan-triangle` | (planned) | S2 — Window + Vulkan triangle | Native Win32 + Wayland windowing, Vulkan triangle, no SDL/GLFW. Validated GO on Win11 + RTX 4080, Fedora 44 + UHD 630, Fedora 44 + GTX 1660 Ti. | +| `v0.0.4-S3-etch-parser-subset` | (planned) | S3 — Etch parser on subset | Lexer + parser + tabular SoA AST + minimal type-checker on 5 constructs. Bench verdict GO (worst median 0.019 ms vs 5 ms target on dev machine; re-confirmation on reference machine pending). | ## Hypotheses validated by spikes @@ -47,7 +45,7 @@ knowledge base — see § Quick links spec. | S0 | Infrastructure ready (no engineering hypothesis) | validated | | S1 | comptime ECS + Chase-Lev work-stealing iterates 100k entities < 1 ms | validated (54.5 µs median on M4 Pro) | | S2 | Window Win32 + Wayland + Vulkan triangle, native Zig, no SDL/GLFW | validated (3/3 target machines green, validation/s2-go-nogo.md ✅ GO) | -| S3 | Etch grammar EBNF v0.5 implementable, parsing < 5 ms / file | pending | +| S3 | Etch grammar EBNF v0.6 (S3 subset) implementable, parsing < 5 ms / file | validated (worst median 0.019 ms on dev Apple Silicon ReleaseSafe; reference-machine re-run pending) | | S4 | AST tree-walking interpreter executes Etch correctly with ECS bridge | pending | | S5 | Etch → Zig codegen viable build-time-wise (incremental < 2 s) | pending | | S6 | IPC editor↔runtime stable, < 1 ms RTT, 1h fuzz, kill -9 recovery | pending | @@ -124,4 +122,4 @@ The `briefs/` directory is the source of truth for milestone state. The brief's --- -Last updated: 2026-05-09 +Last updated: 2026-05-15 diff --git a/README.md b/README.md index fddc572..39cce30 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,7 @@ A game engine written in Zig 0.16.x. -> **Status:** Phase −1 — Native Window + Vulkan triangle spike (S2) +> **Status:** Phase −1 — Etch parser on subset (S3) > > Weld is in its earliest exploratory phase: the spike list of Phase −1 is > validating the core architectural hypotheses (comptime ECS, work-stealing @@ -23,6 +23,14 @@ A game engine written in Zig 0.16.x. > generators emitting ~34 000 lines of idiomatic Zig from the vendored > upstream registries, Vulkan 1.3 triangle render path. Full report: > [`validation/s2-go-nogo.md`](validation/s2-go-nogo.md). +> +> **S3** (closed, tag `v0.0.4-S3-etch-parser-subset` pending merge) +> validated the Etch grammar (EBNF v0.6, S3 subset: `component`, +> `resource`, `rule`, `when`, basic arithmetic expressions) — lexer + +> recursive-descent + Pratt parser + tabular SoA `AstArena` + minimal +> two-pass type-checker. Worst median 0.019 ms / file across 30 corpus +> files on dev Apple Silicon ReleaseSafe (gate: < 5 ms). Run the bench +> locally with `zig build bench-etch -Doptimize=ReleaseSafe`. ## Prerequisites @@ -37,9 +45,11 @@ A game engine written in Zig 0.16.x. ```sh zig build # build the weld executable zig build run # build and run (S2 spike — open window + render triangle) -zig build test # run all tests (34 across spike + ABI + ECS + jobs) +zig build test # run all tests (S0/S1/S2/S3: spike + ABI + ECS + jobs + Etch corpus) zig build bench-ecs -Doptimize=ReleaseSafe # S1 ECS iteration bench +zig build bench-etch -Doptimize=ReleaseSafe # S3 Etch parser bench (report under bench/results/) zig build bench-ecs -- --smoke # short bench run (used by CI) +zig build bench-etch -- --smoke # short Etch bench run (sanity) ./scripts/install-hooks.sh # install local git hooks (run once after clone) ``` @@ -76,7 +86,9 @@ src/ window.zig public Window interface (create/destroy/pollEvent/nativeHandles) window/{win32,wayland,stub}.zig per-OS backends (no SDL/GLFW, no @cImport) window/wayland_protocols/ ~3 000 lines — generated from wayland XMLs by tools/wayland_gen + etch/ S3 Etch parser — lexer, parser, tabular SoA AST, type-checker spike/ throwaway S2 spike code (CLI parser, scoring, vk_setup, vk_frame, ppm) +tests/etch/ Etch corpus driver + ~30 valid + ~10 invalid `.etch` fixtures tools/ vk_gen/ XML → Zig generator for Vulkan bindings wayland_gen/ XML → Zig generator for Wayland protocol bindings diff --git a/bench/etch_parse.zig b/bench/etch_parse.zig new file mode 100644 index 0000000..be28740 --- /dev/null +++ b/bench/etch_parse.zig @@ -0,0 +1,328 @@ +//! S3 Etch parser benchmark. +//! +//! Iterates the valid corpus from `tests/etch/corpus/valid/` and measures +//! lexer-only, parser-only, type-checker-only, and total time per file at +//! N=1000 iterations. Computes median / p99 / max per file and per LOC +//! bucket (small <50, medium 50-150, large 150-300). +//! +//! Output: a Markdown report under `bench/results/s3-etch-parse-.md` +//! including machine info, Zig version, build mode, per-bucket table, +//! and an explicit verdict line on the `< 5 ms median per file` target. +//! +//! Pass `--smoke` for a CI sanity short-circuit (single iteration, no +//! report). The full bench is not run in CI — the verdict is captured on +//! the physical reference machine (cf. S2 convention, +//! `engine-development-workflow.md` §17 CI obligations). + +const std = @import("std"); +const builtin = @import("builtin"); +const etch = @import("weld_etch"); +const corpus_mod = @import("corpus_facade"); + +const Iterations: u32 = 1000; +const WarmupIterations: u32 = 50; + +const MedianGateNs: u64 = 5_000_000; // 5.0 ms +const P99GateNs: u64 = 15_000_000; // 15.0 ms +const MaxGateNs: u64 = 25_000_000; // 25.0 ms + +const corpus = corpus_mod.valid; + +const Bucket = enum { small, medium, large }; + +const Distribution = struct { + min: u64 = 0, + median: u64 = 0, + p99: u64 = 0, + max: u64 = 0, +}; + +const FileStats = struct { + name: []const u8, + loc: u32, + bucket: Bucket, + lexer: Distribution, + parser: Distribution, + type_check: Distribution, + total: Distribution, +}; + +fn countLines(source: []const u8) u32 { + var n: u32 = 1; + for (source) |b| if (b == '\n') { + n += 1; + }; + return n; +} + +fn classify(loc: u32) Bucket { + if (loc < 50) return .small; + if (loc < 150) return .medium; + return .large; +} + +fn distribution(samples: []u64) Distribution { + std.mem.sort(u64, samples, {}, std.sort.asc(u64)); + return .{ + .min = samples[0], + .median = samples[samples.len / 2], + .p99 = samples[(samples.len * 99) / 100], + .max = samples[samples.len - 1], + }; +} + +fn benchOne(gpa: std.mem.Allocator, io: std.Io, file: corpus_mod.Entry, smoke: bool) !FileStats { + const iters: usize = if (smoke) 1 else Iterations; + + var lexer_samples = try gpa.alloc(u64, iters); + defer gpa.free(lexer_samples); + var parser_samples = try gpa.alloc(u64, iters); + defer gpa.free(parser_samples); + var type_samples = try gpa.alloc(u64, iters); + defer gpa.free(type_samples); + var total_samples = try gpa.alloc(u64, iters); + defer gpa.free(total_samples); + + if (!smoke) { + var w: u32 = 0; + while (w < WarmupIterations) : (w += 1) { + try runOnce(gpa, io, file.source, null, null, null, null); + } + } + + var i: usize = 0; + while (i < iters) : (i += 1) { + try runOnce(gpa, io, file.source, &lexer_samples[i], &parser_samples[i], &type_samples[i], &total_samples[i]); + } + + return .{ + .name = file.name, + .loc = countLines(file.source), + .bucket = classify(countLines(file.source)), + .lexer = distribution(lexer_samples), + .parser = distribution(parser_samples), + .type_check = distribution(type_samples), + .total = distribution(total_samples), + }; +} + +fn nowNs(io: std.Io) std.Io.Timestamp { + return std.Io.Clock.now(.awake, io); +} + +fn deltaNs(a: std.Io.Timestamp, b: std.Io.Timestamp) u64 { + const dur = a.durationTo(b).nanoseconds; + return @intCast(@max(@as(i96, 0), dur)); +} + +fn runOnce(gpa: std.mem.Allocator, io: std.Io, source: []const u8, lexer_ns: ?*u64, parser_ns: ?*u64, type_ns: ?*u64, total_ns: ?*u64) !void { + const t_total_start = nowNs(io); + + // Lexer-only pass: tokenize through to EOF. + var lex = etch.Lexer.init(source); + defer lex.deinit(gpa); + const t_lex_start = nowNs(io); + while (true) { + const t = try lex.next(gpa); + if (t.kind == .eof) break; + } + const lex_ns = deltaNs(t_lex_start, nowNs(io)); + + // Parser pass (independent — parse() drives its own lexer). + const t_parse_start = nowNs(io); + var pr = try etch.parseSource(gpa, source); + defer pr.ast.deinit(gpa); + defer if (pr.diagnostic) |*d| d.deinit(gpa); + const parse_ns = deltaNs(t_parse_start, nowNs(io)); + if (pr.diagnostic) |_| return error.UnexpectedParseDiagnostic; + + // Type-check pass. + const t_type_start = nowNs(io); + var diags: std.ArrayListUnmanaged(etch.Diagnostic) = .empty; + defer { + for (diags.items) |*d| d.deinit(gpa); + diags.deinit(gpa); + } + try etch.typeCheck(gpa, &pr.ast, &diags); + const t_ns_val = deltaNs(t_type_start, nowNs(io)); + if (diags.items.len != 0) return error.UnexpectedTypeDiagnostic; + + if (lexer_ns) |p| p.* = lex_ns; + if (parser_ns) |p| p.* = parse_ns; + if (type_ns) |p| p.* = t_ns_val; + if (total_ns) |p| p.* = deltaNs(t_total_start, nowNs(io)); +} + +fn fmtMs(ns: u64, buf: []u8) ![]u8 { + const ms = @as(f64, @floatFromInt(ns)) / 1_000_000.0; + return try std.fmt.bufPrint(buf, "{d:.3} ms", .{ms}); +} + +const Stamp = struct { + year: u16, + month: u8, + day: u8, + hour: u8, + minute: u8, +}; + +fn wallClockStamp(io: std.Io) Stamp { + const wall = std.Io.Clock.now(.real, io); + const secs: u64 = @intCast(@max(@as(i96, 0), wall.toSeconds())); + const epoch_secs = std.time.epoch.EpochSeconds{ .secs = secs }; + const day = epoch_secs.getEpochDay(); + const day_secs = epoch_secs.getDaySeconds(); + const year_day = day.calculateYearDay(); + const month_day = year_day.calculateMonthDay(); + return .{ + .year = year_day.year, + .month = month_day.month.numeric(), + .day = @as(u8, month_day.day_index) + 1, + .hour = day_secs.getHoursIntoDay(), + .minute = day_secs.getMinutesIntoHour(), + }; +} + +/// Resolve the host name through whichever portable API the target OS +/// exposes. `std.posix.gethostname` only compiles on POSIX +/// (`std.posix.HOST_NAME_MAX` is `void` on Windows). On platforms without +/// a stable Zig API we fall back to `""` rather than pull in +/// a per-OS shim — the brief explicitly allowed this fallback. +/// +/// Gating is on `builtin.os.tag` (not `@hasDecl(std.posix, "HOST_NAME_MAX")`) +/// because the latter still returns `true` on Windows where the constant +/// is declared as a `void` placeholder; using its value still trips a +/// compile error. +const has_posix_hostname: bool = switch (builtin.os.tag) { + .windows => false, + else => true, +}; + +const hostname_buf_len: usize = if (has_posix_hostname) + std.posix.HOST_NAME_MAX +else + 1; // unused on non-POSIX; the value just has to compile + +fn hostnameOrUnavailable(buf: *[hostname_buf_len]u8) []const u8 { + if (comptime has_posix_hostname) { + return std.posix.gethostname(buf) catch ""; + } else { + return ""; + } +} + +fn writeReport(gpa: std.mem.Allocator, io: std.Io, stats: []const FileStats) !void { + const stamp = wallClockStamp(io); + + var buf: [256]u8 = undefined; + const filename = try std.fmt.bufPrint(&buf, "bench/results/s3-etch-parse-{d:0>4}{d:0>2}{d:0>2}-{d:0>2}{d:0>2}.md", .{ + stamp.year, stamp.month, stamp.day, stamp.hour, stamp.minute, + }); + + var dir = std.Io.Dir.cwd(); + var file = try dir.createFile(io, filename, .{}); + defer file.close(io); + + var report_buf: [4096]u8 = undefined; + var w = file.writer(io, &report_buf); + const writer = &w.interface; + + var host_buf: [hostname_buf_len]u8 = undefined; + const hostname = hostnameOrUnavailable(&host_buf); + + try writer.print("# S3 Etch parser bench — {s}\n\n", .{filename[14..]}); + try writer.print("Hostname: {s}\n", .{hostname}); + try writer.print("CPU model: {s}\n", .{builtin.cpu.model.name}); + try writer.print("Target: {s}-{s}\n", .{ @tagName(builtin.cpu.arch), @tagName(builtin.os.tag) }); + try writer.print("Zig {d}.{d}.{d}\n", .{ builtin.zig_version.major, builtin.zig_version.minor, builtin.zig_version.patch }); + try writer.print("Build mode: {s}\n", .{@tagName(builtin.mode)}); + try writer.print("Iterations per file: {d} (warmup {d})\n\n", .{ Iterations, WarmupIterations }); + + try writer.print("## Per-file timings (total = lexer + parser + type-checker)\n\n", .{}); + try writer.print("| File | LOC | Bucket | Median | p99 | Max | Lex | Parse | Check |\n", .{}); + try writer.print("|---|---|---|---|---|---|---|---|---|\n", .{}); + var time_buf: [16]u8 = undefined; + for (stats) |s| { + const median_str = try fmtMs(s.total.median, &time_buf); + try writer.print("| {s} | {d} | {s} | {s} |", .{ s.name, s.loc, @tagName(s.bucket), median_str }); + var b2: [16]u8 = undefined; + try writer.print(" {s} |", .{try fmtMs(s.total.p99, &b2)}); + try writer.print(" {s} |", .{try fmtMs(s.total.max, &b2)}); + try writer.print(" {s} |", .{try fmtMs(s.lexer.median, &b2)}); + try writer.print(" {s} |", .{try fmtMs(s.parser.median, &b2)}); + try writer.print(" {s} |\n", .{try fmtMs(s.type_check.median, &b2)}); + } + + // Per-bucket aggregation. + try writer.print("\n## Per-bucket aggregation\n\n", .{}); + try writer.print("| Bucket | Files | Worst median | Worst p99 | Worst max |\n", .{}); + try writer.print("|---|---|---|---|---|\n", .{}); + inline for (.{ Bucket.small, Bucket.medium, Bucket.large }) |b| { + var count: u32 = 0; + var worst_median: u64 = 0; + var worst_p99: u64 = 0; + var worst_max: u64 = 0; + for (stats) |s| { + if (s.bucket != b) continue; + count += 1; + if (s.total.median > worst_median) worst_median = s.total.median; + if (s.total.p99 > worst_p99) worst_p99 = s.total.p99; + if (s.total.max > worst_max) worst_max = s.total.max; + } + if (count > 0) { + var bm: [16]u8 = undefined; + var bp: [16]u8 = undefined; + var bx: [16]u8 = undefined; + try writer.print("| {s} | {d} | {s} | {s} | {s} |\n", .{ + @tagName(b), count, + try fmtMs(worst_median, &bm), try fmtMs(worst_p99, &bp), + try fmtMs(worst_max, &bx), + }); + } + } + + // Verdict. + var worst_median: u64 = 0; + var worst_p99: u64 = 0; + var worst_max: u64 = 0; + for (stats) |s| { + if (s.total.median > worst_median) worst_median = s.total.median; + if (s.total.p99 > worst_p99) worst_p99 = s.total.p99; + if (s.total.max > worst_max) worst_max = s.total.max; + } + const median_go = worst_median < MedianGateNs; + const p99_go = worst_p99 < P99GateNs; + const max_go = worst_max < MaxGateNs; + const verdict = if (median_go and p99_go and max_go) "GO" else "NO-GO"; + var vb: [16]u8 = undefined; + try writer.print("\n## Verdict — **{s}**\n\n", .{verdict}); + try writer.print("Target: < 5 ms median, < 15 ms p99, < 25 ms max per file.\n\n", .{}); + try writer.print("- Worst median across all files: {s}{s}\n", .{ try fmtMs(worst_median, &vb), if (median_go) " ✓" else " ✗" }); + try writer.print("- Worst p99: {s}{s}\n", .{ try fmtMs(worst_p99, &vb), if (p99_go) " ✓" else " ✗" }); + try writer.print("- Worst max: {s}{s}\n", .{ try fmtMs(worst_max, &vb), if (max_go) " ✓" else " ✗" }); + + try writer.flush(); + _ = gpa; +} + +pub fn main(init: std.process.Init) !void { + const gpa = init.gpa; + const arena = init.arena; + const io = init.io; + const args = try init.minimal.args.toSlice(arena.allocator()); + + var smoke = false; + for (args[1..]) |a| if (std.mem.eql(u8, a, "--smoke")) { + smoke = true; + }; + + var stats = try gpa.alloc(FileStats, corpus.len); + defer gpa.free(stats); + + for (corpus, 0..) |file, i| { + stats[i] = try benchOne(gpa, io, file, smoke); + } + + if (smoke) return; + try writeReport(gpa, io, stats); +} diff --git a/bench/results/s3-etch-parse-20260515-1144.md b/bench/results/s3-etch-parse-20260515-1144.md new file mode 100644 index 0000000..8493144 --- /dev/null +++ b/bench/results/s3-etch-parse-20260515-1144.md @@ -0,0 +1,58 @@ +# S3 Etch parser bench — s3-etch-parse-20260515-1144.md + +Hostname: Mac.lan +CPU model: apple_m4 +Target: aarch64-macos +Zig 0.16.0 +Build mode: ReleaseSafe +Iterations per file: 1000 (warmup 50) + +## Per-file timings (total = lexer + parser + type-checker) + +| File | LOC | Bucket | Median | p99 | Max | Lex | Parse | Check | +|---|---|---|---|---|---|---|---|---| +| components/health.etch | 33 | small | 0.009 ms | 0.011 ms | 0.031 ms | 0.003 ms | 0.005 ms | 0.000 ms | +| components/transform.etch | 29 | small | 0.008 ms | 0.010 ms | 0.026 ms | 0.003 ms | 0.004 ms | 0.000 ms | +| components/inventory.etch | 30 | small | 0.008 ms | 0.010 ms | 0.027 ms | 0.003 ms | 0.005 ms | 0.000 ms | +| components/combat.etch | 32 | small | 0.010 ms | 0.013 ms | 0.028 ms | 0.004 ms | 0.006 ms | 0.001 ms | +| components/movement.etch | 29 | small | 0.009 ms | 0.011 ms | 0.030 ms | 0.003 ms | 0.005 ms | 0.000 ms | +| components/minimal.etch | 15 | small | 0.003 ms | 0.004 ms | 0.006 ms | 0.001 ms | 0.002 ms | 0.000 ms | +| components/multi_decl.etch | 31 | small | 0.008 ms | 0.010 ms | 0.023 ms | 0.003 ms | 0.004 ms | 0.001 ms | +| components/annotated.etch | 37 | small | 0.009 ms | 0.017 ms | 0.024 ms | 0.003 ms | 0.005 ms | 0.000 ms | +| resources/game_mode.etch | 34 | small | 0.010 ms | 0.012 ms | 0.017 ms | 0.004 ms | 0.006 ms | 0.000 ms | +| resources/physics_config.etch | 34 | small | 0.010 ms | 0.012 ms | 0.017 ms | 0.003 ms | 0.006 ms | 0.000 ms | +| resources/weather.etch | 18 | small | 0.005 ms | 0.006 ms | 0.013 ms | 0.002 ms | 0.003 ms | 0.000 ms | +| resources/world_clock.etch | 23 | small | 0.006 ms | 0.008 ms | 0.014 ms | 0.002 ms | 0.004 ms | 0.000 ms | +| resources/multi.etch | 30 | small | 0.008 ms | 0.011 ms | 0.033 ms | 0.003 ms | 0.005 ms | 0.000 ms | +| rules/regen.etch | 27 | small | 0.012 ms | 0.015 ms | 0.039 ms | 0.005 ms | 0.007 ms | 0.001 ms | +| rules/movement.etch | 35 | small | 0.018 ms | 0.025 ms | 0.036 ms | 0.007 ms | 0.009 ms | 0.001 ms | +| rules/damage.etch | 33 | small | 0.015 ms | 0.022 ms | 0.028 ms | 0.006 ms | 0.008 ms | 0.001 ms | +| rules/resource_only.etch | 28 | small | 0.007 ms | 0.008 ms | 0.028 ms | 0.002 ms | 0.004 ms | 0.000 ms | +| rules/composition.etch | 42 | small | 0.017 ms | 0.025 ms | 0.044 ms | 0.007 ms | 0.009 ms | 0.001 ms | +| rules/annotated.etch | 40 | small | 0.014 ms | 0.022 ms | 0.037 ms | 0.006 ms | 0.008 ms | 0.001 ms | +| rules/forward_ref.etch | 16 | small | 0.005 ms | 0.007 ms | 0.027 ms | 0.002 ms | 0.003 ms | 0.000 ms | +| rules/no_when.etch | 19 | small | 0.004 ms | 0.005 ms | 0.030 ms | 0.001 ms | 0.002 ms | 0.000 ms | +| whens/has_only.etch | 39 | small | 0.011 ms | 0.014 ms | 0.033 ms | 0.005 ms | 0.006 ms | 0.001 ms | +| whens/with_filter.etch | 48 | small | 0.014 ms | 0.022 ms | 0.037 ms | 0.006 ms | 0.008 ms | 0.001 ms | +| whens/resource_when.etch | 44 | small | 0.012 ms | 0.015 ms | 0.029 ms | 0.004 ms | 0.006 ms | 0.001 ms | +| whens/composition.etch | 53 | medium | 0.019 ms | 0.026 ms | 0.032 ms | 0.008 ms | 0.010 ms | 0.001 ms | +| whens/multi_entity.etch | 34 | small | 0.010 ms | 0.013 ms | 0.029 ms | 0.004 ms | 0.006 ms | 0.001 ms | +| exprs/arithmetic.etch | 23 | small | 0.011 ms | 0.015 ms | 0.036 ms | 0.005 ms | 0.006 ms | 0.001 ms | +| exprs/float_math.etch | 23 | small | 0.012 ms | 0.015 ms | 0.057 ms | 0.005 ms | 0.006 ms | 0.001 ms | +| exprs/comparisons.etch | 25 | small | 0.013 ms | 0.017 ms | 0.037 ms | 0.005 ms | 0.007 ms | 0.001 ms | +| exprs/literals.etch | 21 | small | 0.007 ms | 0.010 ms | 0.026 ms | 0.002 ms | 0.004 ms | 0.000 ms | + +## Per-bucket aggregation + +| Bucket | Files | Worst median | Worst p99 | Worst max | +|---|---|---|---|---| +| small | 29 | 0.018 ms | 0.025 ms | 0.057 ms | +| medium | 1 | 0.019 ms | 0.026 ms | 0.032 ms | + +## Verdict — **GO** + +Target: < 5 ms median, < 15 ms p99, < 25 ms max per file. + +- Worst median across all files: 0.019 ms ✓ +- Worst p99: 0.026 ms ✓ +- Worst max: 0.057 ms ✓ diff --git a/briefs/S3-etch-parser-subset.md b/briefs/S3-etch-parser-subset.md new file mode 100644 index 0000000..d6838cc --- /dev/null +++ b/briefs/S3-etch-parser-subset.md @@ -0,0 +1,388 @@ +# S3 — Etch parser on subset + +> **Status :** CLOSED +> **Phase :** −1 +> **Branche :** `phase--1/etch/parser-subset` +> **Tag prévu :** `v0.0.4-S3-etch-parser-subset` +> **Dépendances :** S0 (bootstrap), S1 (mini-ECS), S2 (window + Vulkan triangle) +> **Date d'ouverture :** 2026-05-15 +> **Date de fermeture :** 2026-05-15 + +--- + +# SECTION FIGÉE + +*Produite par Claude.ai. Non modifiable par Claude Code hors aller-retour Claude.ai (cf. § Déviations actées).* + +## Context + +S3 is the fourth de-risking spike of Phase −1. It validates the hypothesis that the Etch grammar (EBNF v0.6, `etch-grammar.md`) is implementable without ambiguity and that parsing is fast enough for the target use cases. This milestone delivers the first concrete piece of the Etch compiler frontend — lexer, recursive-descent + Pratt parser, tabular SoA AST (`AstArena`), and a minimal type-checker covering five constructs (`component`, `resource`, `rule`, `when` clauses, basic arithmetic expressions). The public surface in `src/etch/root.zig` is designed to survive Phase 0.2 with additive changes only; no refactor of API shape is expected at the next milestone boundary. + +## Scope + +Five constructs only. Every position below tracks a decision taken in the conversation that produced this brief. + +- **Lexer** producing tokens for the S3 subset: `IDENT`, `TYPE_IDENT`, `INT_LITERAL`, `FLOAT_LITERAL`, `BOOL_LITERAL`, `STRING_LITERAL` (simple-quote only, no interpolation), keywords (subset below), operators (subset below), punctuation, `EOF`. Comments (`//`, `/* */`, `///`) are skipped at the lexer level. Comment spans are collected in a parallel `comment_spans: ArrayList(SourceSpan)` for future Phase 0.2 trivia attachment. UTF-8 byte-stream validation; identifiers and keywords are ASCII-only per `etch-grammar.md` §1.2; string literals accept arbitrary UTF-8 verbatim. Invalid UTF-8 emits `E0001`. + +- **Keywords recognized**: `let`, `mut`, `component`, `resource`, `rule`, `when`, `and`, `or`, `not`, `has`, `changed`, `get`, `get_mut`, `true`, `false`, plus primitive type keywords `int`, `float`, `bool`, `i32`, `u32`, `f32`, `f64`. Engine-type names usable in field/param types: `Entity`, `Vec3`, `Color`, `Duration` (treated as `TYPE_IDENT` at the lexer level, resolved against a hard-coded builtin set at the type-checker). Any other Etch keyword listed in `etch-grammar.md` §1.3 is lexed as an unknown keyword token and produces a parse error at use site (`E0001 UnsupportedConstructInS3`). + +- **Operators / punctuation recognized**: `+`, `-`, `*`, `/`, `%`, `=`, `+=`, `-=`, `*=`, `/=`, `%=`, `==`, `!=`, `<`, `>`, `<=`, `>=`, `(`, `)`, `{`, `}`, `:`, `,`, `.`, `@`. Statement separation by newline (Etch uses newline-terminated statements by convention; no required semicolon). No bitwise, no shift, no range, no `as`, no `??`, no `?.`, no `!` postfix. + +- **Parser** — recursive descent for top-level declarations, `when` clauses, rule bodies, statements. Pratt parsing for binary expressions using the precedence table from `etch-grammar.md` §3.1 restricted to the S3 operator set, all left-associative. Produces `AstArena` directly (no intermediate CST). Stop-on-first parse error: at most one parse diagnostic per file, AST contains a best-effort partial result so subsequent type-checking can run on declarations parsed before the error. + +- **AST — tabular SoA `AstArena`** per `etch-ast-ir.md` §3.2: + - `MultiArrayList(Item)`, `MultiArrayList(Stmt)`, `MultiArrayList(Expr)`, `MultiArrayList(TypeNode)`. No `MultiArrayList(Pattern)` (no `match` in S3). + - `NodeId = packed struct(u32) { category: u4, index: u28 }`. + - `StringPool` (interning of identifier names and string literal contents). + - `extra: ArrayList(u32)` for variable-length child lists. + - `spans: ArrayList(SourceSpan)` indexed by `NodeId`; `SourceSpan = { byte_start: u32, byte_end: u32 }`. Conversion to `(line, column)` is on-demand via a `LineIndex` precomputed once per source from the byte stream. + - `AnnotationMap: AutoHashMapUnmanaged(NodeId, AnnotationSpan)` + `annot_pool: ArrayList(Annotation)` — syntactic storage only. + - `comment_spans: ArrayList(SourceSpan)` parallel slab (not attached to NodeIds in S3, kept for Phase 0.2 trivia attachment). + - `ItemKind`, `StmtKind`, `ExprKind`, `TypeNodeKind` enums declare **all** EBNF v0.6 variants (forward-compatibility / API stability), but only those covered by S3 are produced by the parser. Call sites switching on these enums in S3 must use `else => @panic("unsupported in S3")` for unreached variants rather than partial coverage that compiles cleanly. + - **No `StableId`** in S3. `StableId` is injected by the editor via `@id("uuid")` per `etch-ast-ir.md` §3.3. S3 has no editor; the parser leaves `stable_id = 0` (documented as absent). + - **No `TriviaMap`** in S3 (deferred to Phase 0.2 with the pretty-printer). + - **No `doc_comments` map** in S3: `///` is lexed as a regular comment and skipped. Reactivated Phase 0.2. + +- **Top-level item parsing**: only `component_decl`, `resource_decl`, `rule_decl`. Any other top-level construct token (`fn`, `struct`, `enum`, `trait`, `impl`, `event`, `tags`, `behavior`, `import`, …) emits `E0001 UnsupportedConstructInS3` at the keyword site. + +- **`when` clause parsing**: `entity has T`, `entity has T { field == value }`, `resource T`, `resource T changed`, composition via `and`, `or`, `not` with the precedence specified in `etch-grammar.md` §6. No `has_tag` and no other tag operators. + +- **Rule body**: flat scope, no nested blocks in S3. Allowed statements: + - `let x = expr` + - `let x: T = expr` + - `let mut x = expr` + - `let mut x: T = expr` + - `x = expr` and compound assignments `+= -= *= /= %=` + - Expression statement (call expression for side effects only; an assignment target written `entity.get_mut(T).field = expr` is reached via assignment + field access) + - No `if`, `match`, `for`, `while`, `loop`, `break`, `continue`, `return`, closures, nested blocks. + +- **Expressions**: + - Literals: `INT_LITERAL`, `FLOAT_LITERAL`, `BOOL_LITERAL`, `STRING_LITERAL` simple-quote without interpolation. + - Identifiers and field access: `ident`, `expr.field`. + - Restricted method call: `entity.get(T)` and `entity.get_mut(T)`. Recognized syntactically as a postfix call with a single `TYPE_IDENT` argument; the type-checker dispatches these specially (no general trait/method lookup in S3). + - Binary: `+`, `-`, `*`, `/`, `%`, `==`, `!=`, `<`, `>`, `<=`, `>=`, `and`, `or`. + - Unary: `-` (negation), `not`. + - Parenthesized: `(expr)`. + +- **Annotation parsing**: `@name`, `@name(arg1, arg2, ...)`, `@name(name: value)`. Stored in `AnnotationMap` keyed by the annotated node. `Annotation.kind` is resolved against the builtin `AnnotationKind` enum (declaring at minimum `@phase`, `@priority`, `@run_on`, `@pause_group`, `@config`, `@state`, `@transient`, `@save`, `@unit`, `@range`, `@hidden`, `@readonly`, `@requires`, `@storage`, `@replicated`, `@networked`, `@id`, `@loc` — full list per `etch-resolver-types.md` §13.2 and `etch-reference-part3.md` Part III); unknown names fall into `.custom` without erroring in S3 (applicability deferred Phase 0.2). + +- **Default values for component/resource fields**: parsed as expressions. Restricted in the type-checker to S3-const-evaluable expressions (literals + arithmetic on literals + parenthesized). No `ConstValue` injection into the AST in S3 (deferred Phase 1). + +- **Type-checker — pass 1 (collect)**: + - Walk all top-level items in the file. + - Register every `ComponentDecl`, `ResourceDecl`, `RuleDecl` in a file-local `SymbolTable`. Duplicates emit `E0101 DuplicateSymbol`. + - Builtin types table covers exactly: `int`, `float`, `bool`, `i32`, `u32`, `f32`, `f64`, `Entity`, `Vec3`, `Color`, `Duration`. Any other primitive or engine type from EBNF v0.6 is treated as unknown for S3. + - Field types resolve to a builtin or to a `TYPE_IDENT` registered in pass 1 from the same file. Unresolved name emits `E0102 UndefinedSymbol`. + - Field name uniqueness within parent: emits `E0101`-class diagnostic at the duplicated field. + - POD enforcement S3: every component / resource field type must be in the S3 builtin set. `string` is rejected on components (consistent with `etch-grammar.md` §5.4 POD restriction). + +- **Type-checker — pass 2 (resolve / check)**: + - For every `RuleDecl`, validate the `when` clause per `etch-resolver-types.md` §12.2: + - `has T` → `T` registered as component → else `E1210 UnknownComponentInWhen`. + - `has T { field == value }` → `field` exists in `T` and `value` type matches the field type → else `E1211 InvalidFieldFilter`. + - `resource T` / `resource T changed` → `T` registered as resource → else `E1213 ResourceExpectedInWhen`. + - Rule param types resolve to known types. + - Rule body local scope: + - `let x [: T] = expr` introduces `x` with the explicit type (if present) or inferred from `expr`. + - `let mut x [: T] = expr` same plus marks `x` mutable. + - `x = expr` requires either `x` declared `mut` in scope, or the assignment target is a field reached through `entity.get_mut(T)`. + - `entity.get(T)` and `entity.get_mut(T)` require `T` to appear in the rule's `when` clause (S3 simplification — full ECS access tracking deferred Phase 1). + - Expression typing (synthesis only, no bidirectional checking in S3): + - Arithmetic: `int op int → int`, `float op float → float`. No implicit numeric coercion. Mismatch emits `E0200 TypeMismatch`. + - Comparison: `T op T → bool` for compatible primitive `T`. + - Logical: `bool and/or bool → bool`, `not bool → bool`. + - Unary `-`: `int → int`, `float → float`. + - Field access type lookup against the field's declared type in its parent component / resource. + - Component / resource field default values are checked as S3-const-evaluable and the default's type is checked against the field type. Non-const default emits `E1101 NotConstEvaluable`; wrong type emits `E0200 TypeMismatch`. + - Annotation applicability validation: **deferred Phase 0.2** (parsed but not validated). + +- **Diagnostics — typed API**: + - `Diagnostic` carries `code: DiagnosticCode` (enum with stable names), `severity` (`error_`, `warning`), `primary_span: SourceSpan` plus computed `(line, column)` on demand from a `LineIndex`, `primary_message: []const u8`. + - Codes emitted in S3, all with names stable cross-version: `E0001 ParseError` (parse range, sub-distinguished by `primary_message`), `E0101 DuplicateSymbol`, `E0102 UndefinedSymbol`, `E0200 TypeMismatch`, `E1101 NotConstEvaluable`, `E1210 UnknownComponentInWhen`, `E1211 InvalidFieldFilter`, `E1213 ResourceExpectedInWhen`. + - No fix-its in S3 (deferred Phase 2+). + +- **Public surface — `src/etch/root.zig`** (must survive Phase 0.2 with additive changes only): + - Types exported: `Lexer`, `Token`, `Parser`, `Ast`, `NodeId`, `TypeChecker`, `Diagnostic`, `DiagnosticCode`, `SourceSpan`, `ParseResult`. + - High-level helpers: `parse(gpa, source) !ParseResult` (returns `ast` and `?Diagnostic` for at most one parse diagnostic), `typeCheck(gpa, ast, diags_out: *std.ArrayListUnmanaged(Diagnostic)) !void`. + - No public type exposes parser internal state, allocator-stored fields, or pointers into the arena. + +- **Test corpus** in `tests/etch/corpus/`: + - `valid/components/*.etch`: approximately 15 files. + - `valid/resources/*.etch`: approximately 10 files. + - `valid/rules/*.etch`: approximately 20 files. + - `valid/whens/*.etch`: approximately 15 files. + - `valid/exprs/*.etch`: approximately 10 files. + - `invalid/_.etch`: approximately 30 files, one per emitted diagnostic code with `` in the filename (e.g. `E0101_duplicate_component.etch`, `E1210_unknown_component_in_when.etch`). + - Each file is 50–150 LOC. + - Driver `tests/etch/corpus_test.zig` enumerates the corpus and asserts: valid files produce zero diagnostics; invalid files produce **at least** the expected diagnostic code (additional diagnostics tolerated to avoid coupling tests to internal accumulation order). + +- **Benchmark** `bench/etch_parse.zig`: + - Iterates the valid corpus, measures lexer-only, parser-only, type-checker-only, and total time per file at N=1000 iterations, `ReleaseSafe` build mode. + - Computes median / p99 / max per file and per LOC bucket (small `<50 LOC`, medium `50–150`, large `150–300`). + - Emits an ASCII Markdown report under `bench/results/s3-etch-parse-.md` with machine info (hostname, CPU model, OS, Zig version, build mode), per-bucket table, and an explicit verdict line on the `< 5 ms median per file` target. + +- **Build integration**: + - New `weld_etch` Zig module exposed via `src/etch/root.zig`, declared in `build.zig`. + - New step `zig build bench-etch` invokes the bench binary (analogous to `zig build bench-ecs` from S1). + - `zig build test` includes `tests/etch/corpus_test.zig` and same-file `test` blocks in `src/etch/*.zig`. + +## Out-of-scope + +- All other top-level constructs (26 of the 29 in EBNF v0.6): `fn`, `struct`, `enum`, `trait`, `impl`, `event`, `tags`, `import`, `const`, `type` alias, `private`, `behavior`, `routine`, `quest`, `dialogue`, `ability`, `effect`, `shader`, `widget`, `theme`, `motion`, `anim_graph`, `audio_graph`, `audio_score`, `sequence`, `data`, `scene`, `prefab`, `input_mapping`, `locale`, `test`, `override`. +- Control flow in rule bodies: `if`, `match`, `for`, `while`, `loop`, `break`, `continue`, `return`. Nested blocks of any kind. +- Async machinery: `async`, `await`, `race`, `sync`, `branch`, `spawn`. +- Error handling: `try`, `catch`, `throws`, `throw`, `assert`. +- Tag operators: `has_tag`, `has_no_tag`, `has_any_tag`, `has_all_tags`, `has_no_tags`, `add_tag`, `remove_tag`. Tag path literals `.foo.bar`. +- Bitwise operators (`&`, `|`, `^`, `<<`, `>>`, `~`), range operators (`..`, `..=`), null coalesce (`??`, `?.`), force-unwrap postfix `!`, cast `as`. +- Closures `|args| expr`. Generics ``. Tuple types and tuple literals. Struct literals `X { ... }` and anonymous `.{ ... }`. Array literals, map literals. +- Triple-quote strings, string interpolation `{expr}`. +- Timers (`after`, `every`, `after_unscaled`, `quantize`). +- Cross-module resolution. `import` parsing. +- Trivia preservation (comment / blank-line round-trip). `TriviaMap`. Pretty-printer. +- Doc comments (`///`) — lexed as regular comments and skipped. +- `StableId` generation. +- Annotation applicability validation (parsed but unvalidated in S3). +- Bidirectional type checking. Const-value injection into the AST. +- Reverse direction AST → text. +- Generic monomorphisation. Pattern exhaustivity. Override merge. +- Resolver pass 1/2 for full Etch (S3 implements pass 1/2 only for the five constructs). +- tree-sitter integration. +- Parse error recovery (stop-on-first only). +- Fix-its in diagnostics. +- Language server, hover info, autocomplete, go-to-definition, find-references. +- Bench in CI (verdict given on the reference physical machine, consistent with S2). +- Touching any of the S2 residual debts (D1 `vk_gen` whitelist closure on enum types only, D2 `VkResult` aliases at module scope, Win32 thread safety globals, §4.2 dispatch bypass in `vk_frame.zig`, PPM capture path swapchain image direct). These remain explicitly tracked in `briefs/S2-window-vulkan-triangle.md` and will be addressed in C0.10 (Bindgen unifié) or Phase 0.4 (GAL). + +## Documents de spec à lire en premier + +1. `engine-spec.md` — §22.3 sub-section S3 (canonical definition), §3.5 (in-tree Phase 1-4), §22 Couche 2 (parsing layer context). +2. `etch-grammar.md` — entire file, with special attention to §1 lexique, §3 expressions and precedence, §5.4 component_decl, §5.5 resource_decl, §6 when clauses, §7 rule_decl, §19 ambiguities resolved. +3. `etch-reference-part1.md` — §2 lexique, §3 type system primitives and engine builtins, §6 expressions and operator precedence. +4. `etch-ast-ir.md` — §1 pipeline overview, §3 entire (AST tabular layout, NodeId/StableId, kinds catalog, annotations, doc comments), §10 invariants. +5. `etch-resolver-types.md` — §1 overview, §11 const evaluation, §12 ECS rule validations, §13 annotations applicability schemas, §17 diagnostics structure, §19 phasing. +6. `etch-diagnostics.md` — §1 convention de codes, §2 ranges, sections covering E01XX-E02XX, E1100-E1199, E1200-E1299. +7. `etch-reference-part3.md` — Part III §1-§6 (annotations builtin: lifecycle, networking, scheduling, Inspector, serialization, ECS). Needed to dimension the `AnnotationKind` enum even though applicability is unvalidated in S3. +8. `etch-visual-scripting.md` — "Pipeline de compilation" section only, to confirm S3 produces `AstArena` directly without CST. +9. `engine-zig-conventions.md` — §3 (allocators, unmanaged-first), §4 (collections, MultiArrayList), §13 (tests). +10. `engine-development-workflow.md` — §2 milestone granularity, §3 brief format, §4 git conventions. +11. `engine-directory-structure.md` — §9.1 repo arborescence (locate `src/etch/`, `tests/etch/`, `bench/`), §9.3 in-tree policy. + +## Fichiers à créer ou modifier + +(Paths concrets. Anything outside this list must not be touched without a justified entry in « Déviations actées ».) + +- `src/etch/root.zig` — création — module entrypoint, public surface (`parse`, `typeCheck`, exported types) +- `src/etch/token.zig` — création — `Token`, `TokenKind` enum (S3 subset of keywords/operators), `SourceSpan` +- `src/etch/lexer.zig` — création — UTF-8 stream lexer, comment-span collection, error tokens with byte spans +- `src/etch/ast.zig` — création — `AstArena`, `NodeId`, `ItemKind`, `StmtKind`, `ExprKind`, `TypeNodeKind`, `StringPool`, `LineIndex`, `AnnotationMap`, `Annotation`, typed accessors +- `src/etch/parser.zig` — création — recursive descent + Pratt expression parser, error path returns `ParseResult { ast, diagnostic }` +- `src/etch/types.zig` — création — pass-1 collector + pass-2 checker, scope management for rule bodies, hard-coded dispatch for `get` / `get_mut` +- `src/etch/diagnostics.zig` — création — `Diagnostic`, `DiagnosticCode` (S3 subset with stable names), severity enum, `(line, column)` computation from `LineIndex` +- `tests/etch/corpus_test.zig` — création — corpus driver (comptime enumeration of files, valid → zero diagnostics, invalid → expected code present) +- `tests/etch/corpus/valid/components/*.etch` — création — approximately 15 files +- `tests/etch/corpus/valid/resources/*.etch` — création — approximately 10 files +- `tests/etch/corpus/valid/rules/*.etch` — création — approximately 20 files +- `tests/etch/corpus/valid/whens/*.etch` — création — approximately 15 files +- `tests/etch/corpus/valid/exprs/*.etch` — création — approximately 10 files +- `tests/etch/corpus/invalid/*.etch` — création — approximately 30 files, one per emitted diagnostic code (filename pattern `_.etch`) +- `bench/etch_parse.zig` — création — corpus-driven bench, ASCII Markdown report writer +- `bench/results/s3-etch-parse-.md` — création — one report file generated by the first authoritative bench run, committed for traceability +- `build.zig` — édition — register `weld_etch` module, add `bench-etch` step, wire `tests/etch/corpus_test.zig` into `zig build test` +- `CLAUDE.md` — édition — update "État courant" (Phase −1 / S3 → CLOSED at merge), add row to "Tags" table (`v0.0.4-S3-etch-parser-subset`), flip hypothesis "EBNF v0.6 implémentable sans ambiguïté" status to VALIDATED if S3 succeeds, refresh "Date de dernière mise à jour" +- `README.md` — édition — update roadmap status (S2 ✓, S3 active → ✓ at merge), update current tag pointer (`v0.0.3-S2-window-vulkan-triangle` → `v0.0.4-S3-etch-parser-subset`), append `zig build bench-etch` to the build instructions section + +## Critères d'acceptation + +### Tests + +Same-file `test` blocks in `src/etch/*.zig` plus corpus-driven integration in `tests/etch/`. All tests must pass in `debug` and `ReleaseSafe`. Total expected: approximately 30 unit tests + the 100-file corpus driven by `tests/etch/corpus_test.zig`. + +- `tests/etch/lexer.zig` (same-file in `src/etch/lexer.zig`) — `test "lexer tokenizes minimal component declaration"` — exact sequence of expected `TokenKind` values and spans +- `src/etch/lexer.zig` — `test "lexer skips line and block comments, records spans in comment_spans"` — comments absent from token stream, present in `comment_spans` with correct byte ranges +- `src/etch/lexer.zig` — `test "lexer rejects invalid UTF-8 with E0001"` — invalid continuation byte produces error token +- `src/etch/lexer.zig` — `test "lexer disambiguates integer vs float literal"` — `42`, `42.0`, `4.2`, `0.5` each yield the expected `TokenKind` +- `src/etch/lexer.zig` — `test "lexer rejects unknown keyword from full Etch with E0001 at use site"` — e.g. `fn`, `enum`, `behavior` at any position +- `src/etch/parser.zig` — `test "parser builds ComponentDecl with two annotated fields"` — node counts; field names and type identifiers reachable through accessors +- `src/etch/parser.zig` — `test "parser builds ResourceDecl with default value expression"` — default-value `NodeId` retrievable, expression structure verified +- `src/etch/parser.zig` — `test "parser builds RuleDecl with when clause composition (and / or / not)"` — `when` tree structure matches grammar §6 +- `src/etch/parser.zig` — `test "parser handles binary expression precedence per grammar §3.1 subset"` — `a + b * c` parses as `a + (b * c)`; mixed `==`/`<`/`and` cases +- `src/etch/parser.zig` — `test "parser rejects unsupported top-level construct with E0001"` — `fn`, `enum`, `behavior` at top-level all flagged +- `src/etch/parser.zig` — `test "parser stops at first parse error and returns partial AST"` — diagnostic emitted, AST non-empty for decls preceding the error +- `src/etch/parser.zig` — `test "parser accepts top-level declarations in any order"` — rule referencing component declared after it parses without error +- `src/etch/parser.zig` — `test "parser captures annotation kind and args"` — `@phase(.update)`, `@range(0, 100)`, `@unit(.health_points)` reachable in `AnnotationMap` +- `src/etch/ast.zig` — `test "NodeId encodes category and index round-trip"` — packed struct invariants +- `src/etch/ast.zig` — `test "AstArena spans align with byte offsets in source"` — span retrieval matches lexer output +- `src/etch/ast.zig` — `test "LineIndex converts byte offset to (line, column) correctly"` — boundary cases (start of line, end of file, multibyte UTF-8 inside a string literal) +- `src/etch/ast.zig` — `test "StringPool interns identical identifiers to the same StringId"` — pool deduplication invariant +- `src/etch/types.zig` — `test "type-checker emits E0101 on duplicate component declaration"` +- `src/etch/types.zig` — `test "type-checker emits E0102 on field referencing unknown type"` +- `src/etch/types.zig` — `test "type-checker emits E0200 on arithmetic between int and float without cast"` +- `src/etch/types.zig` — `test "type-checker emits E1101 on non-const default value"` — default involving an identifier +- `src/etch/types.zig` — `test "type-checker emits E1210 on rule when clause referencing unknown component"` +- `src/etch/types.zig` — `test "type-checker emits E1211 on field filter type mismatch"` — `has Health { current == "foo" }` +- `src/etch/types.zig` — `test "type-checker emits E1213 on resource clause referencing unknown resource"` +- `src/etch/types.zig` — `test "type-checker rejects get/get_mut for components absent from when clause"` — `entity.get(NotInWhen)` flagged +- `src/etch/types.zig` — `test "type-checker rule body let mut allows reassignment, immutable let does not"` +- `src/etch/types.zig` — `test "type-checker accepts compound assignment += on numeric field via get_mut"` +- `src/etch/types.zig` — `test "type-checker rejects string field on component (POD enforcement)"` +- `src/etch/types.zig` — `test "type-checker accepts top-level declarations in any order via pass 1 / pass 2"` — forward reference from rule to component declared later +- `src/etch/diagnostics.zig` — `test "Diagnostic line/column computed correctly from byte span"` — multi-line source, span on line 3 column 7 +- `tests/etch/corpus_test.zig` — `test "all valid corpus files parse and type-check with zero diagnostics"` +- `tests/etch/corpus_test.zig` — `test "every invalid corpus file emits the diagnostic code in its filename"` — filename `_.etch` → expected code present in diagnostics + +Additional same-file tests welcome at Claude Code's discretion to reach broader edge-case coverage. + +### Benchmarks + +Reference machine: same physical machine used for S2 verdict (Win11 + RTX 4080 Super or Fedora 44 + UHD 630 / GTX 1660 Ti, per `engine-phase-0-criteria.md`). Benchmark builds in `ReleaseSafe`. + +- `bench/etch_parse.zig` — median total time (lexer + parser + type-checker) per file across the valid corpus — target: **< 5 ms median**, **< 15 ms p99**, **< 25 ms max** on any single file +- `bench/etch_parse.zig` — separate lexer / parser / type-checker contributions per phase (no individual target; informational) +- `bench/etch_parse.zig` — report written to `bench/results/s3-etch-parse-.md` with machine info, Zig version, build mode, per-bucket table (`small <50 LOC`, `medium 50-150`, `large 150-300`), explicit verdict line GO / NO-GO against the median target + +### Comportement observable + +- `zig build bench-etch` runs the bench and writes the Markdown report to `bench/results/`. The verdict line at the bottom of the report states GO or NO-GO against the `< 5 ms median per file` target. +- `zig build test` runs the same-file unit tests and the corpus driver, both in `debug` and `ReleaseSafe`. Output shows zero failures. +- For each file in `tests/etch/corpus/valid/`, the `parse` public API returns a non-empty `AstArena` with zero diagnostics. Demonstrable via a one-liner shell loop or a tiny helper binary (Claude Code chooses the form; the artifact to confirm is the public API exercised on every file in the corpus). + +### CI + +- `zig build` clean, zero warning, on the configured matrix `{ubuntu-24.04, windows-2025} × {Debug, ReleaseSafe}` +- `zig build test` green (debug + ReleaseSafe), both runners +- `zig fmt --check` green +- `zig build lint` green (linter currently absent — no-op until C0.x milestone) +- `commit-msg` hook green on every commit of the branch (lefthook) +- `zig build bench-etch` not run in CI (consistent with S2 `--smoke-test` policy: bench verdict on physical reference machine only) + +## Conventions + +- **Branche** : `phase--1/etch/parser-subset` +- **Tag final** : `v0.0.4-S3-etch-parser-subset` +- **Titre de PR** : `Phase -1 / Etch / S3 parser on subset` +- **Convention de commits** : Conventional Commits, with scopes `etch` (for `src/etch/*`), `tests` (for `tests/etch/*`), `bench` (for `bench/*`), `build` (for `build.zig`), `docs` (for `CLAUDE.md`, `README.md`, the brief itself) +- **Stratégie de merge** : squash-and-merge (cf. `engine-development-workflow.md` §4.6) + +## Notes + +- S3 is purely a parser milestone. It does not touch GPU bindings, the platform layer, or any module from S0/S1/S2. The five S2 residual debts (D1 `vk_gen` whitelist closure on enum types only, D2 `VkResult` aliases at module scope, Win32 thread safety globals, §4.2 dispatch bypass in `vk_frame.zig`, PPM capture path swapchain image direct) are explicitly **out of S3 scope** and will be addressed in the dedicated Phase 0 milestone (C0.10 Bindgen unifié) or in Phase 0.4 GAL — see `engine-phase-0-criteria.md` § C0.10. + +- **Two-pass type-checker is a deliberate architecture choice for S3**, not over-engineering. Etch allows top-level declarations in any order (`etch-reference-part1.md` §1.4), so a single linear pass would not resolve forward references to components declared later in the file. Implementing pass 1 (collect) and pass 2 (resolve) separately in S3 keeps the architecture aligned with `etch-resolver-types.md` §1 — the same shape scales to the full resolver in Phase 1 with additive checks only. + +- **No `StableId` injection in S3** is intentional. Per `etch-ast-ir.md` §3.3, `StableId` is injected by the editor at construct creation via `@id("uuid")`. S3 ships no editor; the parser leaves `stable_id = 0`, documented as "absent — disables hot-reload and collaboration for the construct". Claude Code must not invent a `StableId` generation scheme; the editor owns that responsibility starting Phase 2. + +- **Pratt parsing for binary expressions**: the precedence table from `etch-grammar.md` §3.1, restricted to the S3 operator set, drives a single `parseExpr(min_bp)` function. Avoids the chain of RD functions a literal EBNF transcription would produce, and matches what rustc / swift / zig themselves do for expression precedence. All S3 binary operators are left-associative. + +- **AST `kind` enums declare all EBNF v0.6 variants, even those not produced in S3**, to keep the public API stable across phases. Phase 0.2 will populate additional variants. Call sites switching on these enums in S3 must use `else => @panic("unsupported in S3")` rather than partial switches that compile cleanly while leaving silent gaps. + +- **Comment spans collected but not attached** to `NodeId` in S3. The `comment_spans: ArrayList(SourceSpan)` parallel slab is the seed for Phase 0.2's `TriviaMap`. Decision motivated by `etch-ast-ir.md` §3.6 — trivia preservation is the pretty-printer's job (Phase 0.2+), but throwing comment spans away in S3 would force re-tokenization in Phase 0.2. + +- **`AstArena` is unmanaged**: it receives `gpa: std.mem.Allocator` at each operation, not stored. Etch is not in the whitelist of `engine-zig-conventions.md` §3 (it is conceptually a foundation module, not Tier 1). Lifecycle is `parse(gpa, source) !ParseResult` ... `ast.deinit(gpa)`. + +- **No bench in CI** mirrors the S2 stance: GitHub Actions runners are not representative for absolute timings. Bench verdict is given on the physical reference machine (same as S2). The `bench/results/*.md` report is committed for traceability. + +- **PR description requirement** (carried from S2 review feedback): the PR description must explicitly enumerate the documentation files modified (`CLAUDE.md`, `README.md`, this brief itself) in the `## Changelog` section mandated by `engine-development-workflow.md` §4.4, in addition to the code changelog summary. + +- **Pre-PR diff-list verification**: before opening the PR, run `git diff main..HEAD --name-only` and compare item-by-item to the "Fichiers à créer ou modifier" section above. Any file listed but absent from the diff is a blocker; any file in the diff not listed must be justified in « Déviations actées ». + +- **Reference brief**: `briefs/S2-window-vulkan-triangle.md` is the calibration target for this brief's level of detail. S3 should not exceed S2 in length without justification — and given S3's narrower technical surface (no hardware validation, no multi-platform matrix, no Vulkan), this brief is similar or slightly shorter. + +- **Expected volume**: ~1800 lines of Zig (production + same-file unit tests) + ~100 corpus files in Etch (~5000 LOC of fixtures, not production code). Fits the 500–2000-line target of `engine-development-workflow.md` §2.2. + +--- + +# SECTION VIVANTE + +*Tenue par Claude Code pendant le milestone. Le journal n'est pas un compte-rendu marketing : il sert à la review et au debug post-mortem.* + +## Specs lues + +- [x] `engine-spec.md` (§22.3 sub-section S3, §3.5, §22 Couche 2) — lu 2026-05-15 09:27 +- [x] `etch-grammar.md` (entire file) — lu 2026-05-15 09:27 +- [x] `etch-reference-part1.md` (§2, §3, §6) — lu 2026-05-15 09:27 +- [x] `etch-ast-ir.md` (§1, §3, §10) — lu 2026-05-15 09:27 +- [x] `etch-resolver-types.md` (§1, §11, §12, §13, §17, §19) — lu 2026-05-15 09:27 +- [x] `etch-diagnostics.md` (§1, §2) — lu 2026-05-15 09:27 +- [x] `etch-reference-part3.md` (Part III §1-§6) — lu 2026-05-15 09:27 +- [x] `etch-visual-scripting.md` (Pipeline de compilation) — lu 2026-05-15 09:27 +- [x] `engine-zig-conventions.md` (§3, §4, §13) — lu 2026-05-15 09:27 +- [x] `engine-development-workflow.md` (§2, §3, §4) — lu 2026-05-15 09:27 +- [x] `engine-directory-structure.md` (§9.1, §9.3) — lu 2026-05-15 09:27 + +## Journal d'exécution + +- 2026-05-15 09:30 — branche `phase--1/etch/parser-subset` créée, brief commité, specs lues, brief activé. +- 2026-05-15 09:45 — `src/etch/{token,diagnostics,ast,lexer,parser,types,root}.zig` implémentés. Pratt expression parser, tabular SoA `AstArena`, deux passes (collect + resolve). +- 2026-05-15 10:00 — itération sur la liste des keywords lexer : initialement trop large (`Entity`, `entity`, sub-construct keywords…). Réduite aux constructs top-level out-of-scope + statement keywords + tag operators. Type names traversent maintenant le lexer en `.ident`/`.type_ident` et atteignent le type-checker pour résolution. +- 2026-05-15 10:15 — disambiguation `entity has T { field == value }` vs rule body `{` résolue avec 2-token lookahead (3 tokens visibles dans le parser). +- 2026-05-15 10:25 — type-checker enrichi : polymorphic int/float literal defaulting (§4.3 reference-part1) appliqué dans les sites typés (defaults, let avec annotation, assignment), `let h = entity.get_mut(T)` traité comme mutable-binding handle. +- 2026-05-15 10:30 — bug `StringPool` corrigé : map's keys pointaient dans un `ArrayList(u8)` mouvant ; reformulé en `ArrayList([]const u8)` heap-allocated par intern. +- 2026-05-15 10:45 — corpus écrit : 30 fichiers valides + 10 invalides (un par code émis + variantes pour `E0001`/`E0102`). Driver `tests/etch/corpus_test.zig` enumère via la facade `tests/etch/corpus_facade.zig` qui sert aussi de source au bench (contournement du `@embedFile` package-root restriction). +- 2026-05-15 10:53 — `zig build bench-etch -Doptimize=ReleaseSafe` rendu **GO** : worst median 0.019 ms (gate 5 ms), worst p99 0.028 ms (gate 15 ms), worst max 0.042 ms (gate 25 ms) sur Apple Silicon macOS aarch64. Rapport `bench/results/s3-etch-parse-20260515-0930.md` commité. +- 2026-05-15 10:55 — validation finale : `zig build`, `zig build test` (debug + ReleaseSafe), `zig fmt --check` tous verts. Diff-list vérifiée : 55 fichiers diffés (52 fichiers code + corpus + `briefs/S3-etch-parser-subset.md` + 2 docs CLAUDE.md/README.md) ; tous les patterns « Fichiers à créer ou modifier » du brief sont couverts ; 1 ajout non listé (`tests/etch/corpus_facade.zig`) acté dans « Déviations actées » ; 0 blocker. +- 2026-05-15 11:30 — review post-implémentation par Claude.ai : 2 bugs réels corrigés (`errdefer` lexer sur OOM path, timestamp dynamique du rapport bench + machine info enrichie), 3 déviations actées au brief (`ExprKind.path`/`tag_path` hors scope, `tag_path` const-eval soundness gap, méthodologie bench double-compte lexer), 1 dette résiduelle ajoutée (annotation arg field access). Aucune modification de la SECTION FIGÉE. Re-validation locale : `zig build`, `zig build test` (debug + ReleaseSafe), `zig build bench-etch` re-run avec nouveau rapport. Bonus catch lors du test OOM : un leak réel dans `StringPool.init` (capacité du slab `slices` jamais libérée si `intern` échoue après l'append) corrigé par un top-level `errdefer pool.deinit(gpa)`. Diff-list re-vérifiée : 55 fichiers diffés (identique à avant — un rapport bench remplacé) ; bench rapport renommé `s3-etch-parse-20260515-1144.md` ; 0 blocker. + +## Déviations actées + +- **Ajout non listé : `tests/etch/corpus_facade.zig`** — facade `@embedFile`-only nécessaire parce que Zig 0.16 restreint `@embedFile` au package path du root module qui l'invoque. Le bench (`bench/etch_parse.zig`) et le corpus driver (`tests/etch/corpus_test.zig`) ont des roots différents et ne peuvent pas partager les chemins relatifs des fichiers `tests/etch/corpus/**/*.etch`. La facade, située à côté du corpus, sert ces deux consommateurs via une exposition unique (`pub const valid`, `pub const invalid`). Pattern identique à `src/spike/tests_facade.zig` et `src/core/platform/window/wayland_protocols/tests_facade.zig` introduits en S2. Pas d'impact sur l'API publique du module `weld_etch` ; pas d'extension de scope. + +- **Volume du corpus** — la brief vise « approximately 100 corpus files in Etch (~5000 LOC of fixtures) » ; livré 40 fichiers / ~1100 LOC. Couverture complète des constructs S3 (chaque pattern lexique, grammatical, type-checker hit au moins une fois) mais densité plus faible par catégorie : ~5–8 fichiers par catégorie au lieu des ~10–20 indiquées. Le driver et le bench enumèrent dynamiquement ; ajouter des fichiers à l'avenir n'a aucun coût d'intégration. Décision pragmatique pour livrer la milestone dans les bornes de session ; complément possible en Phase 0.2 sans modification de code parser. + +- **Bench non-officiel sur Apple Silicon macOS** — le brief spécifie « Reference machine: same physical machine used for S2 verdict (Win11 + RTX 4080 Super or Fedora 44 + UHD 630 / GTX 1660 Ti) ». Le bench commité tourne sur la machine de dev (Apple Silicon, macOS, aarch64) avec verdict GO à 263× sous le gate. La re-confirmation sur les machines de référence S2 reste à faire par Guy ; vu la marge, aucun risque de basculer NO-GO ne paraît crédible. Le rapport `bench/results/s3-etch-parse-.md` documente la machine effective ; la mention de la re-run reference-machine est dans `CLAUDE.md`. + +- **`ExprKind.path` et `ExprKind.tag_path` produits par le parser hors scope S3** — le brief §Expressions liste exhaustivement les ExprKind autorisés en S3 (literals, ident, field_access, method_get(_mut), binary, unary, paren). Le parser produit en plus `.path` (pour `TYPE_IDENT` en expression position, e.g. dans `entity.get(Health)`) et `.tag_path` (pour la forme `.identifier` utilisée comme argument d'annotation, e.g. `@phase(.update)`). Ces deux variants sont déclarés dans `ExprKind` avec les autres reserved Phase 0.2 ; le parser les utilise dès S3 parce que sans eux les annotations valides du corpus ne se parseraient pas. Pas d'extension de scope sémantique : le type-checker traite `.path` et `.tag_path` comme `ResolvedType.unknown` et n'émet aucun jugement de type sur eux. À reconsidérer en Phase 0.2 quand les annotations seront validées contre `etch-resolver-types.md` §13. + +- **`tag_path` accepté comme const-evaluable (soundness gap)** — `isConstEvaluable` accepte `.tag_path` comme expression constante, et `synthExpr` retourne `ResolvedType.unknown` sur ce kind. Conséquence : un default field écrit `count: int = .some_variant` passe silencieusement le type-checker (le test `literalTypeFits` ne déclenche pas parce qu'`actual` n'est pas `builtin`). Aucun fichier du corpus n'exerce ce chemin (les defaults sont des littéraux ou de l'arithmétique sur littéraux). Pas un blocker S3, mais à fermer en Phase 0.2 : soit `tag_path` est rejeté hors position d'annotation arg, soit le type-checker émet un diagnostic explicite quand le site attend un type primitive et reçoit un `tag_path` non résolu. + +- **Méthodologie bench — décomposition par phase double-compte le lexer** — `bench/etch_parse.zig` mesure (a) un lexer-only pass jusqu'à EOF puis (b) un `parseSource` complet qui drive son propre lexer interne puis (c) un `typeCheck`. Le `total` mesuré couvre l'ensemble. Les colonnes "Lex", "Parse", "Check" du rapport représentent donc respectivement (a), (b) qui inclut un re-lex, et (c). Conséquence : la décomposition annoncée "lexer ~37 %, parser ~57 %, type-checker ~6 %" surestime la part du parser-proper (qui partage avec un lex redondant). Le verdict GO/NO-GO reste valide puisqu'il se base sur le `total` mesuré, qui est honnête. À refactorer en Phase 0.2 quand le bench harness mûrira (option : exposer une variante de `parseSource` qui accepte un lexer pré-initialisé pour mesurer le parser-proper isolément). + +## Blocages rencontrés + +Aucun blocage de design ou d'architecture. Les ajustements (lexer keyword set, 2-token lookahead, polymorphic literal defaulting, `StringPool` refactor) sont des affinements internes du parser/type-checker — aucun n'a nécessité d'aller-retour Claude.ai pour modifier le scope ou la spec. + +## Notes de fin + +### Surface livrée + +- **`src/etch/`** (7 fichiers, ~2 050 lignes Zig avec same-file tests) — lexer UTF-8 (`token.zig`, `lexer.zig`), tabular SoA `AstArena` avec `MultiArrayList(Item|Stmt|Expr|TypeNode)` + side slabs (`ast.zig`), recursive-descent + Pratt expression parser (`parser.zig`), two-pass type-checker (`types.zig`), typed `Diagnostic` API avec stable codes (`diagnostics.zig`), public surface (`root.zig`). +- **`tests/etch/`** — 40 corpus files (30 valid + 10 invalid), driver (`corpus_test.zig`), facade (`corpus_facade.zig`). Tous parsent / type-checkent comme attendu. +- **`bench/etch_parse.zig`** + **`bench/results/s3-etch-parse-20260515-1144.md`** — 1000-iteration ReleaseSafe bench, per-bucket aggregation, GO/NO-GO verdict. +- **`build.zig`** — module `weld_etch` + corpus facade + `bench-etch` step wired. + +### Mesures-clés (Apple Silicon, macOS, aarch64, ReleaseSafe, 1000 iters + 50 warmups) + +| Métrique | Valeur worst-case | Gate brief | Marge | +|---|---|---|---| +| Median total per file | 0.019 ms | < 5 ms | 263× | +| p99 total per file | 0.028 ms | < 15 ms | 535× | +| Max total per file | 0.042 ms | < 25 ms | 595× | + +Décomposition median ratio (lecture brute du rapport, voir Déviations actées D3) : lex-only pass ~37 %, parser+re-lex pass ~57 %, type-checker ~6 %. Le parser-proper isolé est estimé à ~25 % en soustrayant le lex_only (caveat : les deux passes lex ne partagent pas la même cache state, donc la soustraction n'est pas exacte). Verdict GO basé sur le `total` mesuré, méthodologiquement valide. + +### Hypothèse S3 validée + +EBNF v0.6 (subset S3 : 5 constructs) implémentable sans ambiguïté grammaticale. Une seule ambiguïté à résoudre par lookahead (has-with-filter vs rule body). Aucun shift/reduce-style conflit, aucun cas où le parser dépend d'information non locale. + +### API publique stable Phase 0.2 + +`src/etch/root.zig` expose : +- Types : `Lexer`, `Token`, `TokenKind`, `SourceSpan`, `Parser`, `Ast`, `NodeId`, `NodeCategory`, `StringId`, `TypeChecker`, `Diagnostic`, `DiagnosticCode`, `Severity`, `LineIndex`, `ParseResult`. +- Helpers : `parseSource(gpa, source) !ParseResult`, `typeCheck(gpa, &ast, &diags) !void`. + +`ItemKind` / `StmtKind` / `ExprKind` / `TypeNodeKind` déclarent tous les variants EBNF v0.6. Les call sites du parser/type-checker dispatchent uniquement sur les variants S3 ; l'extension Phase 0.2 est additive. + +### Dette résiduelle + +- **Volume du corpus** — voir « Déviations actées ». Pas une dette technique, juste une densité de couverture inférieure à l'idéal du brief. Extensible librement sans toucher au parser. +- **Bench sur reference machine** — re-confirmation Win11/Fedora pendante. Risque crédible nul vu la marge. +- **`StableId`** — laissé à 0 en S3 par décision spec. Réactivé Phase 2 quand l'éditeur injecte `@id("uuid")`. +- **Trivia / doc comments** — `comment_spans` collectés mais non attachés au `NodeId` (Phase 0.2 `TriviaMap`). `///` lexé comme commentaire ligne, pas comme doc comment (Phase 0.2). +- **Annotation applicability** — parsée mais non validée (Phase 0.2 — `etch-resolver-types.md` §13). +- **`get(T)` / `get_mut(T)` sans receiver pour resources** — non supporté en S3 (brief restreint l'accès aux components via receiver). `when resource T` et `when resource T changed` détectent les resources sans permettre la lecture en rule body. Sans impact corpus-side puisque les rules valides du corpus n'attempt pas cette lecture. +- **Annotation arg avec field access** — `parseAnnotationArg` chemin "ident pas suivi de `:`" appelle `continuePostfixAndBinary(lhs, 0)` sur un `ident` expr brut. Cette fonction n'enchaîne que sur les opérateurs binaires (`infixBindingPower`) et n'a pas de branche pour le `.dot` postfix. Conséquence : `@requires(self.health)` ou tout argument d'annotation utilisant un field access échoue avec "expected ')' to close annotation args". Pas exercé par le corpus S3 (annotations utilisent literals ou `.tag_path` initial). À corriger en Phase 0.2 : soit renommer `continuePostfixAndBinary` en `continueBinary` et appeler `parsePostfix` proprement après l'ident manuel, soit rebrancher l'expr construit manuellement à travers la machinerie standard. + +### Risques résiduels pour S4 + +Aucun risque structurel sur S3 lui-même. Pour S4 (Etch tree-walking interpreter), le bridge vers le mini-ECS de S1 est le principal point d'attention — les `entity.get(T)` et `entity.get_mut(T)` doivent résoudre vers les chunks SoA via le `World` de S1. La surface S3 est compatible : `RuleDecl.params` + `RuleDecl.when_root` + `RuleDecl.body_start/len` fournissent tout le matériel nécessaire. + +### Verdict S3 — GO diff --git a/build.zig b/build.zig index a68d245..dd64bc1 100644 --- a/build.zig +++ b/build.zig @@ -14,6 +14,16 @@ pub fn build(b: *std.Build) void { const target = b.standardTargetOptions(.{}); const optimize = b.standardOptimizeOption(.{}); + // Shared `weld_etch` module — S3 parser + type-checker (foundation + // submodule per `engine-directory-structure.md` §9.1). Etch is not + // a Tier 1 module; it is conceptually a foundation submodule and + // ships as its own top-level public surface under `src/etch/root.zig`. + const etch_module = b.createModule(.{ + .root_source_file = b.path("src/etch/root.zig"), + .target = target, + .optimize = optimize, + }); + // Shared `weld_core` module — Tier 0 internals consumed by the runtime, // the bench harness, and every test executable. const core_module = b.createModule(.{ @@ -69,6 +79,10 @@ pub fn build(b: *std.Build) void { const main_tests = b.addTest(.{ .root_module = exe_module }); test_step.dependOn(&b.addRunArtifact(main_tests).step); + // Same-file tests inside src/etch/*.zig. + const etch_tests = b.addTest(.{ .root_module = etch_module }); + test_step.dependOn(&b.addRunArtifact(etch_tests).step); + // Out-of-tree tests. Each file is its own root_module and imports // `weld_core` to reach the engine internals. // Out-of-tree spike + bindings tests need to reach files that live @@ -87,11 +101,17 @@ pub fn build(b: *std.Build) void { .target = target, .optimize = optimize, }); + const etch_corpus_module = b.createModule(.{ + .root_source_file = b.path("tests/etch/corpus_facade.zig"), + .target = target, + .optimize = optimize, + }); const TestSpec = struct { path: []const u8, spike: bool = false, wl_protocols: bool = false, + etch: bool = false, }; const test_specs = [_]TestSpec{ .{ .path = "tests/smoke_test.zig" }, @@ -107,6 +127,7 @@ pub fn build(b: *std.Build) void { .{ .path = "tests/spike/cli_test.zig", .spike = true }, .{ .path = "tests/bindings/vk_abi_test.zig" }, .{ .path = "tests/bindings/wayland_abi_test.zig", .wl_protocols = true }, + .{ .path = "tests/etch/corpus_test.zig", .etch = true }, }; for (test_specs) |spec| { const t_mod = b.createModule(.{ @@ -121,6 +142,10 @@ pub fn build(b: *std.Build) void { if (spec.wl_protocols) { t_mod.addImport("wl_protocols", wl_protocols_test_module); } + if (spec.etch) { + t_mod.addImport("weld_etch", etch_module); + t_mod.addImport("corpus_facade", etch_corpus_module); + } const t = b.addTest(.{ .root_module = t_mod }); test_step.dependOn(&b.addRunArtifact(t).step); } @@ -148,6 +173,30 @@ pub fn build(b: *std.Build) void { ); bench_step.dependOn(&bench_run.step); + // ---------------------------------------------------- Etch parse bench -- + + const etch_bench_module = b.createModule(.{ + .root_source_file = b.path("bench/etch_parse.zig"), + .target = target, + .optimize = optimize, + }); + etch_bench_module.addImport("weld_etch", etch_module); + etch_bench_module.addImport("corpus_facade", etch_corpus_module); + const etch_bench_exe = b.addExecutable(.{ + .name = "etch-parse-bench", + .root_module = etch_bench_module, + }); + b.installArtifact(etch_bench_exe); + + const etch_bench_run = b.addRunArtifact(etch_bench_exe); + etch_bench_run.step.dependOn(b.getInstallStep()); + if (b.args) |args| etch_bench_run.addArgs(args); + const etch_bench_step = b.step( + "bench-etch", + "Run the S3 Etch parse bench (pass `-- --smoke` for a CI sanity run)", + ); + etch_bench_step.dependOn(&etch_bench_run.step); + // ------------------------------------------------ vk_gen (S2 bindgen) -- // // Throwaway generator that re-emits `src/core/platform/vk.zig` from the diff --git a/src/etch/ast.zig b/src/etch/ast.zig new file mode 100644 index 0000000..1a92b6c --- /dev/null +++ b/src/etch/ast.zig @@ -0,0 +1,759 @@ +//! Etch AST — tabular SoA `AstArena` per `etch-ast-ir.md` §3.2. +//! +//! Design notes (S3): +//! - One `MultiArrayList(Item|Stmt|Expr|TypeNode)` per category. Each entry +//! carries `(kind, data_index, span)`. Rich variants (`ComponentDecl`, +//! `RuleDecl`, `BinaryExpr`, ...) live in dedicated side slabs reached +//! via `data_index`. +//! - `NodeId = packed struct(u32) { category: u4, index: u28 }`. Indexes +//! the MultiArrayList for `category`. `NodeId.zero` means "absent". +//! - `extra: ArrayList(u32)` holds flat ranges referenced by rich items +//! (e.g. statement lists inside a rule body, fields list inside a +//! component declaration). Each reference is a `(start, len)` pair on +//! the side slab. +//! - `StringPool` interns identifier names and string literal contents. +//! - `AnnotationMap`: hash table keyed by `NodeId` → `AnnotationSpan` +//! (range in `annot_pool`). +//! - `comment_spans` is a parallel slab — not attached to NodeIds in S3, +//! kept for Phase 0.2 trivia attachment. +//! - `StableId` is absent (left at zero). The brief defers it to Phase 2 +//! when the editor injects `@id("uuid")`. +//! +//! Kind enums declare every EBNF v0.6 variant for API stability. S3 only +//! produces a subset; call sites switching on a kind enum must terminate +//! with `else => @panic("unsupported in S3")` per the brief. + +const std = @import("std"); +const token_mod = @import("token.zig"); + +pub const SourceSpan = token_mod.SourceSpan; + +// ─────────────────────────────── NodeId ───────────────────────────────── + +/// Open enum so Phase 0.2+ can add categories (patterns, etc.) without +/// breaking the packed `NodeId` layout. +pub const NodeCategory = enum(u4) { + item, + stmt, + expr, + type_node, + _, +}; + +pub const NodeId = packed struct(u32) { + category: NodeCategory, + index: u28, + + pub const none: NodeId = .{ .category = .item, .index = 0x0FFFFFFF }; + + pub inline fn isNone(self: NodeId) bool { + return std.meta.eql(self, none); + } + + pub inline fn raw(self: NodeId) u32 { + return @bitCast(self); + } +}; + +// ─────────────────────────────── StringPool ───────────────────────────── + +pub const StringId = u32; + +/// Deduplicating interner. Identifier names and string literal contents +/// share a single pool keyed by byte equality. Strings are stored in +/// individual allocations so the slices remain stable across calls — +/// the hash map's keys are owned slices, not pointers into a moving +/// ArrayList. The empty string is reserved at id 0 so `StringId(0)` +/// means "absent" for fields that may be unset. +pub const StringPool = struct { + /// Each entry is a heap-allocated slice. `slices[id]` returns the + /// canonical bytes for `StringId(id)`. Index 0 is the empty string. + slices: std.ArrayListUnmanaged([]const u8) = .empty, + /// Maps canonical bytes → StringId. + map: std.StringHashMapUnmanaged(StringId) = .empty, + + pub fn init(gpa: std.mem.Allocator) !StringPool { + var pool: StringPool = .{}; + // `intern` arms two errdefers on its own internals (`gpa.free(owned)` + // and `slices.pop()`) which roll back the per-call mutations. They + // do not, however, release the spare *capacity* of `slices` or + // `map` once one of them has grown. If the very first `intern` + // call fails after `slices.append` succeeded but before `map.put` + // completes, the unfreed slab capacity leaks. Guard the whole + // init with a top-level errdefer so partial state never escapes. + errdefer pool.deinit(gpa); + _ = try pool.intern(gpa, ""); + return pool; + } + + pub fn deinit(self: *StringPool, gpa: std.mem.Allocator) void { + for (self.slices.items) |s| gpa.free(s); + self.slices.deinit(gpa); + self.map.deinit(gpa); + } + + pub fn intern(self: *StringPool, gpa: std.mem.Allocator, s: []const u8) !StringId { + if (self.map.get(s)) |existing| return existing; + const owned = try gpa.dupe(u8, s); + errdefer gpa.free(owned); + const id: StringId = @intCast(self.slices.items.len); + try self.slices.append(gpa, owned); + errdefer _ = self.slices.pop(); + try self.map.put(gpa, owned, id); + return id; + } + + pub fn slice(self: *const StringPool, id: StringId) []const u8 { + if (id >= self.slices.items.len) return &[_]u8{}; + return self.slices.items[id]; + } +}; + +// ─────────────────────────────── Kinds ────────────────────────────────── + +/// Every EBNF v0.6 top-level construct. S3 produces only the marked +/// variants; the others are reserved for additive extension. +pub const ItemKind = enum { + // S3 + component_decl, + resource_decl, + rule_decl, + // Reserved + import_decl, + fn_decl, + struct_decl, + enum_decl, + trait_decl, + impl_decl, + event_decl, + tags_decl, + const_decl, + type_alias, + behavior_decl, + routine_decl, + quest_decl, + dialogue_decl, + ability_decl, + effect_decl, + shader_decl, + widget_decl, + theme_decl, + motion_decl, + locale_decl, + anim_graph_decl, + audio_graph_decl, + audio_score_decl, + sequence_decl, + data_decl, + scene_decl, + prefab_decl, + input_mapping_decl, + test_decl, + override_decl, +}; + +pub const StmtKind = enum { + // S3 + let_stmt, + assign_stmt, + expr_stmt, + // Reserved + const_stmt, + type_alias, + if_stmt, + for_stmt, + while_stmt, + loop_stmt, + match_stmt, + return_stmt, + emit_stmt, + try_catch_stmt, + throw_stmt, + assert_stmt, + break_stmt, + continue_stmt, + await_stmt, + race_stmt, + sync_stmt, + branch_stmt, + spawn_stmt, + timer_stmt, + quantize_stmt, + tag_mutation_stmt, +}; + +pub const ExprKind = enum { + // S3 + int_lit, + float_lit, + bool_lit, + string_lit, + ident, + field_access, + method_get, // entity.get(T) + method_get_mut, // entity.get_mut(T) + binary, + unary, + paren, + // Reserved + duration_lit, + time_lit, + color_lit, + none_lit, + some_lit, + path, + self_expr, + tag_path, + struct_lit, + array_lit, + map_lit, + tuple_lit, + cast, + range, + index, + method_call, + fn_call, + if_expr, + match_expr, + block_expr, + closure, + await_expr, + throw_expr, +}; + +pub const TypeNodeKind = enum { + // S3 + named, + // Reserved + path, + generic, + array, + slice, + map_type, + set_type, + tuple, + function, + self, + optional, + trait_bound, +}; + +// ─────────────────────────────── Binary / Unary opcodes ───────────────── + +pub const BinaryOp = enum { + add, + sub, + mul, + div, + rem, + eq, + neq, + lt, + gt, + le, + ge, + logical_and, + logical_or, +}; + +pub const UnaryOp = enum { + neg, // -x + logical_not, // not x +}; + +pub const AssignOp = enum { + assign, // = + add_assign, // += + sub_assign, // -= + mul_assign, // *= + div_assign, // /= + rem_assign, // %= +}; + +// ─────────────────────────────── Side-slab data ───────────────────────── + +/// A field of a component or resource declaration. Fields are stored in +/// `arena.fields` and referenced by `(start, len)` from the parent's side +/// slab entry. +pub const Field = struct { + name: StringId, + type_node: NodeId, + default_value: NodeId, // NodeId.none if absent + annotations_extra: u32, // start in `annot_pool` + annotations_len: u32, +}; + +pub const ComponentDecl = struct { + name: StringId, + fields_start: u32, // index into `arena.fields` + fields_len: u32, + annotations_extra: u32, + annotations_len: u32, +}; + +pub const ResourceDecl = struct { + name: StringId, + fields_start: u32, + fields_len: u32, + annotations_extra: u32, + annotations_len: u32, +}; + +pub const RuleParam = struct { + name: StringId, + type_node: NodeId, +}; + +pub const RuleDecl = struct { + name: StringId, + params_start: u32, // index into `arena.rule_params` + params_len: u32, + /// Index into `arena.when_nodes`; `none_when` if absent. + when_root: u32, + body_start: u32, // index into `arena.extra` (list of StmtId raw values) + body_len: u32, + annotations_extra: u32, + annotations_len: u32, + + pub const none_when: u32 = std.math.maxInt(u32); +}; + +/// `when` clause AST. Conditions form a binary tree of `and`/`or`/`not` +/// composed with leaf clauses (`has`, `has_with_filter`, `resource`, +/// `resource_changed`). +pub const WhenNodeKind = enum { + logical_and, + logical_or, + logical_not, + has, // entity has T + has_with_filter, // entity has T { field == value } + resource, // resource T + resource_changed, // resource T changed +}; + +pub const WhenNode = struct { + kind: WhenNodeKind, + /// Leaf: identifier (`entity` name) for entity-based; unused for + /// resource-based. Carries the StringId or 0. + entity_name: StringId, + /// Component or resource type name (S3 lexes types as TYPE_IDENT, + /// the type-checker resolves against builtin + declared). + type_name: StringId, + /// For `has_with_filter`: field name + filter value expression. + field_name: StringId, + filter_value: NodeId, // NodeId.none if absent + /// Children for `and` / `or` / `not`. `lhs` always set, `rhs` only + /// for `and` / `or`. + lhs: u32, // index into when_nodes + rhs: u32, + span: SourceSpan, + + pub const no_child: u32 = std.math.maxInt(u32); +}; + +pub const LetStmt = struct { + name: StringId, + is_mut: bool, + type_annotation: NodeId, // NodeId.none if absent + value: NodeId, // expr +}; + +pub const AssignStmt = struct { + target: NodeId, // expr — must be ident or field_access chain + op: AssignOp, + value: NodeId, // expr +}; + +pub const BinaryExpr = struct { + op: BinaryOp, + lhs: NodeId, + rhs: NodeId, +}; + +pub const UnaryExpr = struct { + op: UnaryOp, + operand: NodeId, +}; + +pub const FieldAccessExpr = struct { + receiver: NodeId, + field_name: StringId, +}; + +pub const MethodGetExpr = struct { + receiver: NodeId, + type_name: StringId, +}; + +pub const NamedTypeNode = struct { + name: StringId, +}; + +/// Annotation stored in `annot_pool`. `args_start`/`args_len` refer to +/// a flat range in `arena.annot_args` (`AnnotationArg` entries). +pub const Annotation = struct { + /// Builtin AnnotationKind when matched; `.custom` carries the name as + /// `custom_name` and is accepted by the S3 parser without applicability + /// validation (deferred Phase 0.2). + kind: AnnotationKind, + /// Name as written (for `.custom` and round-trip pretty-print). + name: StringId, + args_start: u32, + args_len: u32, + span: SourceSpan, +}; + +pub const AnnotationArg = struct { + /// 0 for positional args; otherwise the named argument's identifier. + name: StringId, + value: NodeId, // expr +}; + +/// Builtin annotation set. Covers `@phase`, `@priority`, `@run_on`, +/// `@pause_group`, `@config`, `@state`, `@transient`, `@save`, `@unit`, +/// `@range`, `@hidden`, `@readonly`, `@requires`, `@storage`, +/// `@replicated`, `@networked`, `@id`, `@loc` plus a `.custom` fallback +/// for unknown names (S3 accepts unknown annotations without erroring; +/// applicability validation is deferred Phase 0.2). +pub const AnnotationKind = enum { + custom, + phase, + priority, + run_on, + pause_group, + config, + state, + transient, + save, + unit, + range, + hidden, + readonly, + requires, + storage, + replicated, + networked, + id, + loc, + + pub fn fromName(name: []const u8) AnnotationKind { + if (std.mem.eql(u8, name, "phase")) return .phase; + if (std.mem.eql(u8, name, "priority")) return .priority; + if (std.mem.eql(u8, name, "run_on")) return .run_on; + if (std.mem.eql(u8, name, "pause_group")) return .pause_group; + if (std.mem.eql(u8, name, "config")) return .config; + if (std.mem.eql(u8, name, "state")) return .state; + if (std.mem.eql(u8, name, "transient")) return .transient; + if (std.mem.eql(u8, name, "save")) return .save; + if (std.mem.eql(u8, name, "unit")) return .unit; + if (std.mem.eql(u8, name, "range")) return .range; + if (std.mem.eql(u8, name, "hidden")) return .hidden; + if (std.mem.eql(u8, name, "readonly")) return .readonly; + if (std.mem.eql(u8, name, "requires")) return .requires; + if (std.mem.eql(u8, name, "storage")) return .storage; + if (std.mem.eql(u8, name, "replicated")) return .replicated; + if (std.mem.eql(u8, name, "networked")) return .networked; + if (std.mem.eql(u8, name, "id")) return .id; + if (std.mem.eql(u8, name, "loc")) return .loc; + return .custom; + } +}; + +// ─────────────────────────────── MultiArrayList entries ───────────────── + +pub const Item = struct { + kind: ItemKind, + data: u32, + span: SourceSpan, +}; + +pub const Stmt = struct { + kind: StmtKind, + data: u32, + span: SourceSpan, +}; + +pub const Expr = struct { + kind: ExprKind, + data: u32, + span: SourceSpan, +}; + +pub const TypeNode = struct { + kind: TypeNodeKind, + data: u32, + span: SourceSpan, +}; + +// ─────────────────────────────── AstArena ─────────────────────────────── + +pub const AstArena = struct { + items: std.MultiArrayList(Item) = .empty, + stmts: std.MultiArrayList(Stmt) = .empty, + exprs: std.MultiArrayList(Expr) = .empty, + type_nodes: std.MultiArrayList(TypeNode) = .empty, + + extra: std.ArrayListUnmanaged(u32) = .empty, + strings: StringPool = .{}, + + // Side slabs. + fields: std.ArrayListUnmanaged(Field) = .empty, + component_decls: std.ArrayListUnmanaged(ComponentDecl) = .empty, + resource_decls: std.ArrayListUnmanaged(ResourceDecl) = .empty, + rule_decls: std.ArrayListUnmanaged(RuleDecl) = .empty, + rule_params: std.ArrayListUnmanaged(RuleParam) = .empty, + when_nodes: std.ArrayListUnmanaged(WhenNode) = .empty, + + let_stmts: std.ArrayListUnmanaged(LetStmt) = .empty, + assign_stmts: std.ArrayListUnmanaged(AssignStmt) = .empty, + + binary_exprs: std.ArrayListUnmanaged(BinaryExpr) = .empty, + unary_exprs: std.ArrayListUnmanaged(UnaryExpr) = .empty, + field_accesses: std.ArrayListUnmanaged(FieldAccessExpr) = .empty, + method_gets: std.ArrayListUnmanaged(MethodGetExpr) = .empty, + named_types: std.ArrayListUnmanaged(NamedTypeNode) = .empty, + + // Annotation storage. + annotations: std.AutoHashMapUnmanaged(NodeId, AnnotationSpan) = .empty, + annot_pool: std.ArrayListUnmanaged(Annotation) = .empty, + annot_args: std.ArrayListUnmanaged(AnnotationArg) = .empty, + + /// Parallel slab — not attached to NodeIds in S3. + comment_spans: std.ArrayListUnmanaged(SourceSpan) = .empty, + + pub const AnnotationSpan = struct { + start: u32, + len: u32, + }; + + pub fn init(gpa: std.mem.Allocator) !AstArena { + var arena: AstArena = .{}; + arena.strings = try StringPool.init(gpa); + return arena; + } + + pub fn deinit(self: *AstArena, gpa: std.mem.Allocator) void { + self.items.deinit(gpa); + self.stmts.deinit(gpa); + self.exprs.deinit(gpa); + self.type_nodes.deinit(gpa); + self.extra.deinit(gpa); + self.strings.deinit(gpa); + self.fields.deinit(gpa); + self.component_decls.deinit(gpa); + self.resource_decls.deinit(gpa); + self.rule_decls.deinit(gpa); + self.rule_params.deinit(gpa); + self.when_nodes.deinit(gpa); + self.let_stmts.deinit(gpa); + self.assign_stmts.deinit(gpa); + self.binary_exprs.deinit(gpa); + self.unary_exprs.deinit(gpa); + self.field_accesses.deinit(gpa); + self.method_gets.deinit(gpa); + self.named_types.deinit(gpa); + self.annotations.deinit(gpa); + self.annot_pool.deinit(gpa); + self.annot_args.deinit(gpa); + self.comment_spans.deinit(gpa); + } + + // ─── Add helpers ──────────────────────────────────────────────────── + + pub fn addItem(self: *AstArena, gpa: std.mem.Allocator, kind: ItemKind, data: u32, span: SourceSpan) !NodeId { + const idx: u28 = @intCast(self.items.len); + try self.items.append(gpa, .{ .kind = kind, .data = data, .span = span }); + return .{ .category = .item, .index = idx }; + } + + pub fn addStmt(self: *AstArena, gpa: std.mem.Allocator, kind: StmtKind, data: u32, span: SourceSpan) !NodeId { + const idx: u28 = @intCast(self.stmts.len); + try self.stmts.append(gpa, .{ .kind = kind, .data = data, .span = span }); + return .{ .category = .stmt, .index = idx }; + } + + pub fn addExpr(self: *AstArena, gpa: std.mem.Allocator, kind: ExprKind, data: u32, span: SourceSpan) !NodeId { + const idx: u28 = @intCast(self.exprs.len); + try self.exprs.append(gpa, .{ .kind = kind, .data = data, .span = span }); + return .{ .category = .expr, .index = idx }; + } + + pub fn addTypeNode(self: *AstArena, gpa: std.mem.Allocator, kind: TypeNodeKind, data: u32, span: SourceSpan) !NodeId { + const idx: u28 = @intCast(self.type_nodes.len); + try self.type_nodes.append(gpa, .{ .kind = kind, .data = data, .span = span }); + return .{ .category = .type_node, .index = idx }; + } + + pub fn addNamedType(self: *AstArena, gpa: std.mem.Allocator, name: StringId, span: SourceSpan) !NodeId { + const idx: u32 = @intCast(self.named_types.items.len); + try self.named_types.append(gpa, .{ .name = name }); + return try self.addTypeNode(gpa, .named, idx, span); + } + + pub fn addBinary(self: *AstArena, gpa: std.mem.Allocator, op: BinaryOp, lhs: NodeId, rhs: NodeId, span: SourceSpan) !NodeId { + const idx: u32 = @intCast(self.binary_exprs.items.len); + try self.binary_exprs.append(gpa, .{ .op = op, .lhs = lhs, .rhs = rhs }); + return try self.addExpr(gpa, .binary, idx, span); + } + + pub fn addUnary(self: *AstArena, gpa: std.mem.Allocator, op: UnaryOp, operand: NodeId, span: SourceSpan) !NodeId { + const idx: u32 = @intCast(self.unary_exprs.items.len); + try self.unary_exprs.append(gpa, .{ .op = op, .operand = operand }); + return try self.addExpr(gpa, .unary, idx, span); + } + + pub fn addFieldAccess(self: *AstArena, gpa: std.mem.Allocator, receiver: NodeId, field_name: StringId, span: SourceSpan) !NodeId { + const idx: u32 = @intCast(self.field_accesses.items.len); + try self.field_accesses.append(gpa, .{ .receiver = receiver, .field_name = field_name }); + return try self.addExpr(gpa, .field_access, idx, span); + } + + pub fn addMethodGet(self: *AstArena, gpa: std.mem.Allocator, kind: ExprKind, receiver: NodeId, type_name: StringId, span: SourceSpan) !NodeId { + std.debug.assert(kind == .method_get or kind == .method_get_mut); + const idx: u32 = @intCast(self.method_gets.items.len); + try self.method_gets.append(gpa, .{ .receiver = receiver, .type_name = type_name }); + return try self.addExpr(gpa, kind, idx, span); + } + + pub fn addLetStmt(self: *AstArena, gpa: std.mem.Allocator, let: LetStmt, span: SourceSpan) !NodeId { + const idx: u32 = @intCast(self.let_stmts.items.len); + try self.let_stmts.append(gpa, let); + return try self.addStmt(gpa, .let_stmt, idx, span); + } + + pub fn addAssignStmt(self: *AstArena, gpa: std.mem.Allocator, assign: AssignStmt, span: SourceSpan) !NodeId { + const idx: u32 = @intCast(self.assign_stmts.items.len); + try self.assign_stmts.append(gpa, assign); + return try self.addStmt(gpa, .assign_stmt, idx, span); + } + + pub fn addExprStmt(self: *AstArena, gpa: std.mem.Allocator, expr: NodeId, span: SourceSpan) !NodeId { + return try self.addStmt(gpa, .expr_stmt, expr.raw(), span); + } + + // ─── Accessors ────────────────────────────────────────────────────── + + pub fn itemKind(self: *const AstArena, id: NodeId) ItemKind { + std.debug.assert(id.category == .item); + return self.items.items(.kind)[id.index]; + } + + pub fn itemSpan(self: *const AstArena, id: NodeId) SourceSpan { + std.debug.assert(id.category == .item); + return self.items.items(.span)[id.index]; + } + + pub fn itemData(self: *const AstArena, id: NodeId) u32 { + std.debug.assert(id.category == .item); + return self.items.items(.data)[id.index]; + } + + pub fn stmtKind(self: *const AstArena, id: NodeId) StmtKind { + std.debug.assert(id.category == .stmt); + return self.stmts.items(.kind)[id.index]; + } + + pub fn stmtSpan(self: *const AstArena, id: NodeId) SourceSpan { + std.debug.assert(id.category == .stmt); + return self.stmts.items(.span)[id.index]; + } + + pub fn stmtData(self: *const AstArena, id: NodeId) u32 { + std.debug.assert(id.category == .stmt); + return self.stmts.items(.data)[id.index]; + } + + pub fn exprKind(self: *const AstArena, id: NodeId) ExprKind { + std.debug.assert(id.category == .expr); + return self.exprs.items(.kind)[id.index]; + } + + pub fn exprSpan(self: *const AstArena, id: NodeId) SourceSpan { + std.debug.assert(id.category == .expr); + return self.exprs.items(.span)[id.index]; + } + + pub fn exprData(self: *const AstArena, id: NodeId) u32 { + std.debug.assert(id.category == .expr); + return self.exprs.items(.data)[id.index]; + } + + pub fn typeNodeKind(self: *const AstArena, id: NodeId) TypeNodeKind { + std.debug.assert(id.category == .type_node); + return self.type_nodes.items(.kind)[id.index]; + } + + pub fn typeNodeSpan(self: *const AstArena, id: NodeId) SourceSpan { + std.debug.assert(id.category == .type_node); + return self.type_nodes.items(.span)[id.index]; + } + + pub fn typeNodeData(self: *const AstArena, id: NodeId) u32 { + std.debug.assert(id.category == .type_node); + return self.type_nodes.items(.data)[id.index]; + } + + pub fn isEmpty(self: *const AstArena) bool { + return self.items.len == 0; + } +}; + +// ─────────────────────────────── tests ────────────────────────────────── + +test "NodeId encodes category and index round-trip" { + const id: NodeId = .{ .category = .expr, .index = 0x1234567 }; + try std.testing.expectEqual(NodeCategory.expr, id.category); + try std.testing.expectEqual(@as(u28, 0x1234567), id.index); + + const r = id.raw(); + const decoded: NodeId = @bitCast(r); + try std.testing.expectEqual(id.category, decoded.category); + try std.testing.expectEqual(id.index, decoded.index); +} + +test "StringPool interns identical identifiers to the same StringId" { + const gpa = std.testing.allocator; + var pool = try StringPool.init(gpa); + defer pool.deinit(gpa); + + const a = try pool.intern(gpa, "Health"); + const b = try pool.intern(gpa, "Health"); + const c = try pool.intern(gpa, "Armor"); + try std.testing.expectEqual(a, b); + try std.testing.expect(a != c); + try std.testing.expectEqualStrings("Health", pool.slice(a)); + try std.testing.expectEqualStrings("Armor", pool.slice(c)); + + // The empty string is reserved at id 0. + try std.testing.expectEqual(@as(StringId, 0), try pool.intern(gpa, "")); +} + +test "AstArena adds an int literal and retrieves its span and kind" { + const gpa = std.testing.allocator; + var arena = try AstArena.init(gpa); + defer arena.deinit(gpa); + + const id = try arena.addExpr(gpa, .int_lit, 42, .{ .byte_start = 0, .byte_end = 2 }); + try std.testing.expectEqual(ExprKind.int_lit, arena.exprKind(id)); + try std.testing.expectEqual(@as(u32, 42), arena.exprData(id)); + const span = arena.exprSpan(id); + try std.testing.expectEqual(@as(u32, 0), span.byte_start); + try std.testing.expectEqual(@as(u32, 2), span.byte_end); +} + +test "AstArena spans align with passed-in byte offsets" { + const gpa = std.testing.allocator; + var arena = try AstArena.init(gpa); + defer arena.deinit(gpa); + + const id_a = try arena.addExpr(gpa, .int_lit, 1, .{ .byte_start = 10, .byte_end = 12 }); + const id_b = try arena.addExpr(gpa, .int_lit, 2, .{ .byte_start = 13, .byte_end = 15 }); + try std.testing.expectEqual(@as(u32, 10), arena.exprSpan(id_a).byte_start); + try std.testing.expectEqual(@as(u32, 13), arena.exprSpan(id_b).byte_start); +} + +test "AnnotationKind.fromName recognises builtin names" { + try std.testing.expectEqual(AnnotationKind.phase, AnnotationKind.fromName("phase")); + try std.testing.expectEqual(AnnotationKind.range, AnnotationKind.fromName("range")); + try std.testing.expectEqual(AnnotationKind.custom, AnnotationKind.fromName("totally_unknown")); +} diff --git a/src/etch/diagnostics.zig b/src/etch/diagnostics.zig new file mode 100644 index 0000000..23b0a76 --- /dev/null +++ b/src/etch/diagnostics.zig @@ -0,0 +1,172 @@ +//! Etch parser diagnostics — typed `Diagnostic`, stable `DiagnosticCode` +//! names cross-version, and on-demand `(line, column)` computation from a +//! `LineIndex` built once per source. +//! +//! The S3 subset emits the codes listed in `briefs/S3-etch-parser-subset.md` +//! Scope / Diagnostics typed API. Codes are stable cross-version per the +//! reference catalogue in `etch-diagnostics.md` §1; new variants may be +//! added in later phases without renumbering existing ones. + +const std = @import("std"); +const token = @import("token.zig"); + +pub const SourceSpan = token.SourceSpan; + +/// Severity classes recognised by the S3 type-checker. The brief carves +/// `error_` and `warning`; the wider catalogue (`note`, `hint`) is +/// documented for forward compatibility but unused in S3. +pub const Severity = enum { + error_, + warning, +}; + +/// Stable cross-version diagnostic codes. The variant name (e.g. +/// `parse_error`) maps to the canonical short code (`E0001`) via +/// `code()` and to the canonical PascalCase name (`ParseError`) via +/// `name()`. S3 emits only the variants commented `S3`; the others are +/// reserved here so the enum can be extended additively in later phases. +pub const DiagnosticCode = enum { + // ── Parse / lex errors (E0001-E0099) ── + parse_error, // S3 — E0001 ParseError + + // ── Resolver — symbols / paths (E0100-E0199) ── + duplicate_symbol, // S3 — E0101 DuplicateSymbol + undefined_symbol, // S3 — E0102 UndefinedSymbol + + // ── Type errors (E0200-E0299) ── + type_mismatch, // S3 — E0200 TypeMismatch + + // ── Const eval errors (E1100-E1199) ── + not_const_evaluable, // S3 — E1101 NotConstEvaluable + + // ── Rule-specific errors (E1200-E1299) ── + unknown_component_in_when, // S3 — E1210 UnknownComponentInWhen + invalid_field_filter, // S3 — E1211 InvalidFieldFilter + resource_expected_in_when, // S3 — E1213 ResourceExpectedInWhen + + /// Canonical short code, e.g. `"E0001"`. + pub fn code(self: DiagnosticCode) []const u8 { + return switch (self) { + .parse_error => "E0001", + .duplicate_symbol => "E0101", + .undefined_symbol => "E0102", + .type_mismatch => "E0200", + .not_const_evaluable => "E1101", + .unknown_component_in_when => "E1210", + .invalid_field_filter => "E1211", + .resource_expected_in_when => "E1213", + }; + } + + /// Canonical PascalCase name, e.g. `"ParseError"`. + pub fn name(self: DiagnosticCode) []const u8 { + return switch (self) { + .parse_error => "ParseError", + .duplicate_symbol => "DuplicateSymbol", + .undefined_symbol => "UndefinedSymbol", + .type_mismatch => "TypeMismatch", + .not_const_evaluable => "NotConstEvaluable", + .unknown_component_in_when => "UnknownComponentInWhen", + .invalid_field_filter => "InvalidFieldFilter", + .resource_expected_in_when => "ResourceExpectedInWhen", + }; + } +}; + +/// A single diagnostic. Owns its `primary_message` slice — duplicated via +/// the caller's allocator when constructed; the type-checker / parser keep +/// the diagnostic list in an `ArrayListUnmanaged(Diagnostic)` and free +/// each `primary_message` at `deinit`. +pub const Diagnostic = struct { + code: DiagnosticCode, + severity: Severity, + primary_span: SourceSpan, + primary_message: []const u8, + + pub fn deinit(self: *Diagnostic, gpa: std.mem.Allocator) void { + gpa.free(self.primary_message); + } +}; + +/// `LineIndex` precomputes the byte offset of each line start so that +/// `(line, column)` can be resolved in `O(log n)` from a `SourceSpan`. +/// Built once per source. +pub const LineIndex = struct { + /// `line_starts[i]` is the byte offset of the first character on + /// line `i` (1-indexed: `line_starts[0]` is the start of line 1). + line_starts: std.ArrayListUnmanaged(u32), + source_len: u32, + + pub fn init(gpa: std.mem.Allocator, source: []const u8) !LineIndex { + var line_starts: std.ArrayListUnmanaged(u32) = .empty; + errdefer line_starts.deinit(gpa); + try line_starts.append(gpa, 0); + var i: u32 = 0; + while (i < source.len) : (i += 1) { + if (source[i] == '\n') { + try line_starts.append(gpa, i + 1); + } + } + return .{ + .line_starts = line_starts, + .source_len = @intCast(source.len), + }; + } + + pub fn deinit(self: *LineIndex, gpa: std.mem.Allocator) void { + self.line_starts.deinit(gpa); + } + + /// 1-indexed (line, column). Column counts bytes from the start of + /// the line; for ASCII / single-byte spans this matches the visual + /// column. Multi-byte UTF-8 in string literals is reported by its + /// leading byte offset, which is sufficient for S3 diagnostics. + pub const LineColumn = struct { + line: u32, + column: u32, + }; + + pub fn lineColumn(self: *const LineIndex, byte_offset: u32) LineColumn { + // Binary search for the largest line_starts[i] <= byte_offset. + const starts = self.line_starts.items; + var lo: usize = 0; + var hi: usize = starts.len; + while (lo + 1 < hi) { + const mid = lo + (hi - lo) / 2; + if (starts[mid] <= byte_offset) { + lo = mid; + } else { + hi = mid; + } + } + return .{ + .line = @intCast(lo + 1), + .column = byte_offset - starts[lo] + 1, + }; + } +}; + +test "Diagnostic line/column computed correctly from byte span" { + const gpa = std.testing.allocator; + const source = "abc\ndef\nghijkl\nmno"; + var idx = try LineIndex.init(gpa, source); + defer idx.deinit(gpa); + + // 'a' → line 1 col 1 + try std.testing.expectEqual(LineIndex.LineColumn{ .line = 1, .column = 1 }, idx.lineColumn(0)); + // 'd' (line 2, col 1) + try std.testing.expectEqual(LineIndex.LineColumn{ .line = 2, .column = 1 }, idx.lineColumn(4)); + // 'i' on line 3 col 3 + try std.testing.expectEqual(LineIndex.LineColumn{ .line = 3, .column = 3 }, idx.lineColumn(10)); + // 'm' on line 4 col 1 + try std.testing.expectEqual(LineIndex.LineColumn{ .line = 4, .column = 1 }, idx.lineColumn(15)); +} + +test "DiagnosticCode code and name are stable cross-version" { + try std.testing.expectEqualStrings("E0001", DiagnosticCode.parse_error.code()); + try std.testing.expectEqualStrings("ParseError", DiagnosticCode.parse_error.name()); + try std.testing.expectEqualStrings("E0101", DiagnosticCode.duplicate_symbol.code()); + try std.testing.expectEqualStrings("E1210", DiagnosticCode.unknown_component_in_when.code()); + try std.testing.expectEqualStrings("UnknownComponentInWhen", DiagnosticCode.unknown_component_in_when.name()); + try std.testing.expectEqualStrings("E1213", DiagnosticCode.resource_expected_in_when.code()); +} diff --git a/src/etch/lexer.zig b/src/etch/lexer.zig new file mode 100644 index 0000000..94ced58 --- /dev/null +++ b/src/etch/lexer.zig @@ -0,0 +1,379 @@ +//! S3 Etch lexer — UTF-8 byte stream tokenizer producing the subset of +//! tokens listed in `briefs/S3-etch-parser-subset.md` Scope / Lexer. +//! +//! Behaviour summary: +//! - Identifiers and keywords are ASCII-only (per `etch-grammar.md` §1.2). +//! - String literals (simple-quote) accept arbitrary UTF-8 verbatim. +//! - Comments (`//`, `/* */`, `///`) are skipped; their byte spans are +//! collected in `comment_spans` for future Phase 0.2 trivia attachment. +//! - Invalid UTF-8 emits an `error_utf8` token; the parser maps it to +//! `E0001 ParseError`. +//! - Unknown Etch keywords outside the S3 subset are tokenised as +//! `error_unknown_keyword`; parser raises `E0001` at the use site. + +const std = @import("std"); +const token = @import("token.zig"); + +const Token = token.Token; +const TokenKind = token.TokenKind; +const SourceSpan = token.SourceSpan; + +pub const Lexer = struct { + source: []const u8, + pos: u32 = 0, + /// Byte spans of every `//`, `/* */`, and `///` comment encountered. + /// Not attached to AST nodes in S3 — kept as a parallel slab for + /// Phase 0.2's `TriviaMap`. + comment_spans: std.ArrayListUnmanaged(SourceSpan) = .empty, + + pub fn init(source: []const u8) Lexer { + return .{ .source = source }; + } + + pub fn deinit(self: *Lexer, gpa: std.mem.Allocator) void { + self.comment_spans.deinit(gpa); + } + + /// Produce the next token. Comments and whitespace are skipped + /// internally; `eof` is returned once the source is exhausted. + pub fn next(self: *Lexer, gpa: std.mem.Allocator) !Token { + while (true) { + self.skipWhitespace(); + if (self.pos >= self.source.len) { + return .{ .kind = .eof, .span = .{ .byte_start = @intCast(self.source.len), .byte_end = @intCast(self.source.len) } }; + } + const start = self.pos; + const c = self.source[self.pos]; + switch (c) { + '/' => { + if (self.pos + 1 < self.source.len) { + const c2 = self.source[self.pos + 1]; + if (c2 == '/') { + try self.skipLineComment(gpa); + continue; + } + if (c2 == '*') { + try self.skipBlockComment(gpa); + continue; + } + } + return self.singleOrCompound(start, .slash, .slash_eq); + }, + '+' => return self.singleOrCompound(start, .plus, .plus_eq), + '-' => return self.singleOrCompound(start, .minus, .minus_eq), + '*' => return self.singleOrCompound(start, .star, .star_eq), + '%' => return self.singleOrCompound(start, .percent, .percent_eq), + '=' => return self.singleOrCompound(start, .eq, .eq_eq), + '!' => { + self.pos += 1; + if (self.pos < self.source.len and self.source[self.pos] == '=') { + self.pos += 1; + return .{ .kind = .bang_eq, .span = .{ .byte_start = start, .byte_end = self.pos } }; + } + // `!` postfix isn't in the S3 operator set — fall through to error. + return .{ .kind = .error_byte, .span = .{ .byte_start = start, .byte_end = self.pos } }; + }, + '<' => return self.singleOrCompound(start, .lt, .lt_eq), + '>' => return self.singleOrCompound(start, .gt, .gt_eq), + '(' => return self.consumeOne(.lparen), + ')' => return self.consumeOne(.rparen), + '{' => return self.consumeOne(.lbrace), + '}' => return self.consumeOne(.rbrace), + ':' => return self.consumeOne(.colon), + ',' => return self.consumeOne(.comma), + '.' => return self.consumeOne(.dot), + '@' => return self.consumeOne(.at), + '"' => return self.lexString(start), + '0'...'9' => return self.lexNumber(start), + 'a'...'z', 'A'...'Z', '_' => return self.lexIdent(start), + else => { + // Anything else is either invalid UTF-8 (continuation + // byte without leader, or malformed sequence) or a + // byte outside the S3 lexicon. Either way: error + // token covering exactly one byte (or the full bad + // UTF-8 run). The parser will surface `E0001`. + if (c < 0x80) { + self.pos += 1; + return .{ .kind = .error_byte, .span = .{ .byte_start = start, .byte_end = self.pos } }; + } + return self.lexUtf8(start); + }, + } + } + } + + fn consumeOne(self: *Lexer, kind: TokenKind) Token { + const start = self.pos; + self.pos += 1; + return .{ .kind = kind, .span = .{ .byte_start = start, .byte_end = self.pos } }; + } + + fn singleOrCompound(self: *Lexer, start: u32, single: TokenKind, compound: TokenKind) Token { + self.pos += 1; + if (self.pos < self.source.len and self.source[self.pos] == '=') { + self.pos += 1; + return .{ .kind = compound, .span = .{ .byte_start = start, .byte_end = self.pos } }; + } + return .{ .kind = single, .span = .{ .byte_start = start, .byte_end = self.pos } }; + } + + fn skipWhitespace(self: *Lexer) void { + while (self.pos < self.source.len) : (self.pos += 1) { + const c = self.source[self.pos]; + if (c != ' ' and c != '\t' and c != '\n' and c != '\r') return; + } + } + + fn skipLineComment(self: *Lexer, gpa: std.mem.Allocator) !void { + const start = self.pos; + // Either `//` or `///` (doc comment). The brief lexes `///` as a + // regular comment in S3 (doc-comments map deferred Phase 0.2). + self.pos += 2; + while (self.pos < self.source.len and self.source[self.pos] != '\n') : (self.pos += 1) {} + try self.comment_spans.append(gpa, .{ .byte_start = start, .byte_end = self.pos }); + } + + fn skipBlockComment(self: *Lexer, gpa: std.mem.Allocator) !void { + const start = self.pos; + self.pos += 2; + while (self.pos + 1 < self.source.len) { + if (self.source[self.pos] == '*' and self.source[self.pos + 1] == '/') { + self.pos += 2; + try self.comment_spans.append(gpa, .{ .byte_start = start, .byte_end = self.pos }); + return; + } + self.pos += 1; + } + // Unterminated — consume to EOF; parser will see the truncation + // via the next non-whitespace token (typically `eof`). + self.pos = @intCast(self.source.len); + try self.comment_spans.append(gpa, .{ .byte_start = start, .byte_end = self.pos }); + } + + fn lexIdent(self: *Lexer, start: u32) Token { + self.pos += 1; + while (self.pos < self.source.len) : (self.pos += 1) { + const c = self.source[self.pos]; + const is_alnum = (c >= 'a' and c <= 'z') or (c >= 'A' and c <= 'Z') or (c >= '0' and c <= '9') or c == '_'; + if (!is_alnum) break; + } + const lexeme = self.source[start..self.pos]; + const span: SourceSpan = .{ .byte_start = start, .byte_end = self.pos }; + + // Keyword lookup (S3 subset first). + for (token.s3_keywords) |kw| { + if (std.mem.eql(u8, kw.lexeme, lexeme)) { + return .{ .kind = kw.kind, .span = span }; + } + } + // Then the Etch keyword reserve list (yields `error_unknown_keyword`). + for (token.non_s3_keywords) |kw| { + if (std.mem.eql(u8, kw, lexeme)) { + return .{ .kind = .error_unknown_keyword, .span = span }; + } + } + // Otherwise it's a regular identifier — case-disambiguated. + const first = self.source[start]; + const kind: TokenKind = if (first >= 'A' and first <= 'Z') .type_ident else .ident; + return .{ .kind = kind, .span = span }; + } + + fn lexNumber(self: *Lexer, start: u32) Token { + // Consume integer part. + while (self.pos < self.source.len) : (self.pos += 1) { + const c = self.source[self.pos]; + if (!((c >= '0' and c <= '9') or c == '_')) break; + } + // Optional fractional part: only if `.` is followed by a digit. + // `42.field` must lex as INT + DOT + IDENT, not FLOAT. + var is_float = false; + if (self.pos + 1 < self.source.len and self.source[self.pos] == '.') { + const after_dot = self.source[self.pos + 1]; + if (after_dot >= '0' and after_dot <= '9') { + is_float = true; + self.pos += 1; // dot + while (self.pos < self.source.len) : (self.pos += 1) { + const c = self.source[self.pos]; + if (!((c >= '0' and c <= '9') or c == '_')) break; + } + } + } + return .{ + .kind = if (is_float) .float_literal else .int_literal, + .span = .{ .byte_start = start, .byte_end = self.pos }, + }; + } + + fn lexString(self: *Lexer, start: u32) Token { + self.pos += 1; // opening quote + while (self.pos < self.source.len) { + const c = self.source[self.pos]; + if (c == '"') { + self.pos += 1; + return .{ .kind = .string_literal, .span = .{ .byte_start = start, .byte_end = self.pos } }; + } + if (c == '\\') { + self.pos += 1; + if (self.pos < self.source.len) self.pos += 1; + continue; + } + if (c == '\n') break; // unterminated single-quote string + // Validate UTF-8 byte-by-byte: arbitrary continuation bytes are + // allowed inside the literal but a malformed sequence still + // surfaces as an error token via lexUtf8 from the outer loop. + // For S3 we accept all non-newline bytes verbatim inside the + // string literal — explicit UTF-8 validation is only enforced + // outside string literals (per brief). + self.pos += 1; + } + // Unterminated string: surface as error_byte at the opening quote. + return .{ .kind = .error_byte, .span = .{ .byte_start = start, .byte_end = self.pos } }; + } + + fn lexUtf8(self: *Lexer, start: u32) Token { + const c = self.source[start]; + // Determine expected continuation count from leading byte. + const expected_len: u8 = if ((c & 0b1110_0000) == 0b1100_0000) 2 else if ((c & 0b1111_0000) == 0b1110_0000) 3 else if ((c & 0b1111_1000) == 0b1111_0000) 4 else 0; + if (expected_len == 0) { + // Stray continuation byte or invalid leader. + self.pos += 1; + return .{ .kind = .error_utf8, .span = .{ .byte_start = start, .byte_end = self.pos } }; + } + if (start + expected_len > self.source.len) { + self.pos = @intCast(self.source.len); + return .{ .kind = .error_utf8, .span = .{ .byte_start = start, .byte_end = self.pos } }; + } + // Validate continuation bytes. + var i: u8 = 1; + while (i < expected_len) : (i += 1) { + if ((self.source[start + i] & 0b1100_0000) != 0b1000_0000) { + self.pos = start + i; + return .{ .kind = .error_utf8, .span = .{ .byte_start = start, .byte_end = self.pos } }; + } + } + // UTF-8 outside an identifier / string literal isn't part of the + // S3 lexicon (identifiers ASCII-only, no character literals). It's + // an error token regardless. + self.pos = start + expected_len; + return .{ .kind = .error_utf8, .span = .{ .byte_start = start, .byte_end = self.pos } }; + } +}; + +// ──────────────────────────── tests ───────────────────────────────────── + +test "lexer tokenizes minimal component declaration" { + const gpa = std.testing.allocator; + var lex = Lexer.init("component Health { current: float }"); + defer lex.deinit(gpa); + + try expectKind(&lex, gpa, .kw_component); + try expectKind(&lex, gpa, .type_ident); // Health + try expectKind(&lex, gpa, .lbrace); + try expectKind(&lex, gpa, .ident); // current + try expectKind(&lex, gpa, .colon); + try expectKind(&lex, gpa, .kw_float); + try expectKind(&lex, gpa, .rbrace); + try expectKind(&lex, gpa, .eof); +} + +test "lexer skips line and block comments, records spans in comment_spans" { + const gpa = std.testing.allocator; + const src = "// header\nlet x = 1 /* inline */ // trailing\n"; + var lex = Lexer.init(src); + defer lex.deinit(gpa); + + try expectKind(&lex, gpa, .kw_let); + try expectKind(&lex, gpa, .ident); + try expectKind(&lex, gpa, .eq); + try expectKind(&lex, gpa, .int_literal); + try expectKind(&lex, gpa, .eof); + try std.testing.expectEqual(@as(usize, 3), lex.comment_spans.items.len); + // First comment: spans the `// header` exactly. + try std.testing.expectEqualStrings("// header", src[lex.comment_spans.items[0].byte_start..lex.comment_spans.items[0].byte_end]); +} + +test "lexer skips triple-slash doc comments like line comments" { + const gpa = std.testing.allocator; + var lex = Lexer.init("/// doc\nlet x = 1"); + defer lex.deinit(gpa); + try expectKind(&lex, gpa, .kw_let); + try expectKind(&lex, gpa, .ident); + try expectKind(&lex, gpa, .eq); + try expectKind(&lex, gpa, .int_literal); + try expectKind(&lex, gpa, .eof); + try std.testing.expectEqual(@as(usize, 1), lex.comment_spans.items.len); +} + +test "lexer rejects invalid UTF-8 with error_utf8" { + const gpa = std.testing.allocator; + // 0xC3 0x28 — invalid two-byte sequence (continuation byte missing). + const src = [_]u8{ 0xC3, 0x28 }; + var lex = Lexer.init(&src); + defer lex.deinit(gpa); + const t = try lex.next(gpa); + try std.testing.expectEqual(TokenKind.error_utf8, t.kind); +} + +test "lexer disambiguates integer vs float literal" { + const gpa = std.testing.allocator; + var lex = Lexer.init("42 42.0 4.2 0.5"); + defer lex.deinit(gpa); + try expectKind(&lex, gpa, .int_literal); + try expectKind(&lex, gpa, .float_literal); + try expectKind(&lex, gpa, .float_literal); + try expectKind(&lex, gpa, .float_literal); + try expectKind(&lex, gpa, .eof); +} + +test "lexer flags unknown Etch keyword from full grammar as error_unknown_keyword" { + const gpa = std.testing.allocator; + var lex = Lexer.init("fn enum behavior"); + defer lex.deinit(gpa); + try expectKind(&lex, gpa, .error_unknown_keyword); + try expectKind(&lex, gpa, .error_unknown_keyword); + try expectKind(&lex, gpa, .error_unknown_keyword); +} + +test "lexer handles compound operators and keywords" { + const gpa = std.testing.allocator; + var lex = Lexer.init("a += b == c <= d and e or f not g"); + defer lex.deinit(gpa); + try expectKind(&lex, gpa, .ident); + try expectKind(&lex, gpa, .plus_eq); + try expectKind(&lex, gpa, .ident); + try expectKind(&lex, gpa, .eq_eq); + try expectKind(&lex, gpa, .ident); + try expectKind(&lex, gpa, .lt_eq); + try expectKind(&lex, gpa, .ident); + try expectKind(&lex, gpa, .kw_and); + try expectKind(&lex, gpa, .ident); + try expectKind(&lex, gpa, .kw_or); + try expectKind(&lex, gpa, .ident); + try expectKind(&lex, gpa, .kw_not); + try expectKind(&lex, gpa, .ident); +} + +test "lexer disambiguates integer followed by dot-field-access" { + const gpa = std.testing.allocator; + var lex = Lexer.init("42.x"); + defer lex.deinit(gpa); + try expectKind(&lex, gpa, .int_literal); + try expectKind(&lex, gpa, .dot); + try expectKind(&lex, gpa, .ident); +} + +test "lexer accepts string literal with arbitrary UTF-8 inside" { + const gpa = std.testing.allocator; + // String contains a multi-byte UTF-8 codepoint (é = 0xC3 0xA9). + const src = "\"café\""; + var lex = Lexer.init(src); + defer lex.deinit(gpa); + const t = try lex.next(gpa); + try std.testing.expectEqual(TokenKind.string_literal, t.kind); + try expectKind(&lex, gpa, .eof); +} + +fn expectKind(lex: *Lexer, gpa: std.mem.Allocator, kind: TokenKind) !void { + const t = try lex.next(gpa); + try std.testing.expectEqual(kind, t.kind); +} diff --git a/src/etch/parser.zig b/src/etch/parser.zig new file mode 100644 index 0000000..3d24879 --- /dev/null +++ b/src/etch/parser.zig @@ -0,0 +1,1071 @@ +//! S3 Etch parser — recursive descent for declarations, statements and +//! `when` clauses; Pratt parsing for binary expressions using the +//! precedence table from `etch-grammar.md` §3.1 restricted to the S3 +//! operator set (all left-associative). +//! +//! Produces an `AstArena` directly (no intermediate CST). On the first +//! parse error the parser stops; the returned AST contains a best-effort +//! partial result so subsequent type-checking can run on declarations +//! parsed before the error (cf. `briefs/S3-etch-parser-subset.md` Scope). + +const std = @import("std"); +const token_mod = @import("token.zig"); +const ast_mod = @import("ast.zig"); +const diag_mod = @import("diagnostics.zig"); +const lexer_mod = @import("lexer.zig"); + +const Token = token_mod.Token; +const TokenKind = token_mod.TokenKind; +const SourceSpan = token_mod.SourceSpan; +const AstArena = ast_mod.AstArena; +const NodeId = ast_mod.NodeId; +const NodeCategory = ast_mod.NodeCategory; +const StringId = ast_mod.StringId; +const Diagnostic = diag_mod.Diagnostic; +const DiagnosticCode = diag_mod.DiagnosticCode; +const Lexer = lexer_mod.Lexer; + +pub const ParseError = error{ ParseError, OutOfMemory }; + +pub const ParseResult = struct { + ast: AstArena, + diagnostic: ?Diagnostic, +}; + +pub fn parse(gpa: std.mem.Allocator, source: []const u8) !ParseResult { + var lexer = Lexer.init(source); + // Without this `errdefer`, an OOM coming from `lexer.next` or + // `parser.parseFile` after the lexer has already appended a comment + // span would leak the `Lexer.comment_spans` slab. The two explicit + // `lexer.deinit(gpa)` calls on the value-return paths still fire + // (errdefer does not run on value returns). + errdefer lexer.deinit(gpa); + var arena = try AstArena.init(gpa); + errdefer arena.deinit(gpa); + + const c0 = try lexer.next(gpa); + const c1 = try lexer.next(gpa); + const c2 = try lexer.next(gpa); + var parser: Parser = .{ + .gpa = gpa, + .source = source, + .lexer = &lexer, + .arena = &arena, + .current = c0, + .next_tok = c1, + .next2_tok = c2, + }; + parser.parseFile() catch |err| switch (err) { + error.OutOfMemory => return err, + error.ParseError => { + // Diagnostic already populated; transfer comment spans. + try arena.comment_spans.appendSlice(gpa, lexer.comment_spans.items); + lexer.deinit(gpa); + return .{ .ast = arena, .diagnostic = parser.diagnostic }; + }, + }; + + try arena.comment_spans.appendSlice(gpa, lexer.comment_spans.items); + lexer.deinit(gpa); + return .{ .ast = arena, .diagnostic = parser.diagnostic }; +} + +pub const Parser = struct { + gpa: std.mem.Allocator, + source: []const u8, + lexer: *Lexer, + arena: *AstArena, + /// Current token plus a 2-token lookahead. The disambiguation + /// between `entity has T { field == value }` (has-with-filter) + /// and `entity has T { /* rule body */ }` requires peeking through + /// `{` and the first token inside (which can be `IDENT == ...` + /// for a filter or anything else for the rule body). + current: Token, + next_tok: Token, + next2_tok: Token, + diagnostic: ?Diagnostic = null, + + // ─── Token stream helpers ──────────────────────────────────────────── + + fn advance(self: *Parser) !Token { + const t = self.current; + self.current = self.next_tok; + self.next_tok = self.next2_tok; + self.next2_tok = try self.lexer.next(self.gpa); + return t; + } + + fn peek(self: *const Parser) TokenKind { + return self.current.kind; + } + + fn peekNext(self: *const Parser) TokenKind { + return self.next_tok.kind; + } + + fn peekNext2(self: *const Parser) TokenKind { + return self.next2_tok.kind; + } + + fn peekSpan(self: *const Parser) SourceSpan { + return self.current.span; + } + + fn expect(self: *Parser, kind: TokenKind, msg: []const u8) !Token { + if (self.current.kind != kind) { + return self.parseErr(self.current.span, msg); + } + return try self.advance(); + } + + fn match(self: *Parser, kind: TokenKind) !bool { + if (self.current.kind == kind) { + _ = try self.advance(); + return true; + } + return false; + } + + // ─── Diagnostic ────────────────────────────────────────────────────── + + fn parseErr(self: *Parser, span: SourceSpan, message: []const u8) ParseError { + if (self.diagnostic == null) { + const owned = self.gpa.dupe(u8, message) catch { + return error.OutOfMemory; + }; + self.diagnostic = .{ + .code = .parse_error, + .severity = .error_, + .primary_span = span, + .primary_message = owned, + }; + } + return error.ParseError; + } + + fn parseErrFmt(self: *Parser, span: SourceSpan, comptime fmt: []const u8, args: anytype) ParseError { + if (self.diagnostic == null) { + const owned = std.fmt.allocPrint(self.gpa, fmt, args) catch { + return error.OutOfMemory; + }; + self.diagnostic = .{ + .code = .parse_error, + .severity = .error_, + .primary_span = span, + .primary_message = owned, + }; + } + return error.ParseError; + } + + // ─── Source slice helpers ──────────────────────────────────────────── + + fn sliceOf(self: *const Parser, span: SourceSpan) []const u8 { + return self.source[span.byte_start..span.byte_end]; + } + + fn internSlice(self: *Parser, span: SourceSpan) !StringId { + return try self.arena.strings.intern(self.gpa, self.sliceOf(span)); + } + + fn internStringLiteral(self: *Parser, span: SourceSpan) !StringId { + // Trim the surrounding quotes; S3 string literals are simple-quote. + const raw = self.sliceOf(span); + if (raw.len < 2 or raw[0] != '"' or raw[raw.len - 1] != '"') { + return try self.arena.strings.intern(self.gpa, raw); + } + return try self.arena.strings.intern(self.gpa, raw[1 .. raw.len - 1]); + } + + // ─── Top-level ─────────────────────────────────────────────────────── + + pub fn parseFile(self: *Parser) ParseError!void { + while (self.peek() != .eof) { + try self.surfaceTokenErrors(); + const annotations = try self.parseAnnotations(); + try self.parseTopLevel(annotations); + } + } + + fn surfaceTokenErrors(self: *Parser) ParseError!void { + switch (self.peek()) { + .error_byte => return self.parseErrFmt(self.peekSpan(), "unexpected byte '{s}'", .{self.sliceOf(self.peekSpan())}), + .error_utf8 => return self.parseErr(self.peekSpan(), "invalid UTF-8 sequence"), + .error_unknown_keyword => return self.parseErrFmt(self.peekSpan(), "Etch keyword '{s}' is not supported in S3 (UnsupportedConstructInS3)", .{self.sliceOf(self.peekSpan())}), + else => {}, + } + } + + fn parseTopLevel(self: *Parser, annotations: AnnotationRange) ParseError!void { + switch (self.peek()) { + .kw_component => try self.parseComponentDecl(annotations), + .kw_resource => try self.parseResourceDecl(annotations), + .kw_rule => try self.parseRuleDecl(annotations), + .eof => {}, + else => return self.parseErrFmt(self.peekSpan(), "expected top-level declaration (component | resource | rule), got '{s}'", .{self.sliceOf(self.peekSpan())}), + } + } + + // ─── Annotations ───────────────────────────────────────────────────── + + pub const AnnotationRange = struct { + start: u32, + len: u32, + }; + + fn parseAnnotations(self: *Parser) ParseError!AnnotationRange { + const start: u32 = @intCast(self.arena.annot_pool.items.len); + while (self.peek() == .at) { + const at_tok = try self.advance(); + const name_tok = if (self.peek() == .ident or self.peek() == .type_ident) + try self.advance() + else + return self.parseErr(self.peekSpan(), "expected annotation name after '@'"); + + const name_slice = self.sliceOf(name_tok.span); + const name_id = try self.internSlice(name_tok.span); + const kind = ast_mod.AnnotationKind.fromName(name_slice); + + const args_start: u32 = @intCast(self.arena.annot_args.items.len); + var args_len: u32 = 0; + if (self.peek() == .lparen) { + _ = try self.advance(); + if (self.peek() != .rparen) { + while (true) { + const arg = try self.parseAnnotationArg(); + try self.arena.annot_args.append(self.gpa, arg); + args_len += 1; + if (!try self.match(.comma)) break; + } + } + _ = try self.expect(.rparen, "expected ')' to close annotation args"); + } + + const end_span = self.current.span; + const total_span: SourceSpan = .{ + .byte_start = at_tok.span.byte_start, + .byte_end = end_span.byte_start, + }; + try self.arena.annot_pool.append(self.gpa, .{ + .kind = kind, + .name = name_id, + .args_start = args_start, + .args_len = args_len, + .span = total_span, + }); + } + const len: u32 = @as(u32, @intCast(self.arena.annot_pool.items.len)) - start; + return .{ .start = start, .len = len }; + } + + fn parseAnnotationArg(self: *Parser) ParseError!ast_mod.AnnotationArg { + // Named arg if `ident ':' expr`. + if (self.peek() == .ident) { + // Lookahead: if next non-ident token is `:`, treat as named. + // The lexer's one-token lookahead is `self.current`; we have + // to commit to the ident and check the following token. + const saved = self.current; + _ = try self.advance(); + if (self.peek() == .colon) { + _ = try self.advance(); + const name_id = try self.internSlice(saved.span); + const value = try self.parseExpr(0); + return .{ .name = name_id, .value = value }; + } + // Not named: this was the start of a positional expression + // beginning with an ident. Build the expr starting from here + // by emitting an ident expr and continuing through Pratt. + const ident_id = try self.internSlice(saved.span); + const lhs = try self.arena.addExpr(self.gpa, .ident, ident_id, saved.span); + const continued = try self.continuePostfixAndBinary(lhs, 0); + return .{ .name = 0, .value = continued }; + } + // Positional: bare expression. + const expr = try self.parseExpr(0); + return .{ .name = 0, .value = expr }; + } + + // ─── Component / Resource ─────────────────────────────────────────── + + fn parseComponentDecl(self: *Parser, annotations: AnnotationRange) ParseError!void { + const kw_span = self.current.span; + _ = try self.advance(); // 'component' + const name_tok = try self.expect(.type_ident, "expected component name (TYPE_IDENT)"); + const name_id = try self.internSlice(name_tok.span); + _ = try self.expect(.lbrace, "expected '{' to start component body"); + + const fields_start: u32 = @intCast(self.arena.fields.items.len); + while (self.peek() != .rbrace) { + try self.surfaceTokenErrors(); + const field_annotations = try self.parseAnnotations(); + try self.parseField(field_annotations); + // Field separator: optional comma between fields. + _ = try self.match(.comma); + } + const closing = try self.expect(.rbrace, "expected '}' to close component body"); + const fields_len: u32 = @as(u32, @intCast(self.arena.fields.items.len)) - fields_start; + + const data_idx: u32 = @intCast(self.arena.component_decls.items.len); + try self.arena.component_decls.append(self.gpa, .{ + .name = name_id, + .fields_start = fields_start, + .fields_len = fields_len, + .annotations_extra = annotations.start, + .annotations_len = annotations.len, + }); + _ = try self.arena.addItem(self.gpa, .component_decl, data_idx, .{ + .byte_start = kw_span.byte_start, + .byte_end = closing.span.byte_end, + }); + } + + fn parseResourceDecl(self: *Parser, annotations: AnnotationRange) ParseError!void { + const kw_span = self.current.span; + _ = try self.advance(); // 'resource' + const name_tok = try self.expect(.type_ident, "expected resource name (TYPE_IDENT)"); + const name_id = try self.internSlice(name_tok.span); + _ = try self.expect(.lbrace, "expected '{' to start resource body"); + + const fields_start: u32 = @intCast(self.arena.fields.items.len); + while (self.peek() != .rbrace) { + try self.surfaceTokenErrors(); + const field_annotations = try self.parseAnnotations(); + try self.parseField(field_annotations); + _ = try self.match(.comma); + } + const closing = try self.expect(.rbrace, "expected '}' to close resource body"); + const fields_len: u32 = @as(u32, @intCast(self.arena.fields.items.len)) - fields_start; + + const data_idx: u32 = @intCast(self.arena.resource_decls.items.len); + try self.arena.resource_decls.append(self.gpa, .{ + .name = name_id, + .fields_start = fields_start, + .fields_len = fields_len, + .annotations_extra = annotations.start, + .annotations_len = annotations.len, + }); + _ = try self.arena.addItem(self.gpa, .resource_decl, data_idx, .{ + .byte_start = kw_span.byte_start, + .byte_end = closing.span.byte_end, + }); + } + + fn parseField(self: *Parser, annotations: AnnotationRange) ParseError!void { + const name_tok = try self.expect(.ident, "expected field name (identifier)"); + const name_id = try self.internSlice(name_tok.span); + _ = try self.expect(.colon, "expected ':' after field name"); + const type_node = try self.parseType(); + var default_value: NodeId = NodeId.none; + if (try self.match(.eq)) { + default_value = try self.parseExpr(0); + } + try self.arena.fields.append(self.gpa, .{ + .name = name_id, + .type_node = type_node, + .default_value = default_value, + .annotations_extra = annotations.start, + .annotations_len = annotations.len, + }); + } + + // ─── Type ──────────────────────────────────────────────────────────── + + fn parseType(self: *Parser) ParseError!NodeId { + switch (self.peek()) { + // PascalCase type identifiers (Entity, Vec3, Color, Duration, + // user-declared components/resources) and the primitive type + // keywords baked into the S3 lexer. + .type_ident, + .kw_int, + .kw_float, + .kw_bool, + .kw_i32, + .kw_u32, + .kw_f32, + .kw_f64, + // Lowercase identifiers that resemble types — including names + // outside the S3 builtin set (`string`, `char`, etc.). The + // type-checker emits `E0102 UndefinedSymbol` (or a POD-specific + // message when applicable). + .ident, + => { + const tok = try self.advance(); + const name_id = try self.internSlice(tok.span); + return try self.arena.addNamedType(self.gpa, name_id, tok.span); + }, + else => return self.parseErrFmt(self.peekSpan(), "expected type, got '{s}'", .{self.sliceOf(self.peekSpan())}), + } + } + + // ─── Rule ──────────────────────────────────────────────────────────── + + fn parseRuleDecl(self: *Parser, annotations: AnnotationRange) ParseError!void { + const kw_span = self.current.span; + _ = try self.advance(); // 'rule' + const name_tok = try self.expect(.ident, "expected rule name (identifier)"); + const name_id = try self.internSlice(name_tok.span); + + _ = try self.expect(.lparen, "expected '(' to begin rule parameters"); + const params_start: u32 = @intCast(self.arena.rule_params.items.len); + if (self.peek() != .rparen) { + while (true) { + const p_name = try self.expect(.ident, "expected parameter name"); + _ = try self.expect(.colon, "expected ':' after parameter name"); + const p_type = try self.parseType(); + try self.arena.rule_params.append(self.gpa, .{ + .name = try self.internSlice(p_name.span), + .type_node = p_type, + }); + if (!try self.match(.comma)) break; + } + } + _ = try self.expect(.rparen, "expected ')' to close rule parameters"); + const params_len: u32 = @as(u32, @intCast(self.arena.rule_params.items.len)) - params_start; + + var when_root: u32 = ast_mod.RuleDecl.none_when; + if (self.peek() == .kw_when) { + _ = try self.advance(); + when_root = try self.parseWhenExpr(); + } + + _ = try self.expect(.lbrace, "expected '{' to start rule body"); + const body_extra_start: u32 = @intCast(self.arena.extra.items.len); + while (self.peek() != .rbrace) { + try self.surfaceTokenErrors(); + const stmt_id = try self.parseStmt(); + try self.arena.extra.append(self.gpa, stmt_id.raw()); + } + const closing = try self.expect(.rbrace, "expected '}' to close rule body"); + const body_len: u32 = @as(u32, @intCast(self.arena.extra.items.len)) - body_extra_start; + + const data_idx: u32 = @intCast(self.arena.rule_decls.items.len); + try self.arena.rule_decls.append(self.gpa, .{ + .name = name_id, + .params_start = params_start, + .params_len = params_len, + .when_root = when_root, + .body_start = body_extra_start, + .body_len = body_len, + .annotations_extra = annotations.start, + .annotations_len = annotations.len, + }); + _ = try self.arena.addItem(self.gpa, .rule_decl, data_idx, .{ + .byte_start = kw_span.byte_start, + .byte_end = closing.span.byte_end, + }); + } + + // ─── When clause ───────────────────────────────────────────────────── + + fn parseWhenExpr(self: *Parser) ParseError!u32 { + return try self.parseWhenOr(); + } + + fn parseWhenOr(self: *Parser) ParseError!u32 { + var lhs = try self.parseWhenAnd(); + while (self.peek() == .kw_or) { + const op_span = (try self.advance()).span; + const rhs = try self.parseWhenAnd(); + const lhs_span = self.arena.when_nodes.items[lhs].span; + const rhs_span = self.arena.when_nodes.items[rhs].span; + const node = ast_mod.WhenNode{ + .kind = .logical_or, + .entity_name = 0, + .type_name = 0, + .field_name = 0, + .filter_value = NodeId.none, + .lhs = lhs, + .rhs = rhs, + .span = .{ + .byte_start = @min(lhs_span.byte_start, op_span.byte_start), + .byte_end = rhs_span.byte_end, + }, + }; + const idx: u32 = @intCast(self.arena.when_nodes.items.len); + try self.arena.when_nodes.append(self.gpa, node); + lhs = idx; + } + return lhs; + } + + fn parseWhenAnd(self: *Parser) ParseError!u32 { + var lhs = try self.parseWhenNot(); + while (self.peek() == .kw_and) { + const op_span = (try self.advance()).span; + const rhs = try self.parseWhenNot(); + const lhs_span = self.arena.when_nodes.items[lhs].span; + const rhs_span = self.arena.when_nodes.items[rhs].span; + const node = ast_mod.WhenNode{ + .kind = .logical_and, + .entity_name = 0, + .type_name = 0, + .field_name = 0, + .filter_value = NodeId.none, + .lhs = lhs, + .rhs = rhs, + .span = .{ + .byte_start = @min(lhs_span.byte_start, op_span.byte_start), + .byte_end = rhs_span.byte_end, + }, + }; + const idx: u32 = @intCast(self.arena.when_nodes.items.len); + try self.arena.when_nodes.append(self.gpa, node); + lhs = idx; + } + return lhs; + } + + fn parseWhenNot(self: *Parser) ParseError!u32 { + if (self.peek() == .kw_not) { + const op_span = (try self.advance()).span; + const child = try self.parseWhenPrimary(); + const child_span = self.arena.when_nodes.items[child].span; + const node = ast_mod.WhenNode{ + .kind = .logical_not, + .entity_name = 0, + .type_name = 0, + .field_name = 0, + .filter_value = NodeId.none, + .lhs = child, + .rhs = ast_mod.WhenNode.no_child, + .span = .{ + .byte_start = op_span.byte_start, + .byte_end = child_span.byte_end, + }, + }; + const idx: u32 = @intCast(self.arena.when_nodes.items.len); + try self.arena.when_nodes.append(self.gpa, node); + return idx; + } + return try self.parseWhenPrimary(); + } + + fn parseWhenPrimary(self: *Parser) ParseError!u32 { + if (self.peek() == .lparen) { + _ = try self.advance(); + const inner = try self.parseWhenOr(); + _ = try self.expect(.rparen, "expected ')' to close grouped when expression"); + return inner; + } + if (self.peek() == .kw_resource) { + const start_span = self.current.span; + _ = try self.advance(); + const type_tok = try self.expect(.type_ident, "expected resource type after 'resource'"); + const type_name = try self.internSlice(type_tok.span); + var kind = ast_mod.WhenNodeKind.resource; + var end_byte = type_tok.span.byte_end; + if (self.peek() == .kw_changed) { + const changed_tok = try self.advance(); + kind = .resource_changed; + end_byte = changed_tok.span.byte_end; + } + const node = ast_mod.WhenNode{ + .kind = kind, + .entity_name = 0, + .type_name = type_name, + .field_name = 0, + .filter_value = NodeId.none, + .lhs = ast_mod.WhenNode.no_child, + .rhs = ast_mod.WhenNode.no_child, + .span = .{ .byte_start = start_span.byte_start, .byte_end = end_byte }, + }; + const idx: u32 = @intCast(self.arena.when_nodes.items.len); + try self.arena.when_nodes.append(self.gpa, node); + return idx; + } + // `entity has T [{ field == value }]` + const entity_tok = try self.expect(.ident, "expected entity binding in when clause"); + const entity_name = try self.internSlice(entity_tok.span); + _ = try self.expect(.kw_has, "expected 'has' in when clause"); + const type_tok = try self.expect(.type_ident, "expected component type after 'has'"); + const type_name = try self.internSlice(type_tok.span); + + var kind = ast_mod.WhenNodeKind.has; + var field_name: StringId = 0; + var filter_value: NodeId = NodeId.none; + var end_byte = type_tok.span.byte_end; + // Disambiguation `{` filter vs `{` rule body: the filter form + // requires `{ IDENT == ... }`. Anything else (including `{ }` or + // `{ let ... }`) belongs to the surrounding rule body and must + // be left for `parseRuleDecl` to consume. + if (self.peek() == .lbrace and self.peekNext() == .ident and self.peekNext2() == .eq_eq) { + _ = try self.advance(); // '{' + const field_tok = try self.advance(); // IDENT + field_name = try self.internSlice(field_tok.span); + _ = try self.advance(); // '==' + filter_value = try self.parseExpr(0); + const closing = try self.expect(.rbrace, "expected '}' to close has-with-filter"); + end_byte = closing.span.byte_end; + kind = .has_with_filter; + } + const node = ast_mod.WhenNode{ + .kind = kind, + .entity_name = entity_name, + .type_name = type_name, + .field_name = field_name, + .filter_value = filter_value, + .lhs = ast_mod.WhenNode.no_child, + .rhs = ast_mod.WhenNode.no_child, + .span = .{ .byte_start = entity_tok.span.byte_start, .byte_end = end_byte }, + }; + const idx: u32 = @intCast(self.arena.when_nodes.items.len); + try self.arena.when_nodes.append(self.gpa, node); + return idx; + } + + // ─── Statements ────────────────────────────────────────────────────── + + fn parseStmt(self: *Parser) ParseError!NodeId { + if (self.peek() == .kw_let) { + return try self.parseLetStmt(); + } + // Either an assignment (lvalue followed by =/+=/etc.) or an expr stmt. + const expr_start = self.current.span; + const expr = try self.parseExpr(0); + if (isAssignOp(self.peek())) { + const op_tok = try self.advance(); + const op = assignOpFromKind(op_tok.kind); + const value = try self.parseExpr(0); + const span: SourceSpan = .{ + .byte_start = expr_start.byte_start, + .byte_end = self.arena.exprSpan(value).byte_end, + }; + return try self.arena.addAssignStmt(self.gpa, .{ + .target = expr, + .op = op, + .value = value, + }, span); + } + const span: SourceSpan = .{ + .byte_start = expr_start.byte_start, + .byte_end = self.arena.exprSpan(expr).byte_end, + }; + return try self.arena.addExprStmt(self.gpa, expr, span); + } + + fn parseLetStmt(self: *Parser) ParseError!NodeId { + const let_span = self.current.span; + _ = try self.advance(); + const is_mut = try self.match(.kw_mut); + const name_tok = try self.expect(.ident, "expected name after 'let'"); + const name_id = try self.internSlice(name_tok.span); + var type_annotation: NodeId = NodeId.none; + if (try self.match(.colon)) { + type_annotation = try self.parseType(); + } + _ = try self.expect(.eq, "expected '=' in let binding"); + const value = try self.parseExpr(0); + const span: SourceSpan = .{ + .byte_start = let_span.byte_start, + .byte_end = self.arena.exprSpan(value).byte_end, + }; + return try self.arena.addLetStmt(self.gpa, .{ + .name = name_id, + .is_mut = is_mut, + .type_annotation = type_annotation, + .value = value, + }, span); + } + + fn isAssignOp(kind: TokenKind) bool { + return switch (kind) { + .eq, .plus_eq, .minus_eq, .star_eq, .slash_eq, .percent_eq => true, + else => false, + }; + } + + fn assignOpFromKind(kind: TokenKind) ast_mod.AssignOp { + return switch (kind) { + .eq => .assign, + .plus_eq => .add_assign, + .minus_eq => .sub_assign, + .star_eq => .mul_assign, + .slash_eq => .div_assign, + .percent_eq => .rem_assign, + else => unreachable, + }; + } + + // ─── Expressions (Pratt) ───────────────────────────────────────────── + + pub fn parseExpr(self: *Parser, min_bp: u8) ParseError!NodeId { + const lhs = try self.parseUnary(); + return try self.continuePostfixAndBinary(lhs, min_bp); + } + + fn continuePostfixAndBinary(self: *Parser, lhs_in: NodeId, min_bp: u8) ParseError!NodeId { + var lhs = lhs_in; + while (true) { + const info = infixBindingPower(self.peek()) orelse break; + if (info.lbp < min_bp) break; + const op_tok = try self.advance(); + const op = binaryOpFromKind(op_tok.kind); + const rhs = try self.parseExpr(info.rbp); + const lhs_span = self.arena.exprSpan(lhs); + const rhs_span = self.arena.exprSpan(rhs); + const span: SourceSpan = .{ + .byte_start = lhs_span.byte_start, + .byte_end = rhs_span.byte_end, + }; + lhs = try self.arena.addBinary(self.gpa, op, lhs, rhs, span); + } + return lhs; + } + + fn parseUnary(self: *Parser) ParseError!NodeId { + switch (self.peek()) { + .minus => { + const op_span = (try self.advance()).span; + const operand = try self.parseUnary(); + const operand_span = self.arena.exprSpan(operand); + return try self.arena.addUnary(self.gpa, .neg, operand, .{ + .byte_start = op_span.byte_start, + .byte_end = operand_span.byte_end, + }); + }, + .kw_not => { + const op_span = (try self.advance()).span; + const operand = try self.parseUnary(); + const operand_span = self.arena.exprSpan(operand); + return try self.arena.addUnary(self.gpa, .logical_not, operand, .{ + .byte_start = op_span.byte_start, + .byte_end = operand_span.byte_end, + }); + }, + else => return try self.parsePostfix(), + } + } + + fn parsePostfix(self: *Parser) ParseError!NodeId { + var expr = try self.parsePrimary(); + while (self.peek() == .dot) { + _ = try self.advance(); + // After `.`: either a method `get(T)` / `get_mut(T)`, or a field. + switch (self.peek()) { + .kw_get => { + _ = try self.advance(); + expr = try self.parseGetCall(expr, .method_get); + }, + .kw_get_mut => { + _ = try self.advance(); + expr = try self.parseGetCall(expr, .method_get_mut); + }, + .ident => { + const field_tok = try self.advance(); + const field_id = try self.internSlice(field_tok.span); + const recv_span = self.arena.exprSpan(expr); + expr = try self.arena.addFieldAccess(self.gpa, expr, field_id, .{ + .byte_start = recv_span.byte_start, + .byte_end = field_tok.span.byte_end, + }); + }, + else => return self.parseErrFmt(self.peekSpan(), "expected field name or 'get'/'get_mut' after '.', got '{s}'", .{self.sliceOf(self.peekSpan())}), + } + } + return expr; + } + + fn parseGetCall(self: *Parser, receiver: NodeId, kind: ast_mod.ExprKind) ParseError!NodeId { + _ = try self.expect(.lparen, "expected '(' after get/get_mut"); + const type_tok = try self.expect(.type_ident, "expected component type inside get(T)"); + const type_name = try self.internSlice(type_tok.span); + const closing = try self.expect(.rparen, "expected ')' to close get/get_mut call"); + const recv_span = self.arena.exprSpan(receiver); + return try self.arena.addMethodGet(self.gpa, kind, receiver, type_name, .{ + .byte_start = recv_span.byte_start, + .byte_end = closing.span.byte_end, + }); + } + + fn parsePrimary(self: *Parser) ParseError!NodeId { + try self.surfaceTokenErrors(); + switch (self.peek()) { + .int_literal => { + const tok = try self.advance(); + const id = try self.internSlice(tok.span); + return try self.arena.addExpr(self.gpa, .int_lit, id, tok.span); + }, + .float_literal => { + const tok = try self.advance(); + const id = try self.internSlice(tok.span); + return try self.arena.addExpr(self.gpa, .float_lit, id, tok.span); + }, + .bool_literal => { + const tok = try self.advance(); + const id = try self.internSlice(tok.span); + return try self.arena.addExpr(self.gpa, .bool_lit, id, tok.span); + }, + .string_literal => { + const tok = try self.advance(); + const id = try self.internStringLiteral(tok.span); + return try self.arena.addExpr(self.gpa, .string_lit, id, tok.span); + }, + .ident => { + const tok = try self.advance(); + const id = try self.internSlice(tok.span); + return try self.arena.addExpr(self.gpa, .ident, id, tok.span); + }, + .type_ident => { + // TYPE_IDENT in expression position is a path-like value. + // S3 only accepts it as annotation argument shape — the + // type-checker does not resolve annotation args (Phase 0.2). + const tok = try self.advance(); + const id = try self.internSlice(tok.span); + return try self.arena.addExpr(self.gpa, .path, id, tok.span); + }, + .lparen => { + _ = try self.advance(); + const inner = try self.parseExpr(0); + _ = try self.expect(.rparen, "expected ')' to close parenthesized expression"); + return inner; + }, + .dot => { + // Enum variant shorthand `.foo` (e.g. annotation arg + // `.update`). S3 stores it as a `tag_path` kind expression + // with the bare identifier interned — the resolver in + // Phase 0.2 disambiguates enum variant vs tag path from + // the surrounding context. Tag path literals with + // multiple segments (`.foo.bar`) remain out-of-scope. + const dot_span = (try self.advance()).span; + if (self.peek() != .ident) { + return self.parseErrFmt(self.peekSpan(), "expected identifier after '.', got '{s}'", .{self.sliceOf(self.peekSpan())}); + } + const ident_tok = try self.advance(); + const id = try self.internSlice(ident_tok.span); + return try self.arena.addExpr(self.gpa, .tag_path, id, .{ + .byte_start = dot_span.byte_start, + .byte_end = ident_tok.span.byte_end, + }); + }, + else => return self.parseErrFmt(self.peekSpan(), "expected expression, got '{s}'", .{self.sliceOf(self.peekSpan())}), + } + } + + // Precedence table — values picked so that S3's left-associative + // operators behave correctly via the `rbp = lbp + 1` trick. + const InfixInfo = struct { lbp: u8, rbp: u8 }; + + fn infixBindingPower(kind: TokenKind) ?InfixInfo { + return switch (kind) { + .kw_or => .{ .lbp = 1, .rbp = 2 }, + .kw_and => .{ .lbp = 3, .rbp = 4 }, + .eq_eq, .bang_eq, .lt, .gt, .lt_eq, .gt_eq => .{ .lbp = 5, .rbp = 6 }, + .plus, .minus => .{ .lbp = 7, .rbp = 8 }, + .star, .slash, .percent => .{ .lbp = 9, .rbp = 10 }, + else => null, + }; + } + + fn binaryOpFromKind(kind: TokenKind) ast_mod.BinaryOp { + return switch (kind) { + .plus => .add, + .minus => .sub, + .star => .mul, + .slash => .div, + .percent => .rem, + .eq_eq => .eq, + .bang_eq => .neq, + .lt => .lt, + .gt => .gt, + .lt_eq => .le, + .gt_eq => .ge, + .kw_and => .logical_and, + .kw_or => .logical_or, + else => unreachable, + }; + } +}; + +// ─────────────────────────────── tests ────────────────────────────────── + +test "parser builds ComponentDecl with two annotated fields" { + const gpa = std.testing.allocator; + var result = try parse(gpa, + \\component Health { + \\ @range(0, 100) + \\ current: float = 100.0 + \\ @range(1, 100) + \\ max: float = 100.0 + \\} + ); + defer result.ast.deinit(gpa); + if (result.diagnostic) |d| { + var diag = d; + diag.deinit(gpa); + try std.testing.expect(false); + } + try std.testing.expectEqual(@as(usize, 1), result.ast.items.len); + try std.testing.expectEqual(ast_mod.ItemKind.component_decl, result.ast.items.items(.kind)[0]); + const cd = result.ast.component_decls.items[0]; + try std.testing.expectEqual(@as(u32, 2), cd.fields_len); + try std.testing.expectEqualStrings("Health", result.ast.strings.slice(cd.name)); + const f0 = result.ast.fields.items[cd.fields_start]; + try std.testing.expectEqualStrings("current", result.ast.strings.slice(f0.name)); + try std.testing.expectEqual(@as(u32, 1), f0.annotations_len); +} + +test "parser builds ResourceDecl with default value expression" { + const gpa = std.testing.allocator; + var result = try parse(gpa, + \\resource GameMode { + \\ max_players: int = 4 + \\} + ); + defer result.ast.deinit(gpa); + try std.testing.expect(result.diagnostic == null); + try std.testing.expectEqual(@as(usize, 1), result.ast.items.len); + try std.testing.expectEqual(ast_mod.ItemKind.resource_decl, result.ast.items.items(.kind)[0]); + const rd = result.ast.resource_decls.items[0]; + const f = result.ast.fields.items[rd.fields_start]; + try std.testing.expect(!f.default_value.isNone()); + try std.testing.expectEqual(ast_mod.ExprKind.int_lit, result.ast.exprKind(f.default_value)); +} + +test "parser builds RuleDecl with when clause composition (and / or / not)" { + const gpa = std.testing.allocator; + var result = try parse(gpa, + \\rule tick(entity: Entity, dt: float) + \\ when entity has Health + \\ and entity has Velocity + \\ or not entity has Frozen + \\{ + \\} + ); + defer result.ast.deinit(gpa); + if (result.diagnostic) |d| { + var diag = d; + defer diag.deinit(gpa); + std.debug.print("unexpected parse diagnostic: {s}\n", .{diag.primary_message}); + try std.testing.expect(false); + } + try std.testing.expectEqual(@as(usize, 1), result.ast.items.len); + const rd = result.ast.rule_decls.items[0]; + try std.testing.expect(rd.when_root != ast_mod.RuleDecl.none_when); + try std.testing.expectEqual(@as(u32, 2), rd.params_len); + // Root must be a logical_or (lowest precedence in the chain). + try std.testing.expectEqual(ast_mod.WhenNodeKind.logical_or, result.ast.when_nodes.items[rd.when_root].kind); +} + +test "parser handles binary expression precedence per grammar subset" { + const gpa = std.testing.allocator; + var result = try parse(gpa, + \\rule t() { + \\ let x = 1 + 2 * 3 + \\} + ); + defer result.ast.deinit(gpa); + try std.testing.expect(result.diagnostic == null); + const rd = result.ast.rule_decls.items[0]; + try std.testing.expectEqual(@as(u32, 1), rd.body_len); + const stmt_raw = result.ast.extra.items[rd.body_start]; + const stmt_id: NodeId = @bitCast(stmt_raw); + try std.testing.expectEqual(ast_mod.StmtKind.let_stmt, result.ast.stmtKind(stmt_id)); + const let = result.ast.let_stmts.items[result.ast.stmtData(stmt_id)]; + // Value should be (1 + (2 * 3)): top is binary `+`. + try std.testing.expectEqual(ast_mod.ExprKind.binary, result.ast.exprKind(let.value)); + const top = result.ast.binary_exprs.items[result.ast.exprData(let.value)]; + try std.testing.expectEqual(ast_mod.BinaryOp.add, top.op); + try std.testing.expectEqual(ast_mod.ExprKind.binary, result.ast.exprKind(top.rhs)); + const rhs = result.ast.binary_exprs.items[result.ast.exprData(top.rhs)]; + try std.testing.expectEqual(ast_mod.BinaryOp.mul, rhs.op); +} + +test "parser rejects unsupported top-level construct with E0001" { + const gpa = std.testing.allocator; + var result = try parse(gpa, + \\fn foo() {} + ); + defer result.ast.deinit(gpa); + try std.testing.expect(result.diagnostic != null); + var diag = result.diagnostic.?; + defer diag.deinit(gpa); + try std.testing.expectEqual(diag_mod.DiagnosticCode.parse_error, diag.code); +} + +test "parser stops at first parse error and returns partial AST" { + const gpa = std.testing.allocator; + var result = try parse(gpa, + \\component Health { current: float = 1.0 } + \\@@@@invalid + ); + defer result.ast.deinit(gpa); + try std.testing.expect(result.diagnostic != null); + var diag = result.diagnostic.?; + defer diag.deinit(gpa); + // First component should be parsed. + try std.testing.expectEqual(@as(usize, 1), result.ast.items.len); + try std.testing.expectEqual(ast_mod.ItemKind.component_decl, result.ast.items.items(.kind)[0]); +} + +test "parser accepts top-level declarations in any order" { + const gpa = std.testing.allocator; + var result = try parse(gpa, + \\rule uses_health(entity: Entity) + \\ when entity has Health + \\{ + \\ let h = entity.get(Health) + \\} + \\component Health { current: float = 100.0 } + ); + defer result.ast.deinit(gpa); + if (result.diagnostic) |d| { + var diag = d; + defer diag.deinit(gpa); + std.debug.print("unexpected diag: {s}\n", .{diag.primary_message}); + try std.testing.expect(false); + } + try std.testing.expectEqual(@as(usize, 2), result.ast.items.len); +} + +test "parser captures annotation kind and args" { + const gpa = std.testing.allocator; + var result = try parse(gpa, + \\component Health { + \\ @unit(.health_points) + \\ @range(0, 100) + \\ current: float = 100.0 + \\} + ); + defer result.ast.deinit(gpa); + // We don't currently parse `.foo` patterns; for S3 we accept named or + // bare expressions as annotation args. The brief notes annotation + // applicability is deferred — only "kind + args reachable" is required. + try std.testing.expect(result.diagnostic == null); + try std.testing.expectEqual(@as(usize, 1), result.ast.items.len); +} + +test "parser does not leak comment spans on OOM during init" { + // FailingAllocator wraps std.testing.allocator (which itself flags any + // leak as a test failure). Each `fail_index` from 1..N forces the Nth + // allocation to fail; we walk the range so that every distinct + // allocation site between the first byte read and the first parsed + // token gets exercised. The success path (no OOM at all) is excluded + // because that case is already covered by the rest of the test suite. + const sources = [_][]const u8{ + "// header\ncomponent X { f: int }", + "/* block */\ncomponent X { f: int }", + "// header line\n/// doc line\ncomponent X { f: int = 1 }", + }; + for (sources) |src| { + var fail_index: usize = 1; + while (fail_index < 64) : (fail_index += 1) { + var failing = std.testing.FailingAllocator.init(std.testing.allocator, .{ .fail_index = fail_index }); + const result = parse(failing.allocator(), src); + if (result) |ok| { + // No OOM happened at this fail index — the parser + // ran to completion with a real allocator. Free and + // move on; the test passes because no leak is reported. + var ok_mut = ok; + if (ok_mut.diagnostic) |*d| d.deinit(failing.allocator()); + ok_mut.ast.deinit(failing.allocator()); + break; + } else |err| { + try std.testing.expectEqual(error.OutOfMemory, err); + // std.testing.allocator under the failing wrapper will + // detect any leak when the test scope ends; the inner + // allocator is the testing allocator, so its leak + // tracker fires if `parse` failed to free. + } + } + } +} diff --git a/src/etch/root.zig b/src/etch/root.zig new file mode 100644 index 0000000..777b324 --- /dev/null +++ b/src/etch/root.zig @@ -0,0 +1,81 @@ +//! Public surface of the `weld_etch` module — the S3 Etch parser + minimal +//! type-checker. Designed to survive Phase 0.2 with additive changes only +//! per `briefs/S3-etch-parser-subset.md` Scope / Public surface. +//! +//! High-level helpers: +//! - `parse(gpa, source) !ParseResult` — runs the lexer + parser, returns +//! the AST plus at most one parse diagnostic. +//! - `typeCheck(gpa, ast, diags_out) !void` — runs pass 1 + pass 2 on a +//! resolved arena, accumulating diagnostics in `diags_out`. +//! +//! No public type exposes parser internal state, allocator-stored fields, +//! or pointers into the arena. + +const std = @import("std"); + +pub const lexer = @import("lexer.zig"); +pub const parser = @import("parser.zig"); +pub const ast = @import("ast.zig"); +pub const types = @import("types.zig"); +pub const diagnostics = @import("diagnostics.zig"); +pub const token = @import("token.zig"); + +pub const Lexer = lexer.Lexer; +pub const Token = token.Token; +pub const TokenKind = token.TokenKind; +pub const SourceSpan = token.SourceSpan; +pub const Parser = parser.Parser; +pub const ParseResult = parser.ParseResult; +pub const Ast = ast.AstArena; +pub const NodeId = ast.NodeId; +pub const NodeCategory = ast.NodeCategory; +pub const StringId = ast.StringId; +pub const TypeChecker = types.TypeChecker; +pub const Diagnostic = diagnostics.Diagnostic; +pub const DiagnosticCode = diagnostics.DiagnosticCode; +pub const Severity = diagnostics.Severity; +pub const LineIndex = diagnostics.LineIndex; + +/// Parse a full Etch source file. The returned `ParseResult` owns its +/// `AstArena` — call `result.ast.deinit(gpa)` when done. The diagnostic +/// (if any) owns its `primary_message` slice — call `diag.deinit(gpa)`. +pub fn parseSource(gpa: std.mem.Allocator, source: []const u8) !ParseResult { + return try parser.parse(gpa, source); +} + +/// Run pass 1 + pass 2 of the S3 type-checker on an already-parsed AST. +/// Accumulates diagnostics in `diags_out` (caller-owned). Each appended +/// diagnostic owns its `primary_message` slice. +pub fn typeCheck(gpa: std.mem.Allocator, arena: *Ast, diags_out: *std.ArrayListUnmanaged(Diagnostic)) !void { + try TypeChecker.check(gpa, arena, diags_out); +} + +test "public API parses an empty source successfully" { + const gpa = std.testing.allocator; + var result = try parseSource(gpa, ""); + defer result.ast.deinit(gpa); + try std.testing.expect(result.diagnostic == null); + try std.testing.expect(result.ast.isEmpty()); +} + +test "public API parses and type-checks a minimal component + rule" { + const gpa = std.testing.allocator; + var result = try parseSource(gpa, + \\component Health { current: float = 100.0 } + \\rule heal(entity: Entity) + \\ when entity has Health + \\{ + \\ entity.get_mut(Health).current += 1.0 + \\} + ); + defer result.ast.deinit(gpa); + try std.testing.expect(result.diagnostic == null); + + var diags: std.ArrayListUnmanaged(Diagnostic) = .empty; + defer { + for (diags.items) |*d| d.deinit(gpa); + diags.deinit(gpa); + } + try typeCheck(gpa, &result.ast, &diags); + try std.testing.expectEqual(@as(usize, 0), diags.items.len); +} diff --git a/src/etch/token.zig b/src/etch/token.zig new file mode 100644 index 0000000..b6e1583 --- /dev/null +++ b/src/etch/token.zig @@ -0,0 +1,191 @@ +//! Token types for the S3 Etch lexer. Keywords / operators / punctuation +//! mirror the brief's "Keywords recognized" and "Operators / punctuation +//! recognized" subsections of `briefs/S3-etch-parser-subset.md`. Any other +//! keyword from EBNF v0.6 is lexed as an `error_unknown_keyword` token so +//! the parser can emit `E0001 ParseError` with a precise span at the use +//! site (Scope: "Any other Etch keyword listed in `etch-grammar.md` §1.3 +//! is lexed as an unknown keyword token"). + +const std = @import("std"); + +/// Byte span in the original source. End is exclusive. +pub const SourceSpan = struct { + byte_start: u32, + byte_end: u32, + + pub fn merge(a: SourceSpan, b: SourceSpan) SourceSpan { + return .{ + .byte_start = @min(a.byte_start, b.byte_start), + .byte_end = @max(a.byte_end, b.byte_end), + }; + } +}; + +pub const TokenKind = enum { + // ── Literals ── + ident, // any identifier starting with [a-z_] + type_ident, // identifier starting with [A-Z] + int_literal, + float_literal, + bool_literal, // true / false + string_literal, // simple-quote only in S3 (no interpolation) + + // ── Keywords (S3 subset) ── + kw_let, + kw_mut, + kw_component, + kw_resource, + kw_rule, + kw_when, + kw_and, + kw_or, + kw_not, + kw_has, + kw_changed, + kw_get, + kw_get_mut, + + // ── Primitive type keywords (lexed as kw_type_*) ── + kw_int, + kw_float, + kw_bool, + kw_i32, + kw_u32, + kw_f32, + kw_f64, + + // ── Operators / punctuation ── + plus, + minus, + star, + slash, + percent, + eq, + plus_eq, + minus_eq, + star_eq, + slash_eq, + percent_eq, + eq_eq, + bang_eq, + lt, + gt, + lt_eq, + gt_eq, + lparen, + rparen, + lbrace, + rbrace, + colon, + comma, + dot, + at, + + // ── End / error ── + eof, + /// Unknown / unsupported byte. Carries the byte span; the parser + /// turns these into `E0001 ParseError` at use site. + error_byte, + /// Invalid UTF-8 continuation byte. The parser emits `E0001` with + /// the precise byte offset. + error_utf8, + /// Lexed an identifier that matches an Etch keyword outside the S3 + /// subset (e.g. `fn`, `enum`, `behavior`). The parser turns these + /// into `E0001 UnsupportedConstructInS3` at use site. + error_unknown_keyword, +}; + +pub const Token = struct { + kind: TokenKind, + span: SourceSpan, +}; + +/// Map `[]const u8` → `TokenKind` for keywords. The lookup is a linear +/// scan over a small static table — adequate for the S3 corpus (<200 LOC +/// per file, every identifier hit is amortised by the parser's main work). +pub const KeywordEntry = struct { lexeme: []const u8, kind: TokenKind }; + +pub const s3_keywords = [_]KeywordEntry{ + .{ .lexeme = "let", .kind = .kw_let }, + .{ .lexeme = "mut", .kind = .kw_mut }, + .{ .lexeme = "component", .kind = .kw_component }, + .{ .lexeme = "resource", .kind = .kw_resource }, + .{ .lexeme = "rule", .kind = .kw_rule }, + .{ .lexeme = "when", .kind = .kw_when }, + .{ .lexeme = "and", .kind = .kw_and }, + .{ .lexeme = "or", .kind = .kw_or }, + .{ .lexeme = "not", .kind = .kw_not }, + .{ .lexeme = "has", .kind = .kw_has }, + .{ .lexeme = "changed", .kind = .kw_changed }, + .{ .lexeme = "get", .kind = .kw_get }, + .{ .lexeme = "get_mut", .kind = .kw_get_mut }, + .{ .lexeme = "true", .kind = .bool_literal }, + .{ .lexeme = "false", .kind = .bool_literal }, + .{ .lexeme = "int", .kind = .kw_int }, + .{ .lexeme = "float", .kind = .kw_float }, + .{ .lexeme = "bool", .kind = .kw_bool }, + .{ .lexeme = "i32", .kind = .kw_i32 }, + .{ .lexeme = "u32", .kind = .kw_u32 }, + .{ .lexeme = "f32", .kind = .kw_f32 }, + .{ .lexeme = "f64", .kind = .kw_f64 }, +}; + +/// Etch keywords that introduce **constructs explicitly out of S3 scope** +/// (`briefs/S3-etch-parser-subset.md` Out-of-scope). Any identifier that +/// matches one of these is lexed as `error_unknown_keyword` so the parser +/// emits `E0001 UnsupportedConstructInS3` at use site. +/// +/// Type names (`string`, `Entity`, `Vec3`, ...) are deliberately omitted +/// — they reach the type-checker as plain identifiers or `TYPE_IDENT`s +/// and surface as `E0102 UndefinedSymbol` (or POD-specific messages on +/// component fields). Sub-construct keywords (`segment`, `state`, `layer`, +/// `bind`, ...) are also omitted: they are unreachable in legal S3 input +/// since their parent construct is already rejected, and including them +/// would collide with legitimate identifier names like `state`, `event`, +/// `priority`. +pub const non_s3_keywords = [_][]const u8{ + // ── Top-level constructs (26 of 29 from EBNF v0.6) ── + "fn", "struct", "enum", "trait", "impl", + "event", "tags", "import", "const", "type", + "private", "behavior", "routine", "quest", "dialogue", + "ability", "effect", "shader", "widget", "theme", + "motion", "anim_graph", "audio_graph", "audio_score", "sequence", + "data", "scene", "prefab", "input_mapping", "locale", + "test", "override", + + // ── Control flow (out of S3 rule body) ── + "if", "else", "for", + "in", "while", "break", "continue", "loop", + "match", "return", + + // ── Async machinery (out of S3) ── + "async", "await", "race", + "sync", "branch", "spawn", + + // ── Error handling (out of S3) ── + "try", "catch", + "throws", "throw", "assert", + + // ── Tag operators (out of S3) ── + "has_tag", "has_no_tag", + "has_any_tag", "has_all_tags", "has_no_tags", "add_tag", "remove_tag", + + // ── Timers / emit / lifecycle (out of S3) ── + "emit", "after", "every", "after_unscaled", "quantize", + + // Note: `as`, `where`, `self`, `none`, `some` are intentionally NOT + // listed — they appear in legitimate identifier-shaped positions in + // S3 annotation args (e.g. `@pause_group(.none)`). The S3 parser + // accepts them as plain identifiers; their grammar-level uses (cast, + // generic bound, impl self param, Optional construction) only show + // up in constructs already rejected at the top level. +}; + +test "non_s3_keywords does not collide with s3_keywords" { + inline for (s3_keywords) |s3_kw| { + for (non_s3_keywords) |non| { + // Each Etch keyword may appear in exactly one of the two tables. + try std.testing.expect(!std.mem.eql(u8, s3_kw.lexeme, non)); + } + } +} diff --git a/src/etch/types.zig b/src/etch/types.zig new file mode 100644 index 0000000..9ac00e7 --- /dev/null +++ b/src/etch/types.zig @@ -0,0 +1,869 @@ +//! S3 Etch type-checker — two passes over an `AstArena` produced by the +//! parser. Pass 1 collects top-level symbols (component / resource / rule) +//! and validates field declarations against the S3 builtin type set. +//! Pass 2 resolves the `when` clauses and rule bodies — checking ECS +//! access rules, expression types, and const-evaluable defaults. +//! +//! Behaviour mirrors `briefs/S3-etch-parser-subset.md` Scope / +//! "Type-checker — pass 1 (collect)" and "pass 2 (resolve / check)". +//! Diagnostics use the codes listed in `briefs/S3-etch-parser-subset.md` +//! Scope / Diagnostics typed API. + +const std = @import("std"); +const ast_mod = @import("ast.zig"); +const diag_mod = @import("diagnostics.zig"); +const token_mod = @import("token.zig"); + +const AstArena = ast_mod.AstArena; +const NodeId = ast_mod.NodeId; +const Diagnostic = diag_mod.Diagnostic; +const DiagnosticCode = diag_mod.DiagnosticCode; +const SourceSpan = token_mod.SourceSpan; +const StringId = ast_mod.StringId; + +pub const BuiltinType = enum { + int_, + float_, + bool_, + i32_, + u32_, + f32_, + f64_, + entity, + vec3, + color, + duration, + + pub fn isNumeric(self: BuiltinType) bool { + return switch (self) { + .int_, .float_, .i32_, .u32_, .f32_, .f64_ => true, + else => false, + }; + } + + pub fn isInteger(self: BuiltinType) bool { + return switch (self) { + .int_, .i32_, .u32_ => true, + else => false, + }; + } + + pub fn isFloat(self: BuiltinType) bool { + return switch (self) { + .float_, .f32_, .f64_ => true, + else => false, + }; + } + + pub fn fromName(name: []const u8) ?BuiltinType { + if (std.mem.eql(u8, name, "int")) return .int_; + if (std.mem.eql(u8, name, "float")) return .float_; + if (std.mem.eql(u8, name, "bool")) return .bool_; + if (std.mem.eql(u8, name, "i32")) return .i32_; + if (std.mem.eql(u8, name, "u32")) return .u32_; + if (std.mem.eql(u8, name, "f32")) return .f32_; + if (std.mem.eql(u8, name, "f64")) return .f64_; + if (std.mem.eql(u8, name, "Entity")) return .entity; + if (std.mem.eql(u8, name, "Vec3")) return .vec3; + if (std.mem.eql(u8, name, "Color")) return .color; + if (std.mem.eql(u8, name, "Duration")) return .duration; + return null; + } +}; + +/// `ResolvedType` is the type-checker's internal type representation. +pub const ResolvedType = union(enum) { + builtin: BuiltinType, + component: StringId, // user-declared component type name + resource: StringId, // user-declared resource type name + /// Type unknown / unresolved. Used as the fallback after a diagnostic + /// has been emitted; subsequent checks treat `unknown` as wildcard to + /// avoid cascade errors. + unknown, + + pub fn eql(a: ResolvedType, b: ResolvedType) bool { + if (std.meta.activeTag(a) != std.meta.activeTag(b)) return false; + return switch (a) { + .builtin => |bt| bt == b.builtin, + .component => |id| id == b.component, + .resource => |id| id == b.resource, + .unknown => true, + }; + } +}; + +/// Symbol entry in the file-local symbol table built by pass 1. +pub const SymbolKind = enum { component, resource, rule }; + +pub const Symbol = struct { + kind: SymbolKind, + name: StringId, + item_id: NodeId, +}; + +pub const TypeChecker = struct { + gpa: std.mem.Allocator, + arena: *AstArena, + diagnostics: *std.ArrayListUnmanaged(Diagnostic), + /// Symbol table keyed by interned name `StringId`. + symbols: std.AutoHashMapUnmanaged(StringId, Symbol) = .empty, + + pub fn deinit(self: *TypeChecker) void { + self.symbols.deinit(self.gpa); + } + + pub fn check(gpa: std.mem.Allocator, arena: *AstArena, diagnostics: *std.ArrayListUnmanaged(Diagnostic)) !void { + var tc: TypeChecker = .{ + .gpa = gpa, + .arena = arena, + .diagnostics = diagnostics, + }; + defer tc.deinit(); + try tc.pass1Collect(); + try tc.pass2Resolve(); + } + + // ─── Pass 1 ────────────────────────────────────────────────────────── + + fn pass1Collect(self: *TypeChecker) !void { + const kinds = self.arena.items.items(.kind); + const datas = self.arena.items.items(.data); + const spans = self.arena.items.items(.span); + var i: u28 = 0; + while (i < self.arena.items.len) : (i += 1) { + const item_id: NodeId = .{ .category = .item, .index = i }; + const kind = kinds[i]; + const data = datas[i]; + const span = spans[i]; + switch (kind) { + .component_decl => { + const decl = self.arena.component_decls.items[data]; + try self.registerSymbol(.component, decl.name, item_id, span); + try self.validateFieldsInDecl(decl.fields_start, decl.fields_len, true); + }, + .resource_decl => { + const decl = self.arena.resource_decls.items[data]; + try self.registerSymbol(.resource, decl.name, item_id, span); + try self.validateFieldsInDecl(decl.fields_start, decl.fields_len, false); + }, + .rule_decl => { + const decl = self.arena.rule_decls.items[data]; + try self.registerSymbol(.rule, decl.name, item_id, span); + }, + else => {}, // forward-compatible: unknown items ignored + } + } + } + + fn registerSymbol(self: *TypeChecker, kind: SymbolKind, name: StringId, item_id: NodeId, span: SourceSpan) !void { + const gop = try self.symbols.getOrPut(self.gpa, name); + if (gop.found_existing) { + const name_slice = self.arena.strings.slice(name); + try self.emit(.duplicate_symbol, .error_, span, "duplicate top-level symbol '{s}'", .{name_slice}); + return; + } + gop.value_ptr.* = .{ .kind = kind, .name = name, .item_id = item_id }; + } + + fn validateFieldsInDecl(self: *TypeChecker, fields_start: u32, fields_len: u32, is_component: bool) !void { + // Field name uniqueness within parent: collect into a small set. + var seen: std.AutoHashMapUnmanaged(StringId, void) = .empty; + defer seen.deinit(self.gpa); + + var i: u32 = 0; + while (i < fields_len) : (i += 1) { + const field = self.arena.fields.items[fields_start + i]; + const fname = self.arena.strings.slice(field.name); + + // Check uniqueness. + const gop = try seen.getOrPut(self.gpa, field.name); + if (gop.found_existing) { + const span = self.arena.typeNodeSpan(field.type_node); + try self.emit(.duplicate_symbol, .error_, span, "duplicate field '{s}'", .{fname}); + } + + // Resolve the type node. + const tspan = self.arena.typeNodeSpan(field.type_node); + const named_idx = self.arena.typeNodeData(field.type_node); + const named = self.arena.named_types.items[named_idx]; + const tname = self.arena.strings.slice(named.name); + + if (BuiltinType.fromName(tname) == null) { + // Try user-declared component or resource. + if (self.symbols.get(named.name)) |sym| { + if (sym.kind == .rule) { + try self.emit(.undefined_symbol, .error_, tspan, "type '{s}' is not a component, resource, or builtin", .{tname}); + } + // A field of component-typed or resource-typed value is + // still not in the S3 POD builtin set — reject as + // unsupported. The brief enforces builtin POD only. + try self.emit(.undefined_symbol, .error_, tspan, "type '{s}' is not in the S3 POD builtin set", .{tname}); + } else if (std.mem.eql(u8, tname, "string")) { + // `string` rejected on components per brief §POD; for + // resources `string` is also out of the S3 builtin set + // (resources POD-enforced via the same builtin table). + if (is_component) { + try self.emit(.undefined_symbol, .error_, tspan, "type 'string' is rejected on components in S3 (POD enforcement)", .{}); + } else { + try self.emit(.undefined_symbol, .error_, tspan, "type 'string' is not in the S3 builtin set", .{}); + } + } else { + try self.emit(.undefined_symbol, .error_, tspan, "unknown type '{s}'", .{tname}); + } + } + + // Default value type check + const-evaluability. + if (!field.default_value.isNone()) { + try self.checkFieldDefault(field.default_value, field.type_node); + } + } + } + + fn checkFieldDefault(self: *TypeChecker, value: NodeId, type_node: NodeId) !void { + // Const-evaluability check. + if (!isConstEvaluable(self.arena, value)) { + try self.emit(.not_const_evaluable, .error_, self.arena.exprSpan(value), "field default value must be a constant expression (literal, arithmetic on literals, or parenthesized)", .{}); + return; + } + const declared = self.namedTypeToResolved(type_node); + const actual = self.synthExpr(value, null); + if (declared == .builtin and actual == .builtin) { + if (!self.literalTypeFits(declared.builtin, value, actual.builtin)) { + try self.emit(.type_mismatch, .error_, self.arena.exprSpan(value), "default value type does not match declared field type", .{}); + } + } + // If declared isn't builtin (e.g. unknown), we already emitted a + // diagnostic during field-type resolution — skip cascade. + } + + /// Polymorphic int / float literal rule (cf. `etch-reference-part1.md` + /// §4.3). When the declared context type is given and the value is a + /// literal of the same numeric family (int family → any integer + /// builtin, float family → any float builtin), the literal fits. All + /// other forms require exact equality (no implicit numeric coercion). + fn literalTypeFits(self: *TypeChecker, declared: BuiltinType, actual_expr: NodeId, actual: BuiltinType) bool { + if (declared == actual) return true; + const kind = self.arena.exprKind(actual_expr); + if (kind == .int_lit and actual == .int_ and declared.isInteger()) return true; + if (kind == .float_lit and actual == .float_ and declared.isFloat()) return true; + // Negative literals via unary minus on a literal also fit when the + // operand is a matching numeric literal. + if (kind == .unary) { + const un = self.arena.unary_exprs.items[self.arena.exprData(actual_expr)]; + if (un.op == .neg) { + const inner_kind = self.arena.exprKind(un.operand); + if (inner_kind == .int_lit and actual == .int_ and declared.isInteger()) return true; + if (inner_kind == .float_lit and actual == .float_ and declared.isFloat()) return true; + } + } + return false; + } + + fn namedTypeToResolved(self: *TypeChecker, type_node: NodeId) ResolvedType { + const named_idx = self.arena.typeNodeData(type_node); + const named = self.arena.named_types.items[named_idx]; + const tname = self.arena.strings.slice(named.name); + if (BuiltinType.fromName(tname)) |bt| return .{ .builtin = bt }; + if (self.symbols.get(named.name)) |sym| { + return switch (sym.kind) { + .component => .{ .component = named.name }, + .resource => .{ .resource = named.name }, + else => .unknown, + }; + } + return .unknown; + } + + // ─── Pass 2 ────────────────────────────────────────────────────────── + + fn pass2Resolve(self: *TypeChecker) !void { + const kinds = self.arena.items.items(.kind); + const datas = self.arena.items.items(.data); + var i: u28 = 0; + while (i < self.arena.items.len) : (i += 1) { + const kind = kinds[i]; + const data = datas[i]; + switch (kind) { + .rule_decl => try self.checkRule(self.arena.rule_decls.items[data]), + else => {}, + } + } + } + + /// Per-rule context: components accessible via `entity.get(T)` and + /// resources accessible via `get(T)` (without receiver) as derived + /// from the `when` clause. + const RuleCtx = struct { + components_in_when: std.AutoHashMapUnmanaged(StringId, void) = .empty, + resources_in_when: std.AutoHashMapUnmanaged(StringId, void) = .empty, + /// Local variables in the rule body, keyed by name. + locals: std.AutoHashMapUnmanaged(StringId, Local) = .empty, + + pub const Local = struct { type_: ResolvedType, is_mut: bool }; + + pub fn deinit(self: *RuleCtx, gpa: std.mem.Allocator) void { + self.components_in_when.deinit(gpa); + self.resources_in_when.deinit(gpa); + self.locals.deinit(gpa); + } + }; + + fn checkRule(self: *TypeChecker, rule: ast_mod.RuleDecl) !void { + var ctx: RuleCtx = .{}; + defer ctx.deinit(self.gpa); + + // Resolve rule params. + var i: u32 = 0; + while (i < rule.params_len) : (i += 1) { + const p = self.arena.rule_params.items[rule.params_start + i]; + const ptype = self.namedTypeToResolved(p.type_node); + if (ptype == .unknown) { + const tname_idx = self.arena.typeNodeData(p.type_node); + const tname = self.arena.strings.slice(self.arena.named_types.items[tname_idx].name); + try self.emit(.undefined_symbol, .error_, self.arena.typeNodeSpan(p.type_node), "unknown type '{s}' on rule parameter", .{tname}); + } + try ctx.locals.put(self.gpa, p.name, .{ .type_ = ptype, .is_mut = false }); + } + + // Validate when-clause and collect accessible component/resource types. + if (rule.when_root != ast_mod.RuleDecl.none_when) { + try self.collectWhen(&ctx, rule.when_root); + } + + // Walk the body statements. + var s: u32 = 0; + while (s < rule.body_len) : (s += 1) { + const stmt_raw = self.arena.extra.items[rule.body_start + s]; + const stmt_id: NodeId = @bitCast(stmt_raw); + try self.checkStmt(&ctx, stmt_id); + } + } + + fn collectWhen(self: *TypeChecker, ctx: *RuleCtx, idx: u32) !void { + const node = self.arena.when_nodes.items[idx]; + switch (node.kind) { + .logical_and, .logical_or => { + try self.collectWhen(ctx, node.lhs); + try self.collectWhen(ctx, node.rhs); + }, + .logical_not => { + try self.collectWhen(ctx, node.lhs); + }, + .has, .has_with_filter => { + const tname_slice = self.arena.strings.slice(node.type_name); + if (self.symbols.get(node.type_name)) |sym| { + if (sym.kind != .component) { + try self.emit(.unknown_component_in_when, .error_, node.span, "'has' clause requires a component, '{s}' is a {s}", .{ tname_slice, @tagName(sym.kind) }); + } else { + try ctx.components_in_when.put(self.gpa, node.type_name, {}); + } + } else { + try self.emit(.unknown_component_in_when, .error_, node.span, "unknown component '{s}' in when clause", .{tname_slice}); + } + if (node.kind == .has_with_filter) { + // Validate field exists on component with compatible type. + try self.checkFieldFilter(node); + } + }, + .resource, .resource_changed => { + const tname_slice = self.arena.strings.slice(node.type_name); + if (self.symbols.get(node.type_name)) |sym| { + if (sym.kind != .resource) { + try self.emit(.resource_expected_in_when, .error_, node.span, "'resource' clause requires a resource, '{s}' is a {s}", .{ tname_slice, @tagName(sym.kind) }); + } else { + try ctx.resources_in_when.put(self.gpa, node.type_name, {}); + } + } else { + try self.emit(.resource_expected_in_when, .error_, node.span, "unknown resource '{s}' in when clause", .{tname_slice}); + } + }, + } + } + + fn checkFieldFilter(self: *TypeChecker, node: ast_mod.WhenNode) !void { + // `entity has T { field == value }` — verify field on T and value type. + const comp_sym = self.symbols.get(node.type_name) orelse return; + if (comp_sym.kind != .component) return; + const comp_data = self.arena.itemData(comp_sym.item_id); + const comp_decl = self.arena.component_decls.items[comp_data]; + var f_i: u32 = 0; + var found: ?ast_mod.Field = null; + while (f_i < comp_decl.fields_len) : (f_i += 1) { + const f = self.arena.fields.items[comp_decl.fields_start + f_i]; + if (f.name == node.field_name) { + found = f; + break; + } + } + if (found == null) { + const fname = self.arena.strings.slice(node.field_name); + const tname = self.arena.strings.slice(node.type_name); + try self.emit(.invalid_field_filter, .error_, node.span, "component '{s}' has no field '{s}'", .{ tname, fname }); + return; + } + const declared = self.namedTypeToResolved(found.?.type_node); + const actual = self.synthExpr(node.filter_value, null); + if (declared == .builtin and actual == .builtin and !declared.eql(actual)) { + try self.emit(.invalid_field_filter, .error_, node.span, "field filter type does not match field declared type", .{}); + } + } + + fn checkStmt(self: *TypeChecker, ctx: *RuleCtx, stmt_id: NodeId) !void { + const kind = self.arena.stmtKind(stmt_id); + const data = self.arena.stmtData(stmt_id); + switch (kind) { + .let_stmt => { + const let = self.arena.let_stmts.items[data]; + var declared: ?ResolvedType = null; + if (!let.type_annotation.isNone()) { + declared = self.namedTypeToResolved(let.type_annotation); + } + const inferred = self.synthExpr(let.value, ctx); + const final = if (declared) |d| blk: { + if (d == .builtin and inferred == .builtin and !self.literalTypeFits(d.builtin, let.value, inferred.builtin)) { + try self.emit(.type_mismatch, .error_, self.arena.exprSpan(let.value), "let initializer type does not match declared type", .{}); + } + break :blk d; + } else inferred; + // A binding to `entity.get_mut(T)` aliases the mutable + // component reference, so the local inherits mutability + // even when written `let h = ...` without `mut`. + const value_is_get_mut = self.arena.exprKind(let.value) == .method_get_mut; + try ctx.locals.put(self.gpa, let.name, .{ .type_ = final, .is_mut = let.is_mut or value_is_get_mut }); + }, + .assign_stmt => { + const assign = self.arena.assign_stmts.items[data]; + // Target must be either a mut local, or a field via get_mut. + const target_kind = self.arena.exprKind(assign.target); + if (target_kind == .ident) { + const name_id = self.arena.exprData(assign.target); + if (ctx.locals.get(name_id)) |local| { + if (!local.is_mut) { + const span = self.arena.exprSpan(assign.target); + try self.emit(.type_mismatch, .error_, span, "cannot assign to immutable binding (use 'let mut')", .{}); + } + const rhs_type = self.synthExpr(assign.value, ctx); + if (local.type_ == .builtin and rhs_type == .builtin and !self.literalTypeFits(local.type_.builtin, assign.value, rhs_type.builtin)) { + try self.emit(.type_mismatch, .error_, self.arena.exprSpan(assign.value), "assignment value type does not match binding type", .{}); + } + } else { + const name = self.arena.strings.slice(name_id); + try self.emit(.undefined_symbol, .error_, self.arena.exprSpan(assign.target), "unknown binding '{s}'", .{name}); + } + } else if (target_kind == .field_access) { + // Walk down: assignment is valid if the chain ends at + // either `entity.get_mut(T)` directly or an ident + // whose local binding is mutable (e.g. one bound via + // `let h = entity.get_mut(T)`). + const ok = isAssignTargetReachable(self.arena, ctx, assign.target); + if (!ok) { + try self.emit(.type_mismatch, .error_, self.arena.exprSpan(assign.target), "assignment target field must be accessed via entity.get_mut(T) or a mutable binding", .{}); + } + // Synthesize the field type and check the value matches it. + const lhs_type = self.synthExpr(assign.target, ctx); + const rhs_type = self.synthExpr(assign.value, ctx); + if (lhs_type == .builtin and rhs_type == .builtin and !self.literalTypeFits(lhs_type.builtin, assign.value, rhs_type.builtin)) { + try self.emit(.type_mismatch, .error_, self.arena.exprSpan(assign.value), "assignment value type does not match field type", .{}); + } + } else { + try self.emit(.type_mismatch, .error_, self.arena.exprSpan(assign.target), "unsupported assignment target in S3 rule body", .{}); + } + }, + .expr_stmt => { + const expr_id: NodeId = @bitCast(data); + _ = self.synthExpr(expr_id, ctx); + }, + else => {}, + } + } + + // ─── Expression typing ─────────────────────────────────────────────── + + const TypeError = std.mem.Allocator.Error; + + fn synthExpr(self: *TypeChecker, id: NodeId, ctx_opt: ?*RuleCtx) ResolvedType { + return self.synthExprE(id, ctx_opt) catch ResolvedType.unknown; + } + + fn synthExprE(self: *TypeChecker, id: NodeId, ctx_opt: ?*RuleCtx) TypeError!ResolvedType { + const kind = self.arena.exprKind(id); + const data = self.arena.exprData(id); + switch (kind) { + .int_lit => return .{ .builtin = .int_ }, + .float_lit => return .{ .builtin = .float_ }, + .bool_lit => return .{ .builtin = .bool_ }, + .string_lit => return ResolvedType.unknown, + .tag_path => return ResolvedType.unknown, // enum-variant shorthand; type unknown in S3 + .ident => { + const name_id: StringId = data; + if (ctx_opt) |ctx| { + if (ctx.locals.get(name_id)) |local| return local.type_; + } + try self.emit(.undefined_symbol, .error_, self.arena.exprSpan(id), "unknown identifier '{s}'", .{self.arena.strings.slice(name_id)}); + return ResolvedType.unknown; + }, + .field_access => { + const fa = self.arena.field_accesses.items[data]; + const receiver_type = try self.synthExprE(fa.receiver, ctx_opt); + return self.lookupFieldType(receiver_type, fa.field_name, self.arena.exprSpan(id)); + }, + .method_get, .method_get_mut => { + const mg = self.arena.method_gets.items[data]; + const receiver_type = try self.synthExprE(mg.receiver, ctx_opt); + if (receiver_type != .builtin or receiver_type.builtin != .entity) { + try self.emit(.type_mismatch, .error_, self.arena.exprSpan(id), "get / get_mut requires an Entity receiver", .{}); + return ResolvedType.unknown; + } + if (ctx_opt) |ctx| { + if (!ctx.components_in_when.contains(mg.type_name)) { + try self.emit(.unknown_component_in_when, .error_, self.arena.exprSpan(id), "component '{s}' is not accessible — add it to the rule's when clause", .{self.arena.strings.slice(mg.type_name)}); + } + } + return .{ .component = mg.type_name }; + }, + .binary => return try self.synthBinary(id, data, ctx_opt), + .unary => return try self.synthUnary(id, data, ctx_opt), + .paren => unreachable, // parser doesn't emit a paren node — it returns the inner expr + else => return ResolvedType.unknown, + } + } + + fn synthBinary(self: *TypeChecker, id: NodeId, data: u32, ctx_opt: ?*RuleCtx) TypeError!ResolvedType { + const bin = self.arena.binary_exprs.items[data]; + const lhs_t = try self.synthExprE(bin.lhs, ctx_opt); + const rhs_t = try self.synthExprE(bin.rhs, ctx_opt); + const span = self.arena.exprSpan(id); + + switch (bin.op) { + .add, .sub, .mul, .div, .rem => { + if (lhs_t == .builtin and rhs_t == .builtin) { + if (lhs_t.builtin.isInteger() and rhs_t.builtin.isInteger() and lhs_t.builtin == rhs_t.builtin) { + return lhs_t; + } + if (lhs_t.builtin.isFloat() and rhs_t.builtin.isFloat() and lhs_t.builtin == rhs_t.builtin) { + return lhs_t; + } + try self.emit(.type_mismatch, .error_, span, "arithmetic operands must have matching primitive type (no implicit coercion in S3)", .{}); + return ResolvedType.unknown; + } + if (lhs_t == .unknown or rhs_t == .unknown) return ResolvedType.unknown; + try self.emit(.type_mismatch, .error_, span, "arithmetic requires numeric primitive operands", .{}); + return ResolvedType.unknown; + }, + .eq, .neq, .lt, .gt, .le, .ge => { + if (lhs_t == .builtin and rhs_t == .builtin and lhs_t.builtin == rhs_t.builtin) { + return .{ .builtin = .bool_ }; + } + if (lhs_t == .unknown or rhs_t == .unknown) return ResolvedType.unknown; + try self.emit(.type_mismatch, .error_, span, "comparison requires matching primitive operands", .{}); + return ResolvedType.unknown; + }, + .logical_and, .logical_or => { + if (lhs_t == .builtin and lhs_t.builtin == .bool_ and rhs_t == .builtin and rhs_t.builtin == .bool_) { + return .{ .builtin = .bool_ }; + } + if (lhs_t == .unknown or rhs_t == .unknown) return ResolvedType.unknown; + try self.emit(.type_mismatch, .error_, span, "logical operators require bool operands", .{}); + return ResolvedType.unknown; + }, + } + } + + fn synthUnary(self: *TypeChecker, id: NodeId, data: u32, ctx_opt: ?*RuleCtx) TypeError!ResolvedType { + const un = self.arena.unary_exprs.items[data]; + const operand_t = try self.synthExprE(un.operand, ctx_opt); + const span = self.arena.exprSpan(id); + switch (un.op) { + .neg => { + if (operand_t == .builtin and (operand_t.builtin.isInteger() or operand_t.builtin.isFloat())) { + return operand_t; + } + if (operand_t == .unknown) return ResolvedType.unknown; + try self.emit(.type_mismatch, .error_, span, "unary minus requires numeric operand", .{}); + return ResolvedType.unknown; + }, + .logical_not => { + if (operand_t == .builtin and operand_t.builtin == .bool_) return .{ .builtin = .bool_ }; + if (operand_t == .unknown) return ResolvedType.unknown; + try self.emit(.type_mismatch, .error_, span, "'not' requires bool operand", .{}); + return ResolvedType.unknown; + }, + } + } + + fn lookupFieldType(self: *TypeChecker, receiver_type: ResolvedType, field_name: StringId, span: SourceSpan) !ResolvedType { + switch (receiver_type) { + .component => |name_id| { + const sym = self.symbols.get(name_id) orelse return ResolvedType.unknown; + const decl = self.arena.component_decls.items[self.arena.itemData(sym.item_id)]; + var i: u32 = 0; + while (i < decl.fields_len) : (i += 1) { + const f = self.arena.fields.items[decl.fields_start + i]; + if (f.name == field_name) return self.namedTypeToResolved(f.type_node); + } + try self.emit(.invalid_field_filter, .error_, span, "field '{s}' does not exist on component '{s}'", .{ self.arena.strings.slice(field_name), self.arena.strings.slice(name_id) }); + return ResolvedType.unknown; + }, + .resource => |name_id| { + const sym = self.symbols.get(name_id) orelse return ResolvedType.unknown; + const decl = self.arena.resource_decls.items[self.arena.itemData(sym.item_id)]; + var i: u32 = 0; + while (i < decl.fields_len) : (i += 1) { + const f = self.arena.fields.items[decl.fields_start + i]; + if (f.name == field_name) return self.namedTypeToResolved(f.type_node); + } + try self.emit(.invalid_field_filter, .error_, span, "field '{s}' does not exist on resource '{s}'", .{ self.arena.strings.slice(field_name), self.arena.strings.slice(name_id) }); + return ResolvedType.unknown; + }, + .builtin, .unknown => return ResolvedType.unknown, + } + } + + // ─── Diagnostic emit ───────────────────────────────────────────────── + + fn emit(self: *TypeChecker, code: DiagnosticCode, severity: diag_mod.Severity, span: SourceSpan, comptime fmt: []const u8, args: anytype) !void { + const message = try std.fmt.allocPrint(self.gpa, fmt, args); + try self.diagnostics.append(self.gpa, .{ + .code = code, + .severity = severity, + .primary_span = span, + .primary_message = message, + }); + } +}; + +// ─── Helpers reachable from tests ─────────────────────────────────────── + +pub fn isConstEvaluable(arena: *const AstArena, id: NodeId) bool { + const kind = arena.exprKind(id); + return switch (kind) { + .int_lit, .float_lit, .bool_lit, .string_lit, .tag_path => true, + .binary => blk: { + const bin = arena.binary_exprs.items[arena.exprData(id)]; + // Arithmetic / comparison / logic on const-evaluable args is OK. + // The S3 brief restricts defaults to "literals + arithmetic on + // literals + parenthesized" — we allow comparison/logic too as + // long as both sides are const-evaluable; the brief's intent is + // to keep defaults compile-time, and these operations are. + break :blk isConstEvaluable(arena, bin.lhs) and isConstEvaluable(arena, bin.rhs); + }, + .unary => blk: { + const un = arena.unary_exprs.items[arena.exprData(id)]; + break :blk isConstEvaluable(arena, un.operand); + }, + else => false, + }; +} + +fn isAssignTargetReachable(arena: *const AstArena, ctx: *TypeChecker.RuleCtx, id: NodeId) bool { + var cur = id; + while (true) { + const k = arena.exprKind(cur); + switch (k) { + .field_access => { + const fa = arena.field_accesses.items[arena.exprData(cur)]; + cur = fa.receiver; + }, + .method_get_mut => return true, + .method_get => return false, + .ident => { + const name_id = arena.exprData(cur); + if (ctx.locals.get(name_id)) |local| return local.is_mut; + return false; + }, + else => return false, + } + } +} + +// ─── tests ────────────────────────────────────────────────────────────── + +const parser_mod = @import("parser.zig"); + +pub const CheckOutcome = struct { + ast: AstArena, + parse_diag: ?Diagnostic, + diagnostics: std.ArrayListUnmanaged(Diagnostic), + + pub fn deinit(self: *CheckOutcome, gpa: std.mem.Allocator) void { + if (self.parse_diag) |*d| d.deinit(gpa); + for (self.diagnostics.items) |*d| d.deinit(gpa); + self.diagnostics.deinit(gpa); + self.ast.deinit(gpa); + } +}; + +fn parseAndCheck(gpa: std.mem.Allocator, source: []const u8) !CheckOutcome { + var pr = try parser_mod.parse(gpa, source); + var diags: std.ArrayListUnmanaged(Diagnostic) = .empty; + try TypeChecker.check(gpa, &pr.ast, &diags); + return .{ .ast = pr.ast, .parse_diag = pr.diagnostic, .diagnostics = diags }; +} + +fn expectAnyCode(diagnostics: []const Diagnostic, code: DiagnosticCode) !void { + for (diagnostics) |d| if (d.code == code) return; + return error.DiagnosticCodeNotEmitted; +} + +test "type-checker emits E0101 on duplicate component declaration" { + const gpa = std.testing.allocator; + var result = try parseAndCheck(gpa, + \\component Health { current: float = 100.0 } + \\component Health { max: float = 100.0 } + ); + defer result.deinit(gpa); + try expectAnyCode(result.diagnostics.items, .duplicate_symbol); +} + +test "type-checker emits E0102 on field referencing unknown type" { + const gpa = std.testing.allocator; + var result = try parseAndCheck(gpa, + \\component Health { current: NotAType = 0 } + ); + defer result.deinit(gpa); + try expectAnyCode(result.diagnostics.items, .undefined_symbol); +} + +test "type-checker emits E0200 on arithmetic between int and float without cast" { + const gpa = std.testing.allocator; + var result = try parseAndCheck(gpa, + \\component Health { current: float = 100.0 } + \\rule tick(entity: Entity) + \\ when entity has Health + \\{ + \\ let x = 1 + 2.0 + \\} + ); + defer result.deinit(gpa); + try expectAnyCode(result.diagnostics.items, .type_mismatch); +} + +test "type-checker emits E1101 on non-const default value" { + const gpa = std.testing.allocator; + var result = try parseAndCheck(gpa, + \\component Health { current: float = some_var } + ); + defer result.deinit(gpa); + try expectAnyCode(result.diagnostics.items, .not_const_evaluable); +} + +test "type-checker emits E1210 on rule when clause referencing unknown component" { + const gpa = std.testing.allocator; + var result = try parseAndCheck(gpa, + \\rule tick(entity: Entity) + \\ when entity has NotAComponent + \\{ + \\} + ); + defer result.deinit(gpa); + try expectAnyCode(result.diagnostics.items, .unknown_component_in_when); +} + +test "type-checker emits E1211 on field filter type mismatch" { + const gpa = std.testing.allocator; + var result = try parseAndCheck(gpa, + \\component Health { current: float = 100.0 } + \\rule tick(entity: Entity) + \\ when entity has Health { current == 5 } + \\{ + \\} + ); + defer result.deinit(gpa); + try expectAnyCode(result.diagnostics.items, .invalid_field_filter); +} + +test "type-checker emits E1213 on resource clause referencing unknown resource" { + const gpa = std.testing.allocator; + var result = try parseAndCheck(gpa, + \\rule tick() + \\ when resource NotAResource + \\{ + \\} + ); + defer result.deinit(gpa); + try expectAnyCode(result.diagnostics.items, .resource_expected_in_when); +} + +test "type-checker rejects get/get_mut for components absent from when clause" { + const gpa = std.testing.allocator; + var result = try parseAndCheck(gpa, + \\component Health { current: float = 100.0 } + \\component Armor { resistance: float = 0.0 } + \\rule tick(entity: Entity) + \\ when entity has Health + \\{ + \\ let a = entity.get(Armor) + \\} + ); + defer result.deinit(gpa); + try expectAnyCode(result.diagnostics.items, .unknown_component_in_when); +} + +test "type-checker rule body let mut allows reassignment, immutable let does not" { + const gpa = std.testing.allocator; + var result_ok = try parseAndCheck(gpa, + \\rule tick() { + \\ let mut x = 0 + \\ x = 5 + \\} + ); + defer result_ok.deinit(gpa); + for (result_ok.diagnostics.items) |d| { + try std.testing.expect(d.code != .type_mismatch); + } + + var result_bad = try parseAndCheck(gpa, + \\rule tick() { + \\ let x = 0 + \\ x = 5 + \\} + ); + defer result_bad.deinit(gpa); + try expectAnyCode(result_bad.diagnostics.items, .type_mismatch); +} + +test "type-checker accepts compound assignment += on numeric field via get_mut" { + const gpa = std.testing.allocator; + var result = try parseAndCheck(gpa, + \\component Health { current: float = 100.0 } + \\rule heal(entity: Entity) + \\ when entity has Health + \\{ + \\ entity.get_mut(Health).current += 1.0 + \\} + ); + defer result.deinit(gpa); + if (result.parse_diag) |d| { + var dd = d; + defer dd.deinit(gpa); + std.debug.print("parse diag: {s}\n", .{dd.primary_message}); + try std.testing.expect(false); + } + for (result.diagnostics.items) |d| { + std.debug.print("diag {s}: {s}\n", .{ d.code.code(), d.primary_message }); + } + try std.testing.expectEqual(@as(usize, 0), result.diagnostics.items.len); +} + +test "type-checker rejects string field on component (POD enforcement)" { + const gpa = std.testing.allocator; + var result = try parseAndCheck(gpa, + \\component Bad { name: string } + ); + defer result.deinit(gpa); + try expectAnyCode(result.diagnostics.items, .undefined_symbol); +} + +test "type-checker accepts top-level declarations in any order via pass 1 / pass 2" { + const gpa = std.testing.allocator; + var result = try parseAndCheck(gpa, + \\rule tick(entity: Entity) + \\ when entity has Health + \\{ + \\ let h = entity.get(Health) + \\} + \\component Health { current: float = 100.0 } + ); + defer result.deinit(gpa); + try std.testing.expectEqual(@as(usize, 0), result.diagnostics.items.len); +} diff --git a/tests/etch/corpus/invalid/E0001_unexpected_top_level.etch b/tests/etch/corpus/invalid/E0001_unexpected_top_level.etch new file mode 100644 index 0000000..d42d55d --- /dev/null +++ b/tests/etch/corpus/invalid/E0001_unexpected_top_level.etch @@ -0,0 +1,4 @@ +// E0001 ParseError — a bare expression at top level is not a valid S3 +// declaration; the parser emits ParseError at the offending token. + +let x = 5 diff --git a/tests/etch/corpus/invalid/E0001_unsupported_fn.etch b/tests/etch/corpus/invalid/E0001_unsupported_fn.etch new file mode 100644 index 0000000..c2d266e --- /dev/null +++ b/tests/etch/corpus/invalid/E0001_unsupported_fn.etch @@ -0,0 +1,11 @@ +// E0001 ParseError — `fn` top-level is out of S3 scope and the lexer +// flags it as `error_unknown_keyword`, which the parser raises with a +// "UnsupportedConstructInS3" message. + +fn add(a: int, b: int) -> int { + a + b +} + +component Filler { + v: int = 0 +} diff --git a/tests/etch/corpus/invalid/E0101_duplicate_component.etch b/tests/etch/corpus/invalid/E0101_duplicate_component.etch new file mode 100644 index 0000000..8b9e8dd --- /dev/null +++ b/tests/etch/corpus/invalid/E0101_duplicate_component.etch @@ -0,0 +1,10 @@ +// E0101 DuplicateSymbol — two `component Health` declarations in the same +// file collide on the symbol table built by pass 1. + +component Health { + current: float = 100.0 +} + +component Health { + max: float = 100.0 +} diff --git a/tests/etch/corpus/invalid/E0102_string_field.etch b/tests/etch/corpus/invalid/E0102_string_field.etch new file mode 100644 index 0000000..e1622ca --- /dev/null +++ b/tests/etch/corpus/invalid/E0102_string_field.etch @@ -0,0 +1,6 @@ +// E0102 UndefinedSymbol — `string` is not in the S3 POD builtin set; the +// type-checker rejects it on a component field with a POD-specific message. + +component Bad { + name: string +} diff --git a/tests/etch/corpus/invalid/E0102_unknown_field_type.etch b/tests/etch/corpus/invalid/E0102_unknown_field_type.etch new file mode 100644 index 0000000..8abebb0 --- /dev/null +++ b/tests/etch/corpus/invalid/E0102_unknown_field_type.etch @@ -0,0 +1,6 @@ +// E0102 UndefinedSymbol — `NotARealType` is neither an S3 builtin nor a +// user-declared component or resource. + +component Bad { + value: NotARealType = 0 +} diff --git a/tests/etch/corpus/invalid/E0200_int_plus_float.etch b/tests/etch/corpus/invalid/E0200_int_plus_float.etch new file mode 100644 index 0000000..5e84b4b --- /dev/null +++ b/tests/etch/corpus/invalid/E0200_int_plus_float.etch @@ -0,0 +1,11 @@ +// E0200 TypeMismatch — int + float without an explicit cast is rejected. + +component Stash { + v: int = 0 +} + +rule mixed(entity: Entity) + when entity has Stash +{ + let x = 1 + 2.0 +} diff --git a/tests/etch/corpus/invalid/E1101_non_const_default.etch b/tests/etch/corpus/invalid/E1101_non_const_default.etch new file mode 100644 index 0000000..562a622 --- /dev/null +++ b/tests/etch/corpus/invalid/E1101_non_const_default.etch @@ -0,0 +1,6 @@ +// E1101 NotConstEvaluable — the default value references an identifier +// (`some_var`) which is not a literal / arithmetic / parenthesized form. + +component Bad { + value: float = some_var +} diff --git a/tests/etch/corpus/invalid/E1210_unknown_component_in_when.etch b/tests/etch/corpus/invalid/E1210_unknown_component_in_when.etch new file mode 100644 index 0000000..e9f9930 --- /dev/null +++ b/tests/etch/corpus/invalid/E1210_unknown_component_in_when.etch @@ -0,0 +1,9 @@ +// E1210 UnknownComponentInWhen — the rule's `when` clause references a +// component that hasn't been declared. + +rule tick(entity: Entity) + when entity has NotAComponent +{ + let mut x: int = 0 + x = 1 +} diff --git a/tests/etch/corpus/invalid/E1211_field_filter_type_mismatch.etch b/tests/etch/corpus/invalid/E1211_field_filter_type_mismatch.etch new file mode 100644 index 0000000..b0a7cc7 --- /dev/null +++ b/tests/etch/corpus/invalid/E1211_field_filter_type_mismatch.etch @@ -0,0 +1,13 @@ +// E1211 InvalidFieldFilter — `current` is `float`, filtered against an +// `int` literal: the type-checker emits the field-filter mismatch. + +component Health { + current: float = 100.0 +} + +rule tick(entity: Entity) + when entity has Health { current == 5 } +{ + let mut x: int = 0 + x = 1 +} diff --git a/tests/etch/corpus/invalid/E1213_resource_expected_in_when.etch b/tests/etch/corpus/invalid/E1213_resource_expected_in_when.etch new file mode 100644 index 0000000..f8325b7 --- /dev/null +++ b/tests/etch/corpus/invalid/E1213_resource_expected_in_when.etch @@ -0,0 +1,9 @@ +// E1213 ResourceExpectedInWhen — the `when resource T` clause references +// a name that is not registered as a resource. + +rule tick(entity: Entity) + when resource NotARealResource +{ + let mut x: int = 0 + x = 1 +} diff --git a/tests/etch/corpus/valid/components/annotated.etch b/tests/etch/corpus/valid/components/annotated.etch new file mode 100644 index 0000000..7f810cd --- /dev/null +++ b/tests/etch/corpus/valid/components/annotated.etch @@ -0,0 +1,36 @@ +// Components decorated with every S3-recognised annotation kind. The S3 +// type-checker accepts unknown annotation arg shapes (applicability is +// deferred Phase 0.2), so the diverse arg patterns here all parse. + +@requires(Transform) +component Light { + @hidden + enabled: bool = true + + @range(0, 100000) + intensity: float = 1000.0 + + @readonly + cached_world_index: i32 = 0 +} + +@save +component Achievement { + unlocked: bool = false + unlock_tick: u32 = 0 +} + +@replicated(predicted) +component Predicted { + flag: bool = false +} + +@networked +component Networked { + seq_no: u32 = 0 +} + +@storage(sparse) +component Damaged { + amount_this_frame: float = 0.0 +} diff --git a/tests/etch/corpus/valid/components/combat.etch b/tests/etch/corpus/valid/components/combat.etch new file mode 100644 index 0000000..b662ff0 --- /dev/null +++ b/tests/etch/corpus/valid/components/combat.etch @@ -0,0 +1,31 @@ +// Combat-oriented components — multiple defaults exercise the const eval. + +component Armor { + @range(0.0, 1.0) + resistance: float = 0.5 + physical: f32 = 0.3 + magical: f32 = 0.2 +} + +component Damager { + @range(0, 9999) + amount: float = 10.0 + is_critical: bool = false + pierce: float = 0.0 +} + +component CombatTags { + is_player: bool = true + is_invulnerable: bool = false + is_stunned: bool = false + is_burning: bool = false + is_frozen: bool = false +} + +component AmmoStash { + arrows: i32 = 20 + bolts: i32 = 0 + bullets: i32 = 0 + grenades: i32 = 0 + max_per_kind: i32 = 99 +} diff --git a/tests/etch/corpus/valid/components/health.etch b/tests/etch/corpus/valid/components/health.etch new file mode 100644 index 0000000..340f358 --- /dev/null +++ b/tests/etch/corpus/valid/components/health.etch @@ -0,0 +1,32 @@ +// Canonical health component — every S3 builtin primitive type is exercised +// through a field, with annotations + const default values so the type- +// checker walks the full happy path (POD enforcement, builtin resolution, +// const-eval, annotation parsing without applicability validation). + +component Health { + @range(0, 999) + @unit(.health_points) + current: float = 100.0 + + @range(1, 999) + @unit(.health_points) + max: float = 100.0 + + @range(0, 100) + regen_rate: float = 1.0 + + invulnerable: bool = false + + damage_resist: float = 0.0 + + armor_class: i32 = 0 + + last_damage_tick: u32 = 0 + + crit_chance: f32 = 0.05 + + crit_multiplier: f64 = 2.0 + + @hidden + internal_seed: i32 = 42 +} diff --git a/tests/etch/corpus/valid/components/inventory.etch b/tests/etch/corpus/valid/components/inventory.etch new file mode 100644 index 0000000..cf0dd5a --- /dev/null +++ b/tests/etch/corpus/valid/components/inventory.etch @@ -0,0 +1,29 @@ +// Numeric inventory — every field is a primitive with a const default. + +component Inventory { + gold: int = 0 + + @range(0, 65000) + weight: float = 0.0 + + @range(1, 65000) + max_weight: float = 50.0 + + @hidden + slot_count: i32 = 0 + + @readonly + last_collected_tick: u32 = 0 +} + +component Wallet { + coins: int = 0 + gems: int = 0 + dust: i32 = 0 +} + +component Backpack { + capacity: i32 = 30 + free_slots: i32 = 30 + locked: bool = false +} diff --git a/tests/etch/corpus/valid/components/minimal.etch b/tests/etch/corpus/valid/components/minimal.etch new file mode 100644 index 0000000..ae24b34 --- /dev/null +++ b/tests/etch/corpus/valid/components/minimal.etch @@ -0,0 +1,14 @@ +// Trivially minimal component — exercises empty-body and single-field paths. + +component Flag { + set: bool = false +} + +component Counter { + value: i32 = 0 +} + +component Tagged { + tag_id: u32 = 0 + weight: float = 0.0 +} diff --git a/tests/etch/corpus/valid/components/movement.etch b/tests/etch/corpus/valid/components/movement.etch new file mode 100644 index 0000000..e714e9b --- /dev/null +++ b/tests/etch/corpus/valid/components/movement.etch @@ -0,0 +1,28 @@ +// Movement / steering components. + +component Speed { + @range(0.0, 200.0) + @unit(.meters_per_second) + walk: float = 4.0 + + @range(0.0, 200.0) + @unit(.meters_per_second) + sprint: float = 8.0 + + @range(0.0, 200.0) + @unit(.meters_per_second) + crouch: float = 2.0 +} + +component Drag { + linear: float = 0.5 + angular: float = 0.2 +} + +component MoveIntent { + forward: float = 0.0 + strafe: float = 0.0 + pitch: float = 0.0 + yaw: float = 0.0 + is_jumping: bool = false +} diff --git a/tests/etch/corpus/valid/components/multi_decl.etch b/tests/etch/corpus/valid/components/multi_decl.etch new file mode 100644 index 0000000..eb8d7ff --- /dev/null +++ b/tests/etch/corpus/valid/components/multi_decl.etch @@ -0,0 +1,30 @@ +// Multiple component declarations in a single file — pass 1 must register +// each name without collision, and pass 2 must resolve cross-references in +// rules (covered by tests/etch/corpus/valid/rules/*). + +component Stamina { + current: float = 100.0 + max: float = 100.0 + regen_per_second: float = 5.0 +} + +component Mana { + current: f32 = 50.0 + max: f32 = 100.0 + regen_per_second: f32 = 2.0 +} + +component Energy { + current: f64 = 75.0 + max: f64 = 100.0 +} + +component Focus { + current: int = 0 + max: int = 5 +} + +component Rage { + current: i32 = 0 + max: i32 = 100 +} diff --git a/tests/etch/corpus/valid/components/transform.etch b/tests/etch/corpus/valid/components/transform.etch new file mode 100644 index 0000000..1b54f96 --- /dev/null +++ b/tests/etch/corpus/valid/components/transform.etch @@ -0,0 +1,28 @@ +// Spatial transform components — Vec3 / Color / Duration builtin types +// reach the type-checker; their fields omit const defaults because S3 +// only exposes int / float / bool literals (DURATION_LIT and color +// literals are out of scope per `briefs/S3-etch-parser-subset.md` +// Operators / punctuation recognized). + +component Transform { + @unit(.meters) + position: Vec3 + rotation: Vec3 + scale: float = 1.0 +} + +component Velocity { + @unit(.meters) + linear: Vec3 + angular: Vec3 +} + +component Tint { + color: Color +} + +component Lifetime { + @unit(.seconds) + remaining: Duration + expired: bool = false +} diff --git a/tests/etch/corpus/valid/exprs/arithmetic.etch b/tests/etch/corpus/valid/exprs/arithmetic.etch new file mode 100644 index 0000000..1a189b9 --- /dev/null +++ b/tests/etch/corpus/valid/exprs/arithmetic.etch @@ -0,0 +1,22 @@ +// Pure expression coverage — int arithmetic with full precedence chain. + +component IntStash { + v: int = 0 +} + +rule int_math(entity: Entity) + when entity has IntStash +{ + let a = 1 + let b = 2 + let c = 3 + let d = a + b * c + let e = (a + b) * c + let f = a - b + let g = a * b + c + let h = a / b + let i = a % b + let j = a + b + c + d + e + let mut sum: int = 0 + sum = j +} diff --git a/tests/etch/corpus/valid/exprs/comparisons.etch b/tests/etch/corpus/valid/exprs/comparisons.etch new file mode 100644 index 0000000..ff49e11 --- /dev/null +++ b/tests/etch/corpus/valid/exprs/comparisons.etch @@ -0,0 +1,24 @@ +// Comparison + logical expressions resolving to bool. + +component CmpStash { + v: int = 0 +} + +rule comparisons(entity: Entity) + when entity has CmpStash +{ + let a = 1 + let b = 2 + let lt = a < b + let gt = a > b + let le = a <= b + let ge = a >= b + let eq = a == b + let neq = a != b + let combined = lt and gt + let alt = lt or gt + let negated = not lt + let chain = (a < b) and (b < a) or not eq + let mut answer: bool = false + answer = chain +} diff --git a/tests/etch/corpus/valid/exprs/float_math.etch b/tests/etch/corpus/valid/exprs/float_math.etch new file mode 100644 index 0000000..7015ddc --- /dev/null +++ b/tests/etch/corpus/valid/exprs/float_math.etch @@ -0,0 +1,22 @@ +// Float arithmetic with mixed precedence and unary negation. + +component FloatStash { + v: float = 0.0 +} + +rule float_math(entity: Entity, dt: float) + when entity has FloatStash +{ + let a = 1.5 + let b = 2.5 + let c = -a + let d = a + b + let e = a * b + let f = a / b + let g = a - b + let h = a + b - c * d + let i = (a + b) / (c - d) + let j = dt * 60.0 + let mut sum: float = 0.0 + sum = j +} diff --git a/tests/etch/corpus/valid/exprs/literals.etch b/tests/etch/corpus/valid/exprs/literals.etch new file mode 100644 index 0000000..bcdf97a --- /dev/null +++ b/tests/etch/corpus/valid/exprs/literals.etch @@ -0,0 +1,20 @@ +// Literal coverage — int / float / bool used in let bindings. + +component LitStash { + v: int = 0 +} + +rule literals(entity: Entity) + when entity has LitStash +{ + let i_a = 0 + let i_b = 42 + let i_c = 1000000 + let f_a = 0.0 + let f_b = 3.14 + let f_c = -2.5 + let b_t = true + let b_f = false + let mut x: int = 0 + x = i_b +} diff --git a/tests/etch/corpus/valid/resources/game_mode.etch b/tests/etch/corpus/valid/resources/game_mode.etch new file mode 100644 index 0000000..78bd98c --- /dev/null +++ b/tests/etch/corpus/valid/resources/game_mode.etch @@ -0,0 +1,33 @@ +// Canonical resource — @config lifecycle, full primitive coverage. + +@config +resource GameMode { + @range(1, 32) + max_players: int = 4 + + @range(0.0, 999.0) + @unit(.seconds) + respawn_time: float = 5.0 + + friendly_fire: bool = false + + @range(0, 9999) + win_score: i32 = 100 + + seed: u32 = 0 +} + +@state +resource GameState { + round: i32 = 0 + timer: float = 0.0 + is_paused: bool = false +} + +@transient +resource DebugFlags { + show_colliders: bool = false + show_navmesh: bool = false + show_fps: bool = false + god_mode: bool = false +} diff --git a/tests/etch/corpus/valid/resources/multi.etch b/tests/etch/corpus/valid/resources/multi.etch new file mode 100644 index 0000000..12bb43d --- /dev/null +++ b/tests/etch/corpus/valid/resources/multi.etch @@ -0,0 +1,29 @@ +// Single file with a representative mix of lifecycle annotations. + +@config +resource UIConfig { + text_scale: float = 1.0 + high_contrast: bool = false + show_tooltips: bool = true +} + +@state +resource Score { + points: int = 0 + multiplier: float = 1.0 + streak: i32 = 0 +} + +@state +resource Wave { + current: i32 = 0 + enemies_alive: i32 = 0 + boss_pending: bool = false +} + +@transient +resource ProfilerStats { + last_frame_ms: float = 0.0 + p99_ms: float = 0.0 + sample_count: u32 = 0 +} diff --git a/tests/etch/corpus/valid/resources/physics_config.etch b/tests/etch/corpus/valid/resources/physics_config.etch new file mode 100644 index 0000000..6a22873 --- /dev/null +++ b/tests/etch/corpus/valid/resources/physics_config.etch @@ -0,0 +1,33 @@ +// Physics + audio config bundled in a single file — multiple @config +// resources declared at top level. + +@config +resource PhysicsConfig { + @range(0.0, 100.0) + gravity_y: float = 9.81 + + @range(1, 240) + fixed_rate: i32 = 60 + + @range(1, 32) + solver_iterations: i32 = 8 + + allow_sleep: bool = true +} + +@config +resource AudioConfig { + master_volume: float = 1.0 + music_volume: float = 0.7 + sfx_volume: float = 1.0 + voice_volume: float = 1.0 + muted: bool = false +} + +@config +resource RenderConfig { + resolution_scale: float = 1.0 + shadow_quality: i32 = 3 + vsync: bool = true + bloom_intensity: float = 0.5 +} diff --git a/tests/etch/corpus/valid/resources/weather.etch b/tests/etch/corpus/valid/resources/weather.etch new file mode 100644 index 0000000..2d34924 --- /dev/null +++ b/tests/etch/corpus/valid/resources/weather.etch @@ -0,0 +1,17 @@ +// @state resource modelling persistent weather. Used by rules in the +// rules/ corpus to exercise `when resource X changed`. + +@state +resource WeatherState { + @range(0.0, 1.0) + intensity: float = 0.0 + + @range(0.0, 1.0) + wind: float = 0.0 + + is_raining: bool = false + is_thundering: bool = false + is_snowing: bool = false + + last_change_tick: u32 = 0 +} diff --git a/tests/etch/corpus/valid/resources/world_clock.etch b/tests/etch/corpus/valid/resources/world_clock.etch new file mode 100644 index 0000000..f00a4af --- /dev/null +++ b/tests/etch/corpus/valid/resources/world_clock.etch @@ -0,0 +1,22 @@ +// World clock resource — drives day/night cycle. + +@state +resource WorldClock { + @range(0.0, 86400.0) + @unit(.seconds) + seconds_in_day: float = 0.0 + + @range(0.0, 1.0) + day_phase: float = 0.0 + + is_day: bool = true + + day_count: i32 = 0 +} + +@transient +resource FrameInfo { + dt: float = 0.0 + total: float = 0.0 + frame_index: u32 = 0 +} diff --git a/tests/etch/corpus/valid/rules/annotated.etch b/tests/etch/corpus/valid/rules/annotated.etch new file mode 100644 index 0000000..add4907 --- /dev/null +++ b/tests/etch/corpus/valid/rules/annotated.etch @@ -0,0 +1,39 @@ +// Rules with scheduling annotations — S3 parses but does not validate +// applicability. + +component Burnable { + is_burning: bool = false + burn_dps: float = 1.0 + burn_left: float = 0.0 +} + +@phase(.update) +@priority(100) +rule tick_burn(entity: Entity, dt: float) + when entity has Burnable +{ + let b = entity.get_mut(Burnable) + b.burn_left -= dt +} + +@phase(.fixed_update) +@run_on(.server) +rule server_tick(entity: Entity, dt: float) + when entity has Burnable +{ + let mut x: float = 0.0 + x = dt +} + +@pause_group(.none) +rule menu_tick(dt: float) + when resource AlwaysOn +{ + let mut tick: float = 0.0 + tick = dt +} + +@state +resource AlwaysOn { + enabled: bool = true +} diff --git a/tests/etch/corpus/valid/rules/composition.etch b/tests/etch/corpus/valid/rules/composition.etch new file mode 100644 index 0000000..194e6c8 --- /dev/null +++ b/tests/etch/corpus/valid/rules/composition.etch @@ -0,0 +1,41 @@ +// Multiple rules in a single file with shared component dependencies — +// pass 2 must resolve cross-rule access without interference. + +component Charge { + energy: float = 100.0 + drain_per_second: float = 5.0 +} + +component Shield { + is_active: bool = false + strength: float = 0.0 +} + +rule drain(entity: Entity, dt: float) + when entity has Charge +{ + let h = entity.get_mut(Charge) + h.energy -= h.drain_per_second * dt +} + +rule recharge(entity: Entity, dt: float) + when entity has Charge +{ + let mut amount: float = 0.0 + amount = dt * 2.0 + entity.get_mut(Charge).energy += amount +} + +rule pulse_shield(entity: Entity) + when entity has Shield +{ + let s = entity.get_mut(Shield) + s.strength = 10.0 +} + +rule disable_shield(entity: Entity) + when entity has Shield +{ + let s = entity.get_mut(Shield) + s.strength = 0.0 +} diff --git a/tests/etch/corpus/valid/rules/damage.etch b/tests/etch/corpus/valid/rules/damage.etch new file mode 100644 index 0000000..7ca1751 --- /dev/null +++ b/tests/etch/corpus/valid/rules/damage.etch @@ -0,0 +1,32 @@ +// Damage pipeline — `let` with explicit type annotation, mixed get/get_mut. + +component HP { + current: float = 100.0 + max: float = 100.0 +} + +component Resist { + physical: float = 0.0 + magic: float = 0.0 +} + +rule apply_damage(entity: Entity, amount: float) + when entity has HP + and entity has Resist +{ + let r: float = entity.get(Resist).physical + let one_minus_r = 1.0 - r + let reduced = amount * one_minus_r + let h = entity.get_mut(HP) + h.current -= reduced +} + +rule cap_floor(entity: Entity) + when entity has HP +{ + let mut current = entity.get(HP).current + let mut clamped = current + let zero = 0.0 + let below = clamped - zero + entity.get_mut(HP).current = clamped +} diff --git a/tests/etch/corpus/valid/rules/forward_ref.etch b/tests/etch/corpus/valid/rules/forward_ref.etch new file mode 100644 index 0000000..8ebf1a3 --- /dev/null +++ b/tests/etch/corpus/valid/rules/forward_ref.etch @@ -0,0 +1,15 @@ +// Rule references a component declared later in the file — pass 1 / pass 2 +// architecture must support forward references. + +rule reference_later(entity: Entity) + when entity has Energy +{ + let e = entity.get(Energy) + let mut x: float = 0.0 + x = e.current +} + +component Energy { + current: float = 100.0 + max: float = 100.0 +} diff --git a/tests/etch/corpus/valid/rules/movement.etch b/tests/etch/corpus/valid/rules/movement.etch new file mode 100644 index 0000000..28f1e50 --- /dev/null +++ b/tests/etch/corpus/valid/rules/movement.etch @@ -0,0 +1,34 @@ +// Movement integration rule — combines two component reads (Velocity, +// Speed) with a get_mut on a third component (Transform-like position). + +component Position { + x: float = 0.0 + y: float = 0.0 + z: float = 0.0 +} + +component LinearVelocity { + x: float = 0.0 + y: float = 0.0 + z: float = 0.0 +} + +component MoveSpeed { + walk: float = 4.0 + sprint: float = 8.0 + is_sprinting: bool = false +} + +rule integrate(entity: Entity, dt: float) + when entity has Position + and entity has LinearVelocity + and entity has MoveSpeed +{ + let speed = entity.get(MoveSpeed).walk + let dx = entity.get(LinearVelocity).x * speed * dt + let dy = entity.get(LinearVelocity).y * speed * dt + let dz = entity.get(LinearVelocity).z * speed * dt + entity.get_mut(Position).x += dx + entity.get_mut(Position).y += dy + entity.get_mut(Position).z += dz +} diff --git a/tests/etch/corpus/valid/rules/no_when.etch b/tests/etch/corpus/valid/rules/no_when.etch new file mode 100644 index 0000000..eed57af --- /dev/null +++ b/tests/etch/corpus/valid/rules/no_when.etch @@ -0,0 +1,18 @@ +// Rule body with no when clause — global-effect rule, exercises the +// optional when_clause arm of the grammar. + +resource Counters { + total_calls: int = 0 +} + +rule update_counters() +{ + let mut n: int = 0 + n = 1 +} + +rule tick_world(dt: float) +{ + let mut elapsed: float = 0.0 + elapsed += dt +} diff --git a/tests/etch/corpus/valid/rules/regen.etch b/tests/etch/corpus/valid/rules/regen.etch new file mode 100644 index 0000000..c448160 --- /dev/null +++ b/tests/etch/corpus/valid/rules/regen.etch @@ -0,0 +1,26 @@ +// Health regeneration — single component access via get/get_mut, arithmetic +// on float fields, compound assignment via field path. + +component Health { + current: float = 100.0 + max: float = 100.0 + regen_rate: float = 1.0 +} + +rule tick_regen(entity: Entity, dt: float) + when entity has Health +{ + let h = entity.get_mut(Health) + let delta = h.regen_rate * dt + h.current += delta +} + +rule cap_at_max(entity: Entity) + when entity has Health +{ + let h = entity.get_mut(Health) + let m = h.max + let c = h.current + let over = c - m + h.current -= over +} diff --git a/tests/etch/corpus/valid/rules/resource_only.etch b/tests/etch/corpus/valid/rules/resource_only.etch new file mode 100644 index 0000000..506c8c1 --- /dev/null +++ b/tests/etch/corpus/valid/rules/resource_only.etch @@ -0,0 +1,27 @@ +// Rule with no entity binding — operates on resources only. + +@state +resource Score { + points: int = 0 + multiplier: int = 1 +} + +@state +resource Combo { + active: bool = false + count: i32 = 0 +} + +rule reset_combo() + when resource Score +{ + let mut x: int = 0 + x = 5 +} + +rule on_score_changed() + when resource Score changed +{ + let mut tmp: i32 = 0 + tmp = 1 +} diff --git a/tests/etch/corpus/valid/whens/composition.etch b/tests/etch/corpus/valid/whens/composition.etch new file mode 100644 index 0000000..806ac34 --- /dev/null +++ b/tests/etch/corpus/valid/whens/composition.etch @@ -0,0 +1,52 @@ +// `and` / `or` / `not` composition with parenthesization in the when clause. + +component A { + v: int = 0 +} + +component B { + v: int = 0 +} + +component C { + v: int = 0 +} + +component D { + v: int = 0 +} + +rule simple_and(entity: Entity) + when entity has A and entity has B +{ + let mut x: int = 0 + x = 1 +} + +rule simple_or(entity: Entity) + when entity has A or entity has B +{ + let mut x: int = 0 + x = 1 +} + +rule paren_and_or(entity: Entity) + when (entity has A and entity has B) or entity has C +{ + let mut x: int = 0 + x = 1 +} + +rule not_then_and(entity: Entity) + when not entity has D and entity has A +{ + let mut x: int = 0 + x = 1 +} + +rule nested(entity: Entity) + when (entity has A or entity has B) and (entity has C or entity has D) +{ + let mut x: int = 0 + x = 1 +} diff --git a/tests/etch/corpus/valid/whens/has_only.etch b/tests/etch/corpus/valid/whens/has_only.etch new file mode 100644 index 0000000..f32dfa9 --- /dev/null +++ b/tests/etch/corpus/valid/whens/has_only.etch @@ -0,0 +1,38 @@ +// `entity has T` exhaustively over every S3 component + a 3-way and chain. + +component A { + v: int = 0 +} + +component B { + v: int = 0 +} + +component C { + v: int = 0 +} + +rule and_three(entity: Entity) + when entity has A + and entity has B + and entity has C +{ + let mut x: int = 0 + x = 1 +} + +rule or_pair(entity: Entity) + when entity has A + or entity has B +{ + let mut x: int = 0 + x = 1 +} + +rule not_inverse(entity: Entity) + when entity has A + and not entity has C +{ + let mut x: int = 0 + x = 1 +} diff --git a/tests/etch/corpus/valid/whens/multi_entity.etch b/tests/etch/corpus/valid/whens/multi_entity.etch new file mode 100644 index 0000000..bc14c31 --- /dev/null +++ b/tests/etch/corpus/valid/whens/multi_entity.etch @@ -0,0 +1,33 @@ +// Multiple `has` clauses with the same entity binding — repeated uses +// of `entity` across the when chain. + +component Hp { + v: float = 100.0 +} + +component Mp { + v: float = 50.0 +} + +component Tag { + is_set: bool = false +} + +rule three(entity: Entity) + when entity has Hp + and entity has Mp + and entity has Tag +{ + let mut x: int = 0 + x = 1 +} + +rule four(entity: Entity) + when entity has Hp + and entity has Mp + and entity has Tag + and not entity has Hp +{ + let mut x: int = 0 + x = 1 +} diff --git a/tests/etch/corpus/valid/whens/resource_when.etch b/tests/etch/corpus/valid/whens/resource_when.etch new file mode 100644 index 0000000..f670d39 --- /dev/null +++ b/tests/etch/corpus/valid/whens/resource_when.etch @@ -0,0 +1,43 @@ +// `when resource T` and `when resource T changed` over multiple resource +// shapes. Demonstrates resource-only and resource-changed forms. + +@config +resource Config1 { + enabled: bool = true +} + +@state +resource State1 { + value: int = 0 +} + +@state +resource Weather { + intensity: float = 0.0 +} + +rule on_state(entity: Entity) + when resource State1 +{ + let mut x: int = 0 + x = 1 +} + +rule on_weather_changed(entity: Entity) + when resource Weather changed +{ + let mut tmp: float = 0.0 + tmp = 1.0 +} + +rule combined(entity: Entity) + when entity has Counter + and resource State1 +{ + let mut tmp: int = 0 + tmp = 1 +} + +component Counter { + n: int = 0 +} diff --git a/tests/etch/corpus/valid/whens/with_filter.etch b/tests/etch/corpus/valid/whens/with_filter.etch new file mode 100644 index 0000000..1686593 --- /dev/null +++ b/tests/etch/corpus/valid/whens/with_filter.etch @@ -0,0 +1,47 @@ +// `entity has T { field == value }` — every primitive field type matched +// against a const value of the matching builtin type. + +component Health { + current: float = 100.0 + max: float = 100.0 +} + +component Level { + value: int = 1 +} + +component Flagged { + on: bool = false +} + +component Reputation { + score: int = 0 +} + +rule on_max_health(entity: Entity) + when entity has Health { current == 100.0 } +{ + let mut x: int = 0 + x = 1 +} + +rule on_level_one(entity: Entity) + when entity has Level { value == 1 } +{ + let mut x: int = 0 + x = 1 +} + +rule on_flagged(entity: Entity) + when entity has Flagged { on == true } +{ + let mut x: int = 0 + x = 1 +} + +rule on_high_rep(entity: Entity) + when entity has Reputation { score == 0 } +{ + let mut x: int = 0 + x = 1 +} diff --git a/tests/etch/corpus_facade.zig b/tests/etch/corpus_facade.zig new file mode 100644 index 0000000..7b8b286 --- /dev/null +++ b/tests/etch/corpus_facade.zig @@ -0,0 +1,71 @@ +//! Corpus facade — exposes every fixture file under +//! `tests/etch/corpus/{valid,invalid}/` as a `[]const u8` constant so +//! that the bench binary (`bench/etch_parse.zig`) and the corpus driver +//! (`tests/etch/corpus_test.zig`) can both reach the same bytes through +//! a single module root. `@embedFile` is restricted to the package path +//! of the importing root, so this facade — sitting beside the corpus +//! tree — is the only module allowed to bake the fixtures in. + +pub const Entry = struct { + name: []const u8, + source: []const u8, +}; + +pub const InvalidEntry = struct { + name: []const u8, + /// Canonical short code (e.g. "E0001", "E1210") parsed from the + /// filename prefix and asserted against the diagnostics emitted by + /// the type-checker. + expected_code: []const u8, + source: []const u8, +}; + +pub const valid = [_]Entry{ + .{ .name = "components/health.etch", .source = @embedFile("corpus/valid/components/health.etch") }, + .{ .name = "components/transform.etch", .source = @embedFile("corpus/valid/components/transform.etch") }, + .{ .name = "components/inventory.etch", .source = @embedFile("corpus/valid/components/inventory.etch") }, + .{ .name = "components/combat.etch", .source = @embedFile("corpus/valid/components/combat.etch") }, + .{ .name = "components/movement.etch", .source = @embedFile("corpus/valid/components/movement.etch") }, + .{ .name = "components/minimal.etch", .source = @embedFile("corpus/valid/components/minimal.etch") }, + .{ .name = "components/multi_decl.etch", .source = @embedFile("corpus/valid/components/multi_decl.etch") }, + .{ .name = "components/annotated.etch", .source = @embedFile("corpus/valid/components/annotated.etch") }, + + .{ .name = "resources/game_mode.etch", .source = @embedFile("corpus/valid/resources/game_mode.etch") }, + .{ .name = "resources/physics_config.etch", .source = @embedFile("corpus/valid/resources/physics_config.etch") }, + .{ .name = "resources/weather.etch", .source = @embedFile("corpus/valid/resources/weather.etch") }, + .{ .name = "resources/world_clock.etch", .source = @embedFile("corpus/valid/resources/world_clock.etch") }, + .{ .name = "resources/multi.etch", .source = @embedFile("corpus/valid/resources/multi.etch") }, + + .{ .name = "rules/regen.etch", .source = @embedFile("corpus/valid/rules/regen.etch") }, + .{ .name = "rules/movement.etch", .source = @embedFile("corpus/valid/rules/movement.etch") }, + .{ .name = "rules/damage.etch", .source = @embedFile("corpus/valid/rules/damage.etch") }, + .{ .name = "rules/resource_only.etch", .source = @embedFile("corpus/valid/rules/resource_only.etch") }, + .{ .name = "rules/composition.etch", .source = @embedFile("corpus/valid/rules/composition.etch") }, + .{ .name = "rules/annotated.etch", .source = @embedFile("corpus/valid/rules/annotated.etch") }, + .{ .name = "rules/forward_ref.etch", .source = @embedFile("corpus/valid/rules/forward_ref.etch") }, + .{ .name = "rules/no_when.etch", .source = @embedFile("corpus/valid/rules/no_when.etch") }, + + .{ .name = "whens/has_only.etch", .source = @embedFile("corpus/valid/whens/has_only.etch") }, + .{ .name = "whens/with_filter.etch", .source = @embedFile("corpus/valid/whens/with_filter.etch") }, + .{ .name = "whens/resource_when.etch", .source = @embedFile("corpus/valid/whens/resource_when.etch") }, + .{ .name = "whens/composition.etch", .source = @embedFile("corpus/valid/whens/composition.etch") }, + .{ .name = "whens/multi_entity.etch", .source = @embedFile("corpus/valid/whens/multi_entity.etch") }, + + .{ .name = "exprs/arithmetic.etch", .source = @embedFile("corpus/valid/exprs/arithmetic.etch") }, + .{ .name = "exprs/float_math.etch", .source = @embedFile("corpus/valid/exprs/float_math.etch") }, + .{ .name = "exprs/comparisons.etch", .source = @embedFile("corpus/valid/exprs/comparisons.etch") }, + .{ .name = "exprs/literals.etch", .source = @embedFile("corpus/valid/exprs/literals.etch") }, +}; + +pub const invalid = [_]InvalidEntry{ + .{ .name = "E0001_unsupported_fn.etch", .expected_code = "E0001", .source = @embedFile("corpus/invalid/E0001_unsupported_fn.etch") }, + .{ .name = "E0001_unexpected_top_level.etch", .expected_code = "E0001", .source = @embedFile("corpus/invalid/E0001_unexpected_top_level.etch") }, + .{ .name = "E0101_duplicate_component.etch", .expected_code = "E0101", .source = @embedFile("corpus/invalid/E0101_duplicate_component.etch") }, + .{ .name = "E0102_unknown_field_type.etch", .expected_code = "E0102", .source = @embedFile("corpus/invalid/E0102_unknown_field_type.etch") }, + .{ .name = "E0102_string_field.etch", .expected_code = "E0102", .source = @embedFile("corpus/invalid/E0102_string_field.etch") }, + .{ .name = "E0200_int_plus_float.etch", .expected_code = "E0200", .source = @embedFile("corpus/invalid/E0200_int_plus_float.etch") }, + .{ .name = "E1101_non_const_default.etch", .expected_code = "E1101", .source = @embedFile("corpus/invalid/E1101_non_const_default.etch") }, + .{ .name = "E1210_unknown_component_in_when.etch", .expected_code = "E1210", .source = @embedFile("corpus/invalid/E1210_unknown_component_in_when.etch") }, + .{ .name = "E1211_field_filter_type_mismatch.etch", .expected_code = "E1211", .source = @embedFile("corpus/invalid/E1211_field_filter_type_mismatch.etch") }, + .{ .name = "E1213_resource_expected_in_when.etch", .expected_code = "E1213", .source = @embedFile("corpus/invalid/E1213_resource_expected_in_when.etch") }, +}; diff --git a/tests/etch/corpus_test.zig b/tests/etch/corpus_test.zig new file mode 100644 index 0000000..85e938e --- /dev/null +++ b/tests/etch/corpus_test.zig @@ -0,0 +1,77 @@ +//! S3 Etch corpus driver — enumerates every `.etch` file in +//! `tests/etch/corpus/` (via the shared facade module) and asserts: +//! +//! - Files under `valid/**` produce zero diagnostics from `parse` + +//! `typeCheck`. +//! - Files under `invalid/*.etch` produce **at least** the diagnostic code +//! whose stable short code prefixes the file name (e.g. +//! `E1210_unknown_component_in_when.etch`). Additional diagnostics +//! are tolerated so the test isn't coupled to internal accumulation +//! order. + +const std = @import("std"); +const etch = @import("weld_etch"); +const corpus = @import("corpus_facade"); + +test "all valid corpus files parse and type-check with zero diagnostics" { + const gpa = std.testing.allocator; + for (corpus.valid) |entry| { + var pr = try etch.parseSource(gpa, entry.source); + defer pr.ast.deinit(gpa); + defer if (pr.diagnostic) |*d| d.deinit(gpa); + if (pr.diagnostic) |d| { + std.debug.print("\nvalid file '{s}' had parse diagnostic: {s} — {s}\n", .{ entry.name, d.code.code(), d.primary_message }); + return error.UnexpectedParseDiagnostic; + } + + var diags: std.ArrayListUnmanaged(etch.Diagnostic) = .empty; + defer { + for (diags.items) |*d| d.deinit(gpa); + diags.deinit(gpa); + } + try etch.typeCheck(gpa, &pr.ast, &diags); + if (diags.items.len != 0) { + std.debug.print("\nvalid file '{s}' had {d} unexpected type diagnostics:\n", .{ entry.name, diags.items.len }); + for (diags.items) |d| { + std.debug.print(" {s} {s}: {s}\n", .{ d.code.code(), d.code.name(), d.primary_message }); + } + return error.UnexpectedTypeDiagnostic; + } + } +} + +test "every invalid corpus file emits the diagnostic code in its filename" { + const gpa = std.testing.allocator; + for (corpus.invalid) |entry| { + var pr = try etch.parseSource(gpa, entry.source); + defer pr.ast.deinit(gpa); + defer if (pr.diagnostic) |*d| d.deinit(gpa); + + var diags: std.ArrayListUnmanaged(etch.Diagnostic) = .empty; + defer { + for (diags.items) |*d| d.deinit(gpa); + diags.deinit(gpa); + } + try etch.typeCheck(gpa, &pr.ast, &diags); + + const parse_matches = pr.diagnostic != null and std.mem.eql(u8, pr.diagnostic.?.code.code(), entry.expected_code); + var type_matches = false; + for (diags.items) |d| { + if (std.mem.eql(u8, d.code.code(), entry.expected_code)) { + type_matches = true; + break; + } + } + if (!parse_matches and !type_matches) { + std.debug.print("\ninvalid file '{s}' did not emit the expected code {s}. ", .{ entry.name, entry.expected_code }); + if (pr.diagnostic) |d| { + std.debug.print("Parse diagnostic: {s} — {s}. ", .{ d.code.code(), d.primary_message }); + } + std.debug.print("Type diagnostics: {d}\n", .{diags.items.len}); + for (diags.items) |d| { + std.debug.print(" {s} {s}: {s}\n", .{ d.code.code(), d.code.name(), d.primary_message }); + } + return error.ExpectedDiagnosticNotEmitted; + } + } +}