diff --git a/CHANGELOG.md b/CHANGELOG.md index b1124e4f..59b77ef4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,22 @@ a [GitHub Release](https://github.com/colbymchenry/codegraph/releases) tagged This project follows [Keep a Changelog](https://keepachangelog.com/en/1.1.0/) and adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added +- Zig language support (Zig 0.16+). Indexes `.zig` and `.zon` files using + `tree-sitter-zig`. Extracts functions, structs with their fields and methods, + enums and their members, error sets (modelled as enums), `@import` calls (as + `import` nodes with `imports` references), call references, `pub` visibility, + plain constants/variables, and `test "..." { ... }` blocks as functions. + +### Known limitations +- Comptime-generated types such as + `fn Physics(comptime R: type) type { return struct { ... }; }` + are not resolvable at the tree-sitter level — full type inference would be + required. Methods defined on such types are not extracted. This is + documented at the top of `src/extraction/languages/zig.ts`.
+
 ## [0.7.6] - 2026-05-13
 
 ### Fixed
diff --git a/__tests__/extraction.test.ts b/__tests__/extraction.test.ts
index cb69e2ab..376a1d7c 100644
--- a/__tests__/extraction.test.ts
+++ b/__tests__/extraction.test.ts
@@ -94,6 +94,11 @@ describe('Language Detection', () => {
     expect(detectLanguage('main.dart')).toBe('dart');
   });
 
+  it('should detect Zig files', () => {
+    expect(detectLanguage('main.zig')).toBe('zig');
+    expect(detectLanguage('build.zon')).toBe('zig');
+  });
+
   it('should return unknown for unsupported extensions', () => {
     expect(detectLanguage('styles.css')).toBe('unknown');
     expect(detectLanguage('data.json')).toBe('unknown');
@@ -122,6 +127,7 @@ describe('Language Support', () => {
     expect(languages).toContain('swift');
     expect(languages).toContain('kotlin');
     expect(languages).toContain('dart');
+    expect(languages).toContain('zig');
   });
 });
 
@@ -3649,3 +3655,213 @@ class Svc {
     expect(decoratedNode?.name).toBe('method');
   });
 });
+
+// Zig extraction: each case feeds a small Zig snippet through extractFromSource
+// and checks the nodes, edges and unresolved references it produces.
+describe('Zig Extraction', () => {
+  it('should extract top-level function declarations', () => {
+    const code = `
+pub fn add(a: i32, b: i32) i32 {
+    return a + b;
+}
+
+fn internal(x: u8) void {
+    _ = x;
+}
+`;
+    const result = extractFromSource('math.zig', code);
+
+    const pubFn = result.nodes.find((n) => n.kind === 'function' && n.name === 'add');
+    expect(pubFn).toBeDefined();
+    expect(pubFn?.visibility).toBe('public');
+
+    const privFn = result.nodes.find((n) => n.kind === 'function' && n.name === 'internal');
+    expect(privFn).toBeDefined();
+    expect(privFn?.visibility).toBe('private');
+  });
+
+  it('should extract function signatures', () => {
+    const code = `
+pub fn add(a: i32, b: i32) i32 {
+    return a + b;
+}
+`;
+    const result = extractFromSource('math.zig', code);
+    const fn = result.nodes.find((n) => n.name === 'add');
+    expect(fn?.signature).toContain('(a: i32, b: i32)');
+  });
+
+  it('should extract struct declarations with fields', () => {
+    const code = `
+pub const Vec2 = struct {
+    x: f32,
+    y: f32,
+};
+`;
+    const result = extractFromSource('vec.zig', code);
+
+    const struct = result.nodes.find((n) => n.kind === 'struct');
+    expect(struct).toBeDefined();
+    expect(struct?.name).toBe('Vec2');
+    expect(struct?.visibility).toBe('public');
+
+    const fields = result.nodes.filter((n) => n.kind === 'field');
+    const fieldNames = fields.map((f) => f.name);
+    expect(fieldNames).toContain('x');
+    expect(fieldNames).toContain('y');
+  });
+
+  it('should extract struct methods', () => {
+    const code = `
+pub const Vec2 = struct {
+    x: f32,
+    y: f32,
+
+    pub fn length(self: Vec2) f32 {
+        return @sqrt(self.x * self.x + self.y * self.y);
+    }
+
+    fn dot(self: Vec2, other: Vec2) f32 {
+        return self.x * other.x + self.y * other.y;
+    }
+};
+`;
+    const result = extractFromSource('vec.zig', code);
+
+    const pubMethod = result.nodes.find((n) => n.kind === 'method' && n.name === 'length');
+    expect(pubMethod).toBeDefined();
+    expect(pubMethod?.visibility).toBe('public');
+
+    const privMethod = result.nodes.find((n) => n.kind === 'method' && n.name === 'dot');
+    expect(privMethod).toBeDefined();
+    expect(privMethod?.visibility).toBe('private');
+
+    // Methods should be contained by the struct
+    const containsEdges = result.edges.filter((e) => e.kind === 'contains');
+    const struct = result.nodes.find((n) => n.kind === 'struct');
+    expect(containsEdges.some((e) => e.source === struct?.id && e.target === pubMethod?.id)).toBe(true);
+  });
+
+  it('should extract enum declarations with members', () => {
+    const code = `
+pub const Color = enum {
+    red,
+    green,
+    blue,
+};
+`;
+    const result = extractFromSource('color.zig', code);
+
+    const enumNode = result.nodes.find((n) => n.kind === 'enum');
+    expect(enumNode).toBeDefined();
+    expect(enumNode?.name).toBe('Color');
+
+    const members = result.nodes.filter((n) => n.kind === 'enum_member');
+    const memberNames = members.map((m) => m.name);
+    expect(memberNames).toContain('red');
+    expect(memberNames).toContain('green');
+    expect(memberNames).toContain('blue');
+  });
+
+  it('should extract error sets as enums', () => {
+    const code = `
+pub const AppError = error {
+    OutOfMemory,
+    InvalidInput,
+};
+`;
+    const result = extractFromSource('errors.zig', code);
+
+    const enumNode = result.nodes.find((n) => n.kind === 'enum' && n.name === 'AppError');
+    expect(enumNode).toBeDefined();
+
+    const members = result.nodes.filter((n) => n.kind === 'enum_member');
+    const memberNames = members.map((m) => m.name);
+    expect(memberNames).toContain('OutOfMemory');
+    expect(memberNames).toContain('InvalidInput');
+  });
+
+  it('should extract @import as import node', () => {
+    const code = `
+const std = @import("std");
+const math = @import("./math.zig");
+`;
+    const result = extractFromSource('main.zig', code);
+
+    const importStd = result.nodes.find((n) => n.kind === 'import' && n.name === 'std');
+    expect(importStd).toBeDefined();
+
+    const importMath = result.nodes.find((n) => n.kind === 'import' && n.name === 'math');
+    expect(importMath).toBeDefined();
+
+    // Should produce unresolved references for the module paths
+    const stdRef = result.unresolvedReferences.find((r) => r.referenceName === 'std');
+    expect(stdRef).toBeDefined();
+    expect(stdRef?.referenceKind).toBe('imports');
+
+    // Relative file imports keep the path exactly as written in the source
+    const mathRef = result.unresolvedReferences.find((r) => r.referenceName === './math.zig');
+    expect(mathRef).toBeDefined();
+    expect(mathRef?.referenceKind).toBe('imports');
+  });
+
+  it('should extract chained @import as import node', () => {
+    const code = `
+const io = @import("std").io;
+`;
+    const result = extractFromSource('main.zig', code);
+
+    const importIo = result.nodes.find((n) => n.kind === 'import' && n.name === 'io');
+    expect(importIo).toBeDefined();
+
+    // The reference targets the imported module, not the accessed member
+    const stdRef = result.unresolvedReferences.find((r) => r.referenceKind === 'imports');
+    expect(stdRef?.referenceName).toBe('std');
+  });
+
+  it('should extract plain constants and variables', () => {
+    const code = `
+pub const PI: f64 = 3.14159;
+var global_count: i32 = 0;
+`;
+    const result = extractFromSource('consts.zig', code);
+
+    const pi = result.nodes.find((n) => n.name === 'PI');
+    expect(pi).toBeDefined();
+    expect(pi?.kind).toBe('constant');
+    expect(pi?.visibility).toBe('public');
+
+    const count = result.nodes.find((n) => n.name === 'global_count');
+    expect(count).toBeDefined();
+    expect(count?.kind).toBe('variable');
+    expect(count?.visibility).toBe('private');
+  });
+
+  it('should extract test declarations as functions', () => {
+    const code = `
+test "addition works" {
+    const x = 1 + 2;
+    _ = x;
+}
+`;
+    const result = extractFromSource('math_test.zig', code);
+
+    const testFn = result.nodes.find((n) => n.kind === 'function' && n.name === 'addition works');
+    expect(testFn).toBeDefined();
+  });
+
+  it('should extract function calls', () => {
+    const code = `
+fn helper() void {}
+
+pub fn main() void {
+    helper();
+}
+`;
+    const result = extractFromSource('main.zig', code);
+
+    const callRef = result.unresolvedReferences.find((r) => r.referenceKind === 'calls');
+    expect(callRef).toBeDefined();
+    expect(callRef?.referenceName).toBe('helper');
+  });
+});
diff --git a/package-lock.json b/package-lock.json
index 3cd20819..0196118e 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1903,7 +1903,6 @@
       "integrity": "sha512-o5a9xKjbtuhY6Bi5S3+HvbRERmouabWbyUcpXXUA1u+GNUKoROi9byOJ8M0nHbHYHkYICiMlqxkg1KkYmm25Sw==",
       "dev": true,
       "license": "MIT",
-      "peer": true,
       "dependencies": {
         "esbuild": "^0.21.3",
         "postcss": "^8.4.43",
diff --git a/src/extraction/grammars.ts b/src/extraction/grammars.ts
index d1540424..b2f60593 100644
--- a/src/extraction/grammars.ts
+++ b/src/extraction/grammars.ts
@@ -35,6 +35,7 @@ const WASM_GRAMMAR_FILES: Record = {
   dart: 'tree-sitter-dart.wasm',
   pascal: 'tree-sitter-pascal.wasm',
   scala: 'tree-sitter-scala.wasm',
+  zig: 'tree-sitter-zig.wasm',
 };
 
 /**
@@ -78,6 +79,8 @@ export const EXTENSION_MAP: Record = {
   '.fmx': 'pascal',
   '.scala': 'scala',
   '.sc': 'scala',
+  '.zig': 'zig',
+  '.zon': 'zig',
 };
 
 /**
@@ -291,6 +294,7 @@ export function getLanguageDisplayName(language: Language): string {
     liquid: 'Liquid',
     pascal: 'Pascal / Delphi',
     scala: 'Scala',
+    zig: 'Zig',
     unknown: 'Unknown',
   };
   return names[language] || language;
diff --git a/src/extraction/languages/index.ts
b/src/extraction/languages/index.ts
index 1b82262e..09101128 100644
--- a/src/extraction/languages/index.ts
+++ b/src/extraction/languages/index.ts
@@ -23,6 +23,7 @@ import { kotlinExtractor } from './kotlin';
 import { dartExtractor } from './dart';
 import { pascalExtractor } from './pascal';
 import { scalaExtractor } from './scala';
+import { zigExtractor } from './zig';
 
 export const EXTRACTORS: Partial> = {
   typescript: typescriptExtractor,
@@ -43,4 +44,5 @@ export const EXTRACTORS: Partial> = {
   dart: dartExtractor,
   pascal: pascalExtractor,
   scala: scalaExtractor,
+  zig: zigExtractor,
 };
diff --git a/src/extraction/languages/zig.ts b/src/extraction/languages/zig.ts
new file mode 100644
index 00000000..5a5d4947
--- /dev/null
+++ b/src/extraction/languages/zig.ts
@@ -0,0 +1,263 @@
+/**
+ * Zig language extractor (tree-sitter-zig, targets Zig 0.16+).
+ *
+ * Known limitation: comptime-generated types such as
+ *   `fn Physics(comptime R: type) type { return struct { ... }; }`
+ * are not resolvable at the tree-sitter level (requires full type inference).
+ * Methods on such types are not extracted.
+ */
+
+import type { Node as SyntaxNode } from 'web-tree-sitter';
+import { getNodeText, getChildByField } from '../tree-sitter-helpers';
+import type { LanguageExtractor, ExtractorContext } from '../tree-sitter-types';
+
+/** True when any direct child of `node` (named or anonymous) has the given type. */
+function hasChildOfType(node: SyntaxNode, type: string): boolean {
+  for (let i = 0; i < node.childCount; i++) {
+    if (node.child(i)?.type === type) return true;
+  }
+  return false;
+}
+
+/** True when the node has a `pub` keyword token as a direct child. */
+function hasPub(node: SyntaxNode): boolean {
+  return hasChildOfType(node, 'pub');
+}
+
+/** True when the node has a `const` keyword token as a direct child. */
+function isConstDecl(node: SyntaxNode): boolean {
+  return hasChildOfType(node, 'const');
+}
+
+/** Return `node` when it is an `@import(...)` builtin call, otherwise null. */
+function asImportBuiltin(node: SyntaxNode | null): SyntaxNode | null {
+  if (!node || node.type !== 'builtin_function') return null;
+  return node.namedChild(0)?.text === '@import' ? node : null;
+}
+
+/**
+ * Find the @import builtin_function node inside a variable_declaration's value.
+ *
+ * Matches a direct call (`const std = @import("std")`) and field-access chains
+ * of any depth rooted at one (`const io = @import("std").io`,
+ * `const Allocator = @import("std").mem.Allocator`).
+ */
+function findImportBuiltin(node: SyntaxNode): SyntaxNode | null {
+  for (let i = 0; i < node.namedChildCount; i++) {
+    let candidate = node.namedChild(i);
+    // Follow the object side (first named child) of nested field_expressions.
+    while (candidate && candidate.type === 'field_expression') {
+      candidate = candidate.namedChild(0);
+    }
+    const builtin = asImportBuiltin(candidate);
+    if (builtin) return builtin;
+  }
+  return null;
+}
+
+/**
+ * Extract the module string from `@import("module")`.
+ * Returns null when no string argument with content is found.
+ */
+function extractImportModule(builtin: SyntaxNode, source: string): string | null {
+  // builtin_function → arguments → string → string_content
+  const args = builtin.namedChildren.find((c) => c.type === 'arguments');
+  const str = args?.namedChildren.find((c) => c.type === 'string');
+  const content = str?.namedChildren.find((c) => c.type === 'string_content');
+  return content ? getNodeText(content, source) : null;
+}
+
+/**
+ * Handle `variable_declaration` nodes.
+ *
+ * Zig uses variable_declaration as a container for structs, enums, error sets,
+ * @import calls, and plain constants/variables:
+ *   pub const Foo = struct { ... }
+ *   pub const Color = enum { ... }
+ *   pub const AppError = error { ... }
+ *   const std = @import("std")
+ *   pub const PI: f64 = 3.14
+ *
+ * Always returns true; a declaration without an identifier child is skipped.
+ */
+function handleVariableDecl(node: SyntaxNode, ctx: ExtractorContext): boolean {
+  const nameNode = node.namedChildren.find((c) => c.type === 'identifier');
+  if (!nameNode) return true;
+  const name = getNodeText(nameNode, ctx.source);
+
+  const pub = hasPub(node);
+  const visibility = pub ? ('public' as const) : ('private' as const);
+
+  const structChild = node.namedChildren.find((c) => c.type === 'struct_declaration');
+  if (structChild) {
+    const structNode = ctx.createNode('struct', name, node, {
+      visibility,
+      isExported: pub,
+    });
+    if (structNode) {
+      // Visit fields, methods and nested declarations with the struct as scope.
+      ctx.pushScope(structNode.id);
+      for (let i = 0; i < structChild.namedChildCount; i++) {
+        const child = structChild.namedChild(i);
+        if (child) ctx.visitNode(child);
+      }
+      ctx.popScope();
+    }
+    return true;
+  }
+
+  const enumChild = node.namedChildren.find((c) => c.type === 'enum_declaration');
+  if (enumChild) {
+    const enumNode = ctx.createNode('enum', name, node, {
+      visibility,
+      isExported: pub,
+    });
+    if (enumNode) {
+      ctx.pushScope(enumNode.id);
+      for (let i = 0; i < enumChild.namedChildCount; i++) {
+        const child = enumChild.namedChild(i);
+        if (!child) continue;
+        if (child.type === 'container_field') {
+          // Enum members share the container_field node type with struct fields
+          const memberName = child.childForFieldName('name');
+          if (memberName) {
+            ctx.createNode('enum_member', getNodeText(memberName, ctx.source), child);
+          }
+        } else {
+          // Methods and nested types inside an enum
+          ctx.visitNode(child);
+        }
+      }
+      ctx.popScope();
+    }
+    return true;
+  }
+
+  const errorChild = node.namedChildren.find((c) => c.type === 'error_set_declaration');
+  if (errorChild) {
+    // Error sets are modelled as enums; members are plain identifiers
+    const enumNode = ctx.createNode('enum', name, node, {
+      visibility,
+      isExported: pub,
+    });
+    if (enumNode) {
+      ctx.pushScope(enumNode.id);
+      for (let i = 0; i < errorChild.namedChildCount; i++) {
+        const child = errorChild.namedChild(i);
+        if (child?.type === 'identifier') {
+          ctx.createNode('enum_member', getNodeText(child, ctx.source), child);
+        }
+      }
+      ctx.popScope();
+    }
+    return true;
+  }
+
+  const importBuiltin = findImportBuiltin(node);
+  const moduleName = importBuiltin ? extractImportModule(importBuiltin, ctx.source) : null;
+  if (moduleName) {
+    const sig = getNodeText(node, ctx.source).trim().replace(/;$/, '');
+    ctx.createNode('import', name, node, { signature: sig });
+    const parentId = ctx.nodeStack[ctx.nodeStack.length - 1];
+    if (parentId) {
+      ctx.addUnresolvedReference({
+        fromNodeId: parentId,
+        referenceName: moduleName,
+        referenceKind: 'imports',
+        line: node.startPosition.row + 1,
+        column: node.startPosition.column,
+      });
+    }
+    return true;
+  }
+
+  // Plain constant or variable (pub const PI: f64 = 3.14, var count: i32 = 0, etc.).
+  // An @import whose module string cannot be read also ends up here, so the
+  // binding is still recorded instead of being dropped.
+  const kind = isConstDecl(node) ? ('constant' as const) : ('variable' as const);
+  ctx.createNode(kind, name, node, {
+    visibility,
+    isExported: pub,
+  });
+  return true;
+}
+
+/**
+ * Handle `test_declaration` nodes.
+ *   test "name" { ... } → function node named after the string content
+ *   test ident { ... }  → function node named after the identifier
+ *   test { ... }        → function node named "unnamed"
+ */
+function handleTestDecl(node: SyntaxNode, ctx: ExtractorContext): boolean {
+  const strNode = node.namedChildren.find((c) => c.type === 'string');
+  const identNode = node.namedChildren.find((c) => c.type === 'identifier');
+  const bodyNode = node.namedChildren.find((c) => c.type === 'block');
+
+  // The signature mirrors how the test was written, so an anonymous test is
+  // not rendered as if it carried the literal name "unnamed".
+  let testName = 'unnamed';
+  let signature = 'test';
+  const content = strNode?.namedChildren.find((c) => c.type === 'string_content');
+  if (content) {
+    testName = getNodeText(content, ctx.source);
+    signature = `test "${testName}"`;
+  } else if (identNode) {
+    // NOTE(review): assumes the grammar exposes the name of `test ident { ... }`
+    // as a direct `identifier` child — confirm against tree-sitter-zig node types.
+    testName = getNodeText(identNode, ctx.source);
+    signature = `test ${testName}`;
+  }
+
+  const testNode = ctx.createNode('function', testName, node, {
+    visibility: 'public',
+    signature,
+  });
+  if (testNode && bodyNode) {
+    ctx.pushScope(testNode.id);
+    ctx.visitFunctionBody(bodyNode, testNode.id);
+    ctx.popScope();
+  }
+  return true;
+}
+
+export const zigExtractor: LanguageExtractor = {
+  // function_declaration covers both top-level functions and struct methods.
+  // Methods are identified automatically when the struct node is on the scope stack.
+  functionTypes: ['function_declaration'],
+  classTypes: [],
+  methodTypes: ['function_declaration'],
+  interfaceTypes: [],
+  // struct/enum types are handled via visitNode (they nest inside variable_declaration)
+  structTypes: [],
+  enumTypes: [],
+  typeAliasTypes: [],
+  importTypes: [],
+  callTypes: ['call_expression'],
+  // Variables are handled via visitNode to detect struct/enum/import values
+  variableTypes: [],
+  // container_field covers struct fields (enum members are handled manually in visitNode)
+  fieldTypes: ['container_field'],
+
+  nameField: 'name',
+  bodyField: 'body',
+  paramsField: 'parameters',
+  returnField: 'type',
+
+  getSignature: (node, source) => {
+    // Zig's parameters node has no field name — find by type
+    const params = node.namedChildren.find((c) => c.type === 'parameters');
+    const retType = getChildByField(node, 'type');
+    if (!params) return undefined;
+    let sig = getNodeText(params, source);
+    if (retType) sig += ' ' + getNodeText(retType, source);
+    return sig;
+  },
+
+  getVisibility: (node) => (hasPub(node) ? 'public' : 'private'),
+
+  visitNode: (node, ctx) => {
+    if (node.type === 'variable_declaration') return handleVariableDecl(node, ctx);
+    if (node.type === 'test_declaration') return handleTestDecl(node, ctx);
+    return false;
+  },
+};
diff --git a/src/types.ts b/src/types.ts
index 328f7432..50dc36e6 100644
--- a/src/types.ts
+++ b/src/types.ts
@@ -85,6 +85,7 @@ export const LANGUAGES = [
   'liquid',
   'pascal',
   'scala',
+  'zig',
   'unknown',
 ] as const;
 
@@ -545,6 +546,9 @@ export const DEFAULT_CONFIG: CodeGraphConfig = {
     // Scala
     '**/*.scala',
     '**/*.sc',
+    // Zig
+    '**/*.zig',
+    '**/*.zon',
   ],
   exclude: [
     // Version control