From c63d92333ff0ae18e6807a5c6de550c925b10076 Mon Sep 17 00:00:00 2001 From: Anton Tsvetkov Date: Thu, 28 May 2026 23:58:19 +0100 Subject: [PATCH 01/12] docs: add C API implementation plan (plan 1 of 3) TDD plan for the vanedb-cpp extern "C" surface (distance, VectorStore, HNSW, MMap) consumed by vanedb-bench. Plans 2 (Rust vanedb-capi) and 3 (bench harness) follow. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../superpowers/plans/2026-05-28-cpp-c-api.md | 532 ++++++++++++++++++ 1 file changed, 532 insertions(+) create mode 100644 docs/superpowers/plans/2026-05-28-cpp-c-api.md diff --git a/docs/superpowers/plans/2026-05-28-cpp-c-api.md b/docs/superpowers/plans/2026-05-28-cpp-c-api.md new file mode 100644 index 0000000..7f11636 --- /dev/null +++ b/docs/superpowers/plans/2026-05-28-cpp-c-api.md @@ -0,0 +1,532 @@ +# vanedb-cpp C API Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Ship a shippable C API for vanedb-cpp exposing distance, VectorStore, HNSW, and MMap through `extern "C"` (symbol prefix `vanedb_cpp_`), built as a static library `libvanedb_cpp_capi.a` with public header `capi/vanedb_capi.h`, verified by a C smoke test. + +**Architecture:** A new top-level `capi/` directory. `vanedb_capi.h` is pure C (guarded for C++ inclusion) declaring the contract from the spec. `vanedb_capi.cpp` includes the header-only core, wraps each type in `extern "C"` functions over opaque handle pointers, and catches all exceptions at the boundary (exceptions must never cross the C ABI). A CMake option `VANEDB_BUILD_CAPI` builds the static lib and a C test (compiled as C, linked as C++). + +**Tech Stack:** C++20 (core, header-only), C11 (smoke test, to prove C linkage), CMake ≥3.20, ctest. 
+ +**Spec:** `docs/superpowers/specs/2026-05-28-vanedb-bench-design.md` (in `vanedb/vanedb-bench`), §5 C-ABI Contract. + +--- + +## File Structure + +| File | Responsibility | +|------|----------------| +| `capi/vanedb_capi.h` | Pure-C public header: `vanedb_metric` enum, opaque handle typedefs, all `vanedb_cpp_*` declarations | +| `capi/vanedb_capi.cpp` | `extern "C"` implementations wrapping the C++ core; exception-to-error translation | +| `tests/capi/test_capi.c` | C smoke test exercising every function; built as C, linked as C++ | +| `CMakeLists.txt` | Add `VANEDB_BUILD_CAPI` option, `vanedb_cpp_capi` static lib target, `test_capi` test | + +**Conventions (apply to every wrapper):** +- Opaque handles are `reinterpret_cast` between the C `vanedb_cpp_*` struct pointer and the C++ class pointer. +- Every wrapper body is wrapped in `try { ... } catch (...) { }`. Constructors return `nullptr` on failure; `int` functions return non-zero on failure; `_search` returns the count written (0 on error/empty). +- `out_ids` / `out_dists` are caller-owned, length `k`. 
+ +--- + +### Task 1: Scaffolding — header, CMake target, C test pipeline + +**Files:** +- Create: `capi/vanedb_capi.h` +- Create: `capi/vanedb_capi.cpp` +- Create: `tests/capi/test_capi.c` +- Modify: `CMakeLists.txt` (append before the final `install(...)` section) + +- [ ] **Step 1: Write the full contract header** + +Create `capi/vanedb_capi.h`: + +```c +#ifndef VANEDB_CAPI_H +#define VANEDB_CAPI_H +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { VANEDB_L2 = 0, VANEDB_COSINE = 1, VANEDB_DOT = 2 } vanedb_metric; + +/* Distance (stateless) */ +float vanedb_cpp_l2_sq(const float* a, const float* b, size_t dim); +float vanedb_cpp_cosine_distance(const float* a, const float* b, size_t dim); +float vanedb_cpp_dot_product(const float* a, const float* b, size_t dim); + +/* VectorStore (brute force) */ +typedef struct vanedb_cpp_store vanedb_cpp_store; +vanedb_cpp_store* vanedb_cpp_store_new(size_t dim, vanedb_metric metric); +int vanedb_cpp_store_add(vanedb_cpp_store* s, uint64_t id, const float* v); +size_t vanedb_cpp_store_search(vanedb_cpp_store* s, const float* q, size_t k, + uint64_t* out_ids, float* out_dists); +void vanedb_cpp_store_free(vanedb_cpp_store* s); + +/* HNSW */ +typedef struct vanedb_cpp_hnsw vanedb_cpp_hnsw; +vanedb_cpp_hnsw* vanedb_cpp_hnsw_new(size_t dim, vanedb_metric metric, size_t capacity, + size_t M, size_t ef_construction, uint64_t seed); +int vanedb_cpp_hnsw_add(vanedb_cpp_hnsw* h, uint64_t id, const float* v); +size_t vanedb_cpp_hnsw_search(vanedb_cpp_hnsw* h, const float* q, size_t k, size_t ef_search, + uint64_t* out_ids, float* out_dists); +int vanedb_cpp_hnsw_save(vanedb_cpp_hnsw* h, const char* path); +vanedb_cpp_hnsw* vanedb_cpp_hnsw_load(const char* path); +void vanedb_cpp_hnsw_free(vanedb_cpp_hnsw* h); + +/* MMap store */ +typedef struct vanedb_cpp_mmap vanedb_cpp_mmap; +int vanedb_cpp_mmap_build(const char* path, size_t dim, vanedb_metric metric, + const uint64_t* ids, const float* vecs, size_t n); 
+vanedb_cpp_mmap* vanedb_cpp_mmap_open(const char* path); +size_t vanedb_cpp_mmap_search(vanedb_cpp_mmap* m, const float* q, size_t k, + uint64_t* out_ids, float* out_dists); +void vanedb_cpp_mmap_free(vanedb_cpp_mmap* m); + +#ifdef __cplusplus +} +#endif +#endif /* VANEDB_CAPI_H */ +``` + +- [ ] **Step 2: Write the implementation skeleton (metric mapping + empty extern "C")** + +Create `capi/vanedb_capi.cpp`: + +```cpp +#include "capi/vanedb_capi.h" +#include "core/vector_store.h" +#include "core/hnsw_index.h" +#include "core/mmap_vector_store.h" + +using namespace vanedb; + +namespace { +DistanceMetric to_metric(vanedb_metric m) { + switch (m) { + case VANEDB_COSINE: return DistanceMetric::COSINE; + case VANEDB_DOT: return DistanceMetric::DOT; + case VANEDB_L2: + default: return DistanceMetric::L2; + } +} +} // namespace + +extern "C" { +// Implementations added per task below. +} +``` + +- [ ] **Step 3: Write the C smoke test (enum sanity only, for now)** + +Create `tests/capi/test_capi.c`: + +```c +#include "capi/vanedb_capi.h" +#include +#include + +int main(void) { + assert(VANEDB_L2 == 0); + assert(VANEDB_COSINE == 1); + assert(VANEDB_DOT == 2); + printf("capi: enum OK\n"); + return 0; +} +``` + +- [ ] **Step 4: Wire CMake** + +In `CMakeLists.txt`, add this block immediately before the final `install(TARGETS vanedb ...)` section: + +```cmake +# C API (shippable extern "C" surface; consumed by vanedb-bench) +option(VANEDB_BUILD_CAPI "Build the C API static library" OFF) +if(VANEDB_BUILD_CAPI) + add_library(vanedb_cpp_capi STATIC capi/vanedb_capi.cpp) + target_include_directories(vanedb_cpp_capi PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/src + ${CMAKE_CURRENT_SOURCE_DIR}) + target_compile_features(vanedb_cpp_capi PUBLIC cxx_std_20) + + if(VANEDB_BUILD_TESTS) + add_executable(test_capi tests/capi/test_capi.c) + target_include_directories(test_capi PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) + target_link_libraries(test_capi PRIVATE vanedb_cpp_capi) + # The static lib is 
C++; link the C test with the C++ runtime. + set_target_properties(test_capi PROPERTIES LINKER_LANGUAGE CXX) + add_test(NAME capi COMMAND test_capi) + endif() +endif() +``` + +- [ ] **Step 5: Configure, build, run — verify the pipeline** + +Run: +```bash +cmake -B build-capi -DCMAKE_BUILD_TYPE=Release -DVANEDB_BUILD_CAPI=ON +cmake --build build-capi --target test_capi --parallel +ctest --test-dir build-capi -R capi --output-on-failure +``` +Expected: build succeeds; test prints `capi: enum OK`; ctest reports `1/1 Passed`. + +- [ ] **Step 6: Commit** + +```bash +git add capi/vanedb_capi.h capi/vanedb_capi.cpp tests/capi/test_capi.c CMakeLists.txt +git commit -m "feat(capi): scaffold C API header, static lib target, C smoke test" +``` + +--- + +### Task 2: Distance C API + +**Files:** +- Modify: `capi/vanedb_capi.cpp` (inside `extern "C"`) +- Modify: `tests/capi/test_capi.c` + +- [ ] **Step 1: Write the failing test** + +In `tests/capi/test_capi.c`, add before `return 0;` in `main`: + +```c + { + float a[4] = {1.f, 2.f, 3.f, 4.f}; + float b[4] = {1.f, 2.f, 3.f, 5.f}; + float l2 = vanedb_cpp_l2_sq(a, b, 4); /* (4-5)^2 = 1 */ + assert(l2 > 0.99f && l2 < 1.01f); + float dot = vanedb_cpp_dot_product(a, b, 4); /* 1+4+9+20 = 34 */ + assert(dot > 33.9f && dot < 34.1f); + float cos = vanedb_cpp_cosine_distance(a, a, 4); /* identical => ~0 */ + assert(cos > -0.01f && cos < 0.01f); + printf("capi: distance OK\n"); + } +``` + +- [ ] **Step 2: Run to verify it fails** + +Run: `cmake --build build-capi --target test_capi 2>&1 | tail -5` +Expected: link error — undefined reference to `vanedb_cpp_l2_sq` (and the other two). 
+ +- [ ] **Step 3: Implement the distance wrappers** + +In `capi/vanedb_capi.cpp`, inside the `extern "C" {` block: + +```cpp +float vanedb_cpp_l2_sq(const float* a, const float* b, size_t dim) { + return l2_sq(a, b, dim); +} +float vanedb_cpp_cosine_distance(const float* a, const float* b, size_t dim) { + return cosine_distance(a, b, dim); +} +float vanedb_cpp_dot_product(const float* a, const float* b, size_t dim) { + return dot_product(a, b, dim); +} +``` + +- [ ] **Step 4: Run to verify it passes** + +Run: `cmake --build build-capi --target test_capi --parallel && ctest --test-dir build-capi -R capi --output-on-failure` +Expected: prints `capi: distance OK`; `1/1 Passed`. + +- [ ] **Step 5: Commit** + +```bash +git add capi/vanedb_capi.cpp tests/capi/test_capi.c +git commit -m "feat(capi): distance functions" +``` + +--- + +### Task 3: VectorStore C API + +**Files:** +- Modify: `capi/vanedb_capi.cpp` +- Modify: `tests/capi/test_capi.c` + +- [ ] **Step 1: Write the failing test** + +Add to `main` in `tests/capi/test_capi.c` before `return 0;`: + +```c + { + float v0[2] = {0.f, 0.f}; + float v1[2] = {1.f, 1.f}; + float q[2] = {0.1f, 0.1f}; + vanedb_cpp_store* s = vanedb_cpp_store_new(2, VANEDB_L2); + assert(s != NULL); + assert(vanedb_cpp_store_add(s, 10, v0) == 0); + assert(vanedb_cpp_store_add(s, 20, v1) == 0); + uint64_t ids[2]; float ds[2]; + size_t n = vanedb_cpp_store_search(s, q, 2, ids, ds); + assert(n == 2); + assert(ids[0] == 10); /* nearest to (0.1,0.1) is (0,0) */ + assert(ds[0] <= ds[1]); /* sorted ascending */ + vanedb_cpp_store_free(s); + printf("capi: store OK\n"); + } +``` + +- [ ] **Step 2: Run to verify it fails** + +Run: `cmake --build build-capi --target test_capi 2>&1 | tail -5` +Expected: link error — undefined reference to `vanedb_cpp_store_new` etc. 
+ +- [ ] **Step 3: Implement the VectorStore wrappers** + +In `capi/vanedb_capi.cpp`, inside `extern "C"`: + +```cpp +vanedb_cpp_store* vanedb_cpp_store_new(size_t dim, vanedb_metric metric) { + try { return reinterpret_cast<vanedb_cpp_store*>(new VectorStore(dim, to_metric(metric))); } + catch (...) { return nullptr; } +} +int vanedb_cpp_store_add(vanedb_cpp_store* s, uint64_t id, const float* v) { + try { reinterpret_cast<VectorStore*>(s)->add(id, v); return 0; } + catch (...) { return 1; } +} +size_t vanedb_cpp_store_search(vanedb_cpp_store* s, const float* q, size_t k, + uint64_t* out_ids, float* out_dists) { + try { + auto res = reinterpret_cast<VectorStore*>(s)->search(q, k); + for (size_t i = 0; i < res.size(); ++i) { out_ids[i] = res[i].id; out_dists[i] = res[i].distance; } + return res.size(); + } catch (...) { return 0; } +} +void vanedb_cpp_store_free(vanedb_cpp_store* s) { + delete reinterpret_cast<VectorStore*>(s); +} +``` + +- [ ] **Step 4: Run to verify it passes** + +Run: `cmake --build build-capi --target test_capi --parallel && ctest --test-dir build-capi -R capi --output-on-failure` +Expected: prints `capi: store OK`; `1/1 Passed`. 
+ +- [ ] **Step 5: Commit** + +```bash +git add capi/vanedb_capi.cpp tests/capi/test_capi.c +git commit -m "feat(capi): VectorStore" +``` + +--- + +### Task 4: HNSW C API + +**Files:** +- Modify: `capi/vanedb_capi.cpp` +- Modify: `tests/capi/test_capi.c` + +- [ ] **Step 1: Write the failing test** (build → search → save → load → search) + +Add to `main` in `tests/capi/test_capi.c` before `return 0;`: + +```c + { + float v0[2] = {0.f, 0.f}; + float v1[2] = {1.f, 1.f}; + float q[2] = {0.1f, 0.1f}; + vanedb_cpp_hnsw* h = vanedb_cpp_hnsw_new(2, VANEDB_L2, 100, 16, 200, 42); + assert(h != NULL); + assert(vanedb_cpp_hnsw_add(h, 10, v0) == 0); + assert(vanedb_cpp_hnsw_add(h, 20, v1) == 0); + uint64_t ids[2]; float ds[2]; + size_t n = vanedb_cpp_hnsw_search(h, q, 2, 50, ids, ds); + assert(n == 2); + assert(ids[0] == 10); + assert(vanedb_cpp_hnsw_save(h, "capi_hnsw.bin") == 0); + vanedb_cpp_hnsw_free(h); + + vanedb_cpp_hnsw* h2 = vanedb_cpp_hnsw_load("capi_hnsw.bin"); + assert(h2 != NULL); + uint64_t ids2[1]; float ds2[1]; + size_t n2 = vanedb_cpp_hnsw_search(h2, q, 1, 50, ids2, ds2); + assert(n2 == 1 && ids2[0] == 10); + vanedb_cpp_hnsw_free(h2); + printf("capi: hnsw OK\n"); + } +``` + +- [ ] **Step 2: Run to verify it fails** + +Run: `cmake --build build-capi --target test_capi 2>&1 | tail -5` +Expected: link error — undefined reference to `vanedb_cpp_hnsw_new` etc. + +- [ ] **Step 3: Implement the HNSW wrappers** + +In `capi/vanedb_capi.cpp`, inside `extern "C"`. Note `search` takes `ef_search`: set it then search. `load` returns a `std::unique_ptr<HNSWIndex>`; release it to hand ownership to C. + +```cpp +vanedb_cpp_hnsw* vanedb_cpp_hnsw_new(size_t dim, vanedb_metric metric, size_t capacity, + size_t M, size_t ef_construction, uint64_t seed) { + try { + return reinterpret_cast<vanedb_cpp_hnsw*>( + new HNSWIndex(dim, to_metric(metric), capacity, M, ef_construction, + static_cast<uint32_t>(seed))); + } catch (...) 
 { return nullptr; } +} +int vanedb_cpp_hnsw_add(vanedb_cpp_hnsw* h, uint64_t id, const float* v) { + try { reinterpret_cast<HNSWIndex*>(h)->add(id, v); return 0; } + catch (...) { return 1; } +} +size_t vanedb_cpp_hnsw_search(vanedb_cpp_hnsw* h, const float* q, size_t k, size_t ef_search, + uint64_t* out_ids, float* out_dists) { + try { + auto* idx = reinterpret_cast<HNSWIndex*>(h); + idx->set_ef_search(ef_search); + auto res = idx->search(q, k); + for (size_t i = 0; i < res.size(); ++i) { out_ids[i] = res[i].id; out_dists[i] = res[i].distance; } + return res.size(); + } catch (...) { return 0; } +} +int vanedb_cpp_hnsw_save(vanedb_cpp_hnsw* h, const char* path) { + try { reinterpret_cast<HNSWIndex*>(h)->save(path); return 0; } + catch (...) { return 1; } +} +vanedb_cpp_hnsw* vanedb_cpp_hnsw_load(const char* path) { + try { return reinterpret_cast<vanedb_cpp_hnsw*>(HNSWIndex::load(path).release()); } + catch (...) { return nullptr; } +} +void vanedb_cpp_hnsw_free(vanedb_cpp_hnsw* h) { + delete reinterpret_cast<HNSWIndex*>(h); +} +``` + +- [ ] **Step 4: Run to verify it passes** + +Run: `cmake --build build-capi --target test_capi --parallel && ctest --test-dir build-capi -R capi --output-on-failure` +Expected: prints `capi: hnsw OK`; `1/1 Passed`. 
+ +- [ ] **Step 5: Commit** + +```bash +git add capi/vanedb_capi.cpp tests/capi/test_capi.c +git commit -m "feat(capi): HNSW index with save/load" +``` + +--- + +### Task 5: MMap store C API + +**Files:** +- Modify: `capi/vanedb_capi.cpp` +- Modify: `tests/capi/test_capi.c` + +- [ ] **Step 1: Write the failing test** (build file → open → search) + +Add to `main` in `tests/capi/test_capi.c` before `return 0;`: + +```c + { + uint64_t ids_in[2] = {10, 20}; + float vecs[4] = {0.f, 0.f, 1.f, 1.f}; /* row-major: id10=(0,0), id20=(1,1) */ + float q[2] = {0.1f, 0.1f}; + assert(vanedb_cpp_mmap_build("capi_mmap.bin", 2, VANEDB_L2, ids_in, vecs, 2) == 0); + vanedb_cpp_mmap* m = vanedb_cpp_mmap_open("capi_mmap.bin"); + assert(m != NULL); + uint64_t ids[2]; float ds[2]; + size_t n = vanedb_cpp_mmap_search(m, q, 2, ids, ds); + assert(n == 2 && ids[0] == 10); + vanedb_cpp_mmap_free(m); + printf("capi: mmap OK\n"); + } +``` + +- [ ] **Step 2: Run to verify it fails** + +Run: `cmake --build build-capi --target test_capi 2>&1 | tail -5` +Expected: link error — undefined reference to `vanedb_cpp_mmap_build` etc. + +- [ ] **Step 3: Implement the MMap wrappers** + +In `capi/vanedb_capi.cpp`, inside `extern "C"`. Build uses `MMapVectorStoreBuilder`; open uses the `MMapVectorStore` filename constructor: + +```cpp +int vanedb_cpp_mmap_build(const char* path, size_t dim, vanedb_metric metric, + const uint64_t* ids, const float* vecs, size_t n) { + try { + MMapVectorStoreBuilder b(dim, to_metric(metric)); + for (size_t i = 0; i < n; ++i) b.add(ids[i], vecs + i * dim); + b.save(path); + return 0; + } catch (...) { return 1; } +} +vanedb_cpp_mmap* vanedb_cpp_mmap_open(const char* path) { + try { return reinterpret_cast<vanedb_cpp_mmap*>(new MMapVectorStore(path)); } + catch (...) 
 { return nullptr; } +} +size_t vanedb_cpp_mmap_search(vanedb_cpp_mmap* m, const float* q, size_t k, + uint64_t* out_ids, float* out_dists) { + try { + auto res = reinterpret_cast<MMapVectorStore*>(m)->search(q, k); + for (size_t i = 0; i < res.size(); ++i) { out_ids[i] = res[i].id; out_dists[i] = res[i].distance; } + return res.size(); + } catch (...) { return 0; } +} +void vanedb_cpp_mmap_free(vanedb_cpp_mmap* m) { + delete reinterpret_cast<MMapVectorStore*>(m); +} +``` + +- [ ] **Step 4: Run to verify it passes** + +Run: `cmake --build build-capi --target test_capi --parallel && ctest --test-dir build-capi -R capi --output-on-failure` +Expected: prints `capi: mmap OK`; `1/1 Passed`. + +- [ ] **Step 5: Commit** + +```bash +git add capi/vanedb_capi.cpp tests/capi/test_capi.c +git commit -m "feat(capi): MMap store" +``` + +--- + +### Task 6: Install/export the C API + full verification + +**Files:** +- Modify: `CMakeLists.txt` + +- [ ] **Step 1: Add install rules for the static lib + header** + +In `CMakeLists.txt`, inside the existing `if(VANEDB_BUILD_CAPI)` block (after the test wiring), add: + +```cmake + install(TARGETS vanedb_cpp_capi ARCHIVE DESTINATION lib) + install(FILES capi/vanedb_capi.h DESTINATION include/vanedb) +``` + +- [ ] **Step 2: Full clean build + run the whole C smoke test** + +Run: +```bash +rm -rf build-capi +cmake -B build-capi -DCMAKE_BUILD_TYPE=Release -DVANEDB_BUILD_CAPI=ON +cmake --build build-capi --target test_capi --parallel +ctest --test-dir build-capi -R capi --output-on-failure +``` +Expected output includes `capi: enum OK`, `capi: distance OK`, `capi: store OK`, `capi: hnsw OK`, `capi: mmap OK`, and `100% tests passed, 0 tests failed out of 1`. + +- [ ] **Step 3: Verify the static archive is produced** + +Run: `ls build-capi/libvanedb_cpp_capi.a && echo OK` +Expected: the path prints, then `OK`. 
+ +- [ ] **Step 4: Commit** + +```bash +git add CMakeLists.txt +git commit -m "feat(capi): install rules for static lib and header" +``` + +--- + +## Self-Review + +- **Spec coverage:** §5 contract — distance (Task 2), VectorStore (Task 3), HNSW incl. seed + save/load (Task 4), MMap (Task 5). Namespacing `vanedb_cpp_*` (Task 1 header). Exception-to-error boundary (conventions + every wrapper). All covered. +- **Placeholder scan:** none — every step has concrete code/commands. +- **Type consistency:** opaque types `vanedb_cpp_store|hnsw|mmap` declared in Task 1 header match the `reinterpret_cast` targets (`VectorStore|HNSWIndex|MMapVectorStore`) in Tasks 3–5; `vanedb_cpp_hnsw_new` seed is `uint64_t` in C, cast to `uint32_t` for the C++ ctor (matches `hnsw_index.h`); `_search` signatures (with `ef_search` for HNSW) match header and impl. +- **Note for executor:** the C++ core throws on duplicate IDs / invalid args; the smoke test only exercises valid paths. The bench harness (Plan 3) always passes valid inputs, so the catch-all error returns are a safety net, not a tested path here. From e71aa81beca77c597a209da2653e77e8ba3cfdeb Mon Sep 17 00:00:00 2001 From: Anton Tsvetkov Date: Fri, 29 May 2026 19:07:18 +0100 Subject: [PATCH 02/12] feat(capi): scaffold C API header, static lib target, C smoke test Adds capi/vanedb_capi.h with the full extern "C" contract (distance, VectorStore, HNSW, MMap), an empty-bodied capi/vanedb_capi.cpp with metric mapping, a C smoke test (tests/capi/test_capi.c) that validates enum values, and CMake wiring for the VANEDB_BUILD_CAPI=ON static library target. Also adds build-capi/ to .gitignore and enables the C language when VANEDB_BUILD_CAPI+VANEDB_BUILD_TESTS are both ON (project declares LANGUAGES CXX only). 
Co-Authored-By: Claude Sonnet 4.6 --- .gitignore | 1 + CMakeLists.txt | 21 ++++++++++++++ capi/vanedb_capi.cpp | 21 ++++++++++++++ capi/vanedb_capi.h | 63 ++++++++++++++++++++++++++++++++++++++++++ tests/capi/test_capi.c | 11 ++++++++ 5 files changed, 117 insertions(+) create mode 100644 capi/vanedb_capi.cpp create mode 100644 capi/vanedb_capi.h create mode 100644 tests/capi/test_capi.c diff --git a/.gitignore b/.gitignore index 1dabcc3..be4e3ae 100644 --- a/.gitignore +++ b/.gitignore @@ -51,6 +51,7 @@ external/ # Build artifacts (multiple per-config build trees from benchmarking) build_*/ +build-capi/ # Coverage artifacts *.gcda diff --git a/CMakeLists.txt b/CMakeLists.txt index 03c0c50..f17fece 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -178,6 +178,27 @@ if(VANEDB_BUILD_CUDA) # hardware in CI. Metal is the supported GPU path. endif() +# C API (shippable extern "C" surface; consumed by vanedb-bench) +option(VANEDB_BUILD_CAPI "Build the C API static library" OFF) +if(VANEDB_BUILD_CAPI) + add_library(vanedb_cpp_capi STATIC capi/vanedb_capi.cpp) + target_include_directories(vanedb_cpp_capi PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR}/src + ${CMAKE_CURRENT_SOURCE_DIR}) + target_compile_features(vanedb_cpp_capi PRIVATE cxx_std_20) + + if(VANEDB_BUILD_TESTS) + # Enable C so CMake can compile test_capi.c (project declares LANGUAGES CXX only). + enable_language(C) + add_executable(test_capi tests/capi/test_capi.c) + target_include_directories(test_capi PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) + target_link_libraries(test_capi PRIVATE vanedb_cpp_capi) + # The static lib is C++; link the C test with the C++ runtime. 
+ set_target_properties(test_capi PROPERTIES LINKER_LANGUAGE CXX) + add_test(NAME capi COMMAND test_capi) + endif() +endif() + # Install install(TARGETS vanedb EXPORT vanedb-targets) install(DIRECTORY src/core/ DESTINATION include/vanedb FILES_MATCHING PATTERN "*.h" PATTERN "*.cuh") diff --git a/capi/vanedb_capi.cpp b/capi/vanedb_capi.cpp new file mode 100644 index 0000000..0f41ac5 --- /dev/null +++ b/capi/vanedb_capi.cpp @@ -0,0 +1,21 @@ +#include "capi/vanedb_capi.h" +#include "core/vector_store.h" +#include "core/hnsw_index.h" +#include "core/mmap_vector_store.h" + +using namespace vanedb; + +namespace { +DistanceMetric to_metric(vanedb_metric m) { + switch (m) { + case VANEDB_COSINE: return DistanceMetric::COSINE; + case VANEDB_DOT: return DistanceMetric::DOT; + case VANEDB_L2: + default: return DistanceMetric::L2; + } +} +} // namespace + +extern "C" { +// Implementations added per task below. +} diff --git a/capi/vanedb_capi.h b/capi/vanedb_capi.h new file mode 100644 index 0000000..80a0376 --- /dev/null +++ b/capi/vanedb_capi.h @@ -0,0 +1,63 @@ +#ifndef VANEDB_CAPI_H +#define VANEDB_CAPI_H +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +typedef enum { VANEDB_L2 = 0, VANEDB_COSINE = 1, VANEDB_DOT = 2 } vanedb_metric; + +/* + * ABI conventions (C API v0 — unstable until a tagged release): + * - Handles are opaque; every constructor (_new/_open) has a matching _free. + * - Handle pointers are intentionally non-const (incl. read-only search/save) + * to keep this ABI byte-identical to the parallel Rust C API (vanedb_rs_*), + * which the benchmark harness calls through one uniform FFI. Do not add const. + * - int returns: 0 = success, non-zero = failure. Constructors return NULL on + * failure. _search returns the number of results written (0 on error/empty). + * - out_ids / out_dists are caller-owned buffers of length k. + * - vanedb_cpp_hnsw_search takes ef_search per call (the implementation sets it + * then searches). 
This mirrors the Rust ABI; it is not thread-safe to call + * concurrently with different ef_search values on the same handle, which the + * single-threaded benchmark consumer never does. Do not remove this parameter. + */ + +/* Distance (stateless) */ +float vanedb_cpp_l2_sq(const float* a, const float* b, size_t dim); +float vanedb_cpp_cosine_distance(const float* a, const float* b, size_t dim); +float vanedb_cpp_dot_product(const float* a, const float* b, size_t dim); + +/* VectorStore (brute force) */ +typedef struct vanedb_cpp_store vanedb_cpp_store; +vanedb_cpp_store* vanedb_cpp_store_new(size_t dim, vanedb_metric metric); +int vanedb_cpp_store_add(vanedb_cpp_store* s, uint64_t id, const float* v); +size_t vanedb_cpp_store_search(vanedb_cpp_store* s, const float* q, size_t k, + uint64_t* out_ids, float* out_dists); +void vanedb_cpp_store_free(vanedb_cpp_store* s); + +/* HNSW */ +typedef struct vanedb_cpp_hnsw vanedb_cpp_hnsw; +vanedb_cpp_hnsw* vanedb_cpp_hnsw_new(size_t dim, vanedb_metric metric, size_t capacity, + size_t M, size_t ef_construction, uint64_t seed); +int vanedb_cpp_hnsw_add(vanedb_cpp_hnsw* h, uint64_t id, const float* v); +size_t vanedb_cpp_hnsw_search(vanedb_cpp_hnsw* h, const float* q, size_t k, size_t ef_search, + uint64_t* out_ids, float* out_dists); +int vanedb_cpp_hnsw_save(vanedb_cpp_hnsw* h, const char* path); +vanedb_cpp_hnsw* vanedb_cpp_hnsw_load(const char* path); +void vanedb_cpp_hnsw_free(vanedb_cpp_hnsw* h); + +/* MMap store */ +typedef struct vanedb_cpp_mmap vanedb_cpp_mmap; +int vanedb_cpp_mmap_build(const char* path, size_t dim, vanedb_metric metric, + const uint64_t* ids, const float* vecs, size_t n); +vanedb_cpp_mmap* vanedb_cpp_mmap_open(const char* path); +size_t vanedb_cpp_mmap_search(vanedb_cpp_mmap* m, const float* q, size_t k, + uint64_t* out_ids, float* out_dists); +void vanedb_cpp_mmap_free(vanedb_cpp_mmap* m); + +#ifdef __cplusplus +} +#endif +#endif /* VANEDB_CAPI_H */ diff --git a/tests/capi/test_capi.c 
b/tests/capi/test_capi.c new file mode 100644 index 0000000..86ed01c --- /dev/null +++ b/tests/capi/test_capi.c @@ -0,0 +1,11 @@ +#include "capi/vanedb_capi.h" +#include +#include + +int main(void) { + assert(VANEDB_L2 == 0); + assert(VANEDB_COSINE == 1); + assert(VANEDB_DOT == 2); + printf("capi: enum OK\n"); + return 0; +} From fde7aad39491ad5eefc66b549ec221d472236df0 Mon Sep 17 00:00:00 2001 From: Anton Tsvetkov Date: Fri, 29 May 2026 19:15:00 +0100 Subject: [PATCH 03/12] feat(capi): distance functions Co-Authored-By: Claude Sonnet 4.6 --- capi/vanedb_capi.cpp | 12 +++++++++++- tests/capi/test_capi.c | 13 +++++++++++++ 2 files changed, 24 insertions(+), 1 deletion(-) diff --git a/capi/vanedb_capi.cpp b/capi/vanedb_capi.cpp index 0f41ac5..e4755a8 100644 --- a/capi/vanedb_capi.cpp +++ b/capi/vanedb_capi.cpp @@ -17,5 +17,15 @@ DistanceMetric to_metric(vanedb_metric m) { } // namespace extern "C" { -// Implementations added per task below. + +float vanedb_cpp_l2_sq(const float* a, const float* b, size_t dim) { + return l2_sq(a, b, dim); +} +float vanedb_cpp_cosine_distance(const float* a, const float* b, size_t dim) { + return cosine_distance(a, b, dim); +} +float vanedb_cpp_dot_product(const float* a, const float* b, size_t dim) { + return dot_product(a, b, dim); +} + } diff --git a/tests/capi/test_capi.c b/tests/capi/test_capi.c index 86ed01c..74517c8 100644 --- a/tests/capi/test_capi.c +++ b/tests/capi/test_capi.c @@ -7,5 +7,18 @@ int main(void) { assert(VANEDB_COSINE == 1); assert(VANEDB_DOT == 2); printf("capi: enum OK\n"); + + { + float a[4] = {1.f, 2.f, 3.f, 4.f}; + float b[4] = {1.f, 2.f, 3.f, 5.f}; + float l2 = vanedb_cpp_l2_sq(a, b, 4); /* (4-5)^2 = 1 */ + assert(l2 > 0.99f && l2 < 1.01f); + float dot = vanedb_cpp_dot_product(a, b, 4); /* 1+4+9+20 = 34 */ + assert(dot > 33.9f && dot < 34.1f); + float cos = vanedb_cpp_cosine_distance(a, a, 4); /* identical => ~0 */ + assert(cos > -0.01f && cos < 0.01f); + printf("capi: distance OK\n"); + } + return 0; } 
From 8654e36cff711722ee6082422b004226522fd39f Mon Sep 17 00:00:00 2001 From: Anton Tsvetkov Date: Fri, 29 May 2026 19:19:15 +0100 Subject: [PATCH 04/12] feat(capi): VectorStore Co-Authored-By: Claude Sonnet 4.6 --- capi/vanedb_capi.cpp | 22 ++++++++++++++++++++++ tests/capi/test_capi.c | 23 +++++++++++++++++++++++ 2 files changed, 45 insertions(+) diff --git a/capi/vanedb_capi.cpp b/capi/vanedb_capi.cpp index e4755a8..ce33f11 100644 --- a/capi/vanedb_capi.cpp +++ b/capi/vanedb_capi.cpp @@ -28,4 +28,26 @@ float vanedb_cpp_dot_product(const float* a, const float* b, size_t dim) { return dot_product(a, b, dim); } +vanedb_cpp_store* vanedb_cpp_store_new(size_t dim, vanedb_metric metric) { + try { return reinterpret_cast(new VectorStore(dim, to_metric(metric))); } + catch (...) { return nullptr; } +} +int vanedb_cpp_store_add(vanedb_cpp_store* s, uint64_t id, const float* v) { + if (!s) return 1; + try { reinterpret_cast(s)->add(id, v); return 0; } + catch (...) { return 1; } +} +size_t vanedb_cpp_store_search(vanedb_cpp_store* s, const float* q, size_t k, + uint64_t* out_ids, float* out_dists) { + if (!s) return 0; + try { + auto res = reinterpret_cast(s)->search(q, k); + for (size_t i = 0; i < res.size(); ++i) { out_ids[i] = res[i].id; out_dists[i] = res[i].distance; } + return res.size(); + } catch (...) 
{ return 0; } +} +void vanedb_cpp_store_free(vanedb_cpp_store* s) { + delete reinterpret_cast(s); +} + } diff --git a/tests/capi/test_capi.c b/tests/capi/test_capi.c index 74517c8..8c9084d 100644 --- a/tests/capi/test_capi.c +++ b/tests/capi/test_capi.c @@ -20,5 +20,28 @@ int main(void) { printf("capi: distance OK\n"); } + { + float v0[2] = {0.f, 0.f}; + float v1[2] = {1.f, 1.f}; + float q[2] = {0.1f, 0.1f}; + vanedb_cpp_store* s = vanedb_cpp_store_new(2, VANEDB_L2); + assert(s != NULL); + assert(vanedb_cpp_store_add(s, 10, v0) == 0); + assert(vanedb_cpp_store_add(s, 20, v1) == 0); + uint64_t ids[2]; float ds[2]; + size_t n = vanedb_cpp_store_search(s, q, 2, ids, ds); + assert(n == 2); + assert(ids[0] == 10); /* nearest to (0.1,0.1) is (0,0) */ + assert(ds[0] <= ds[1]); /* sorted ascending */ + + /* negative paths */ + assert(vanedb_cpp_store_new(0, VANEDB_L2) == NULL); /* dim=0 => ctor throws => NULL */ + assert(vanedb_cpp_store_add(NULL, 1, v0) == 1); /* null handle guarded */ + assert(vanedb_cpp_store_search(NULL, q, 2, ids, ds) == 0); /* null handle guarded */ + + vanedb_cpp_store_free(s); + printf("capi: store OK\n"); + } + return 0; } From 5f94df69d34d290d097f70f44dc051eb75acc2da Mon Sep 17 00:00:00 2001 From: Anton Tsvetkov Date: Fri, 29 May 2026 19:25:11 +0100 Subject: [PATCH 05/12] feat(capi): HNSW index with save/load Co-Authored-By: Claude Sonnet 4.6 --- capi/vanedb_capi.cpp | 37 +++++++++++++++++++++++++++++++++++++ tests/capi/test_capi.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) diff --git a/capi/vanedb_capi.cpp b/capi/vanedb_capi.cpp index ce33f11..a3707ae 100644 --- a/capi/vanedb_capi.cpp +++ b/capi/vanedb_capi.cpp @@ -50,4 +50,41 @@ void vanedb_cpp_store_free(vanedb_cpp_store* s) { delete reinterpret_cast(s); } +vanedb_cpp_hnsw* vanedb_cpp_hnsw_new(size_t dim, vanedb_metric metric, size_t capacity, + size_t M, size_t ef_construction, uint64_t seed) { + try { + return reinterpret_cast( + new HNSWIndex(dim, 
to_metric(metric), capacity, M, ef_construction, + static_cast(seed))); + } catch (...) { return nullptr; } +} +int vanedb_cpp_hnsw_add(vanedb_cpp_hnsw* h, uint64_t id, const float* v) { + if (!h) return 1; + try { reinterpret_cast(h)->add(id, v); return 0; } + catch (...) { return 1; } +} +size_t vanedb_cpp_hnsw_search(vanedb_cpp_hnsw* h, const float* q, size_t k, size_t ef_search, + uint64_t* out_ids, float* out_dists) { + if (!h) return 0; + try { + auto* idx = reinterpret_cast(h); + idx->set_ef_search(ef_search); + auto res = idx->search(q, k); + for (size_t i = 0; i < res.size(); ++i) { out_ids[i] = res[i].id; out_dists[i] = res[i].distance; } + return res.size(); + } catch (...) { return 0; } +} +int vanedb_cpp_hnsw_save(vanedb_cpp_hnsw* h, const char* path) { + if (!h) return 1; + try { reinterpret_cast(h)->save(path); return 0; } + catch (...) { return 1; } +} +vanedb_cpp_hnsw* vanedb_cpp_hnsw_load(const char* path) { + try { return reinterpret_cast(HNSWIndex::load(path).release()); } + catch (...) 
{ return nullptr; } +} +void vanedb_cpp_hnsw_free(vanedb_cpp_hnsw* h) { + delete reinterpret_cast(h); +} + } diff --git a/tests/capi/test_capi.c b/tests/capi/test_capi.c index 8c9084d..93e21a3 100644 --- a/tests/capi/test_capi.c +++ b/tests/capi/test_capi.c @@ -43,5 +43,34 @@ int main(void) { printf("capi: store OK\n"); } + { + float v0[2] = {0.f, 0.f}; + float v1[2] = {1.f, 1.f}; + float q[2] = {0.1f, 0.1f}; + vanedb_cpp_hnsw* h = vanedb_cpp_hnsw_new(2, VANEDB_L2, 100, 16, 200, 42); + assert(h != NULL); + assert(vanedb_cpp_hnsw_add(h, 10, v0) == 0); + assert(vanedb_cpp_hnsw_add(h, 20, v1) == 0); + uint64_t ids[2]; float ds[2]; + size_t n = vanedb_cpp_hnsw_search(h, q, 2, 50, ids, ds); + assert(n == 2); + assert(ids[0] == 10); + assert(vanedb_cpp_hnsw_save(h, "capi_hnsw.bin") == 0); + vanedb_cpp_hnsw_free(h); + + vanedb_cpp_hnsw* h2 = vanedb_cpp_hnsw_load("capi_hnsw.bin"); + assert(h2 != NULL); + uint64_t ids2[1]; float ds2[1]; + size_t n2 = vanedb_cpp_hnsw_search(h2, q, 1, 50, ids2, ds2); + assert(n2 == 1 && ids2[0] == 10); + vanedb_cpp_hnsw_free(h2); + /* negative paths */ + assert(vanedb_cpp_hnsw_new(0, VANEDB_L2, 100, 16, 200, 42) == NULL); /* dim=0 throws => NULL */ + assert(vanedb_cpp_hnsw_add(NULL, 1, v0) == 1); /* null handle guarded */ + assert(vanedb_cpp_hnsw_search(NULL, q, 1, 50, ids2, ds2) == 0); /* null handle guarded */ + assert(vanedb_cpp_hnsw_save(NULL, "x.bin") == 1); /* null handle guarded */ + printf("capi: hnsw OK\n"); + } + return 0; } From 748455602d9585be583c16735225d0d136b2f7d3 Mon Sep 17 00:00:00 2001 From: Anton Tsvetkov Date: Fri, 29 May 2026 19:29:52 +0100 Subject: [PATCH 06/12] feat(capi): MMap store Add vanedb_cpp_mmap_build/_open/_search/_free wrappers; transient MMapVectorStoreBuilder in _build, MMapVectorStore* behind opaque handle. 
Co-Authored-By: Claude Sonnet 4.6 --- capi/vanedb_capi.cpp | 26 ++++++++++++++++++++++++++ tests/capi/test_capi.c | 16 ++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/capi/vanedb_capi.cpp b/capi/vanedb_capi.cpp index a3707ae..329b3ff 100644 --- a/capi/vanedb_capi.cpp +++ b/capi/vanedb_capi.cpp @@ -87,4 +87,30 @@ void vanedb_cpp_hnsw_free(vanedb_cpp_hnsw* h) { delete reinterpret_cast(h); } +int vanedb_cpp_mmap_build(const char* path, size_t dim, vanedb_metric metric, + const uint64_t* ids, const float* vecs, size_t n) { + try { + MMapVectorStoreBuilder b(dim, to_metric(metric)); + for (size_t i = 0; i < n; ++i) b.add(ids[i], vecs + i * dim); + b.save(path); + return 0; + } catch (...) { return 1; } +} +vanedb_cpp_mmap* vanedb_cpp_mmap_open(const char* path) { + try { return reinterpret_cast(new MMapVectorStore(path)); } + catch (...) { return nullptr; } +} +size_t vanedb_cpp_mmap_search(vanedb_cpp_mmap* m, const float* q, size_t k, + uint64_t* out_ids, float* out_dists) { + if (!m) return 0; + try { + auto res = reinterpret_cast(m)->search(q, k); + for (size_t i = 0; i < res.size(); ++i) { out_ids[i] = res[i].id; out_dists[i] = res[i].distance; } + return res.size(); + } catch (...) 
{ return 0; } +} +void vanedb_cpp_mmap_free(vanedb_cpp_mmap* m) { + delete reinterpret_cast(m); +} + } diff --git a/tests/capi/test_capi.c b/tests/capi/test_capi.c index 93e21a3..ea977d2 100644 --- a/tests/capi/test_capi.c +++ b/tests/capi/test_capi.c @@ -72,5 +72,21 @@ int main(void) { printf("capi: hnsw OK\n"); } + { + uint64_t ids_in[2] = {10, 20}; + float vecs[4] = {0.f, 0.f, 1.f, 1.f}; /* row-major: id10=(0,0), id20=(1,1) */ + float q[2] = {0.1f, 0.1f}; + assert(vanedb_cpp_mmap_build("capi_mmap.bin", 2, VANEDB_L2, ids_in, vecs, 2) == 0); + vanedb_cpp_mmap* m = vanedb_cpp_mmap_open("capi_mmap.bin"); + assert(m != NULL); + uint64_t ids[2]; float ds[2]; + size_t n = vanedb_cpp_mmap_search(m, q, 2, ids, ds); + assert(n == 2 && ids[0] == 10); + vanedb_cpp_mmap_free(m); + /* negative path */ + assert(vanedb_cpp_mmap_search(NULL, q, 2, ids, ds) == 0); /* null handle guarded */ + printf("capi: mmap OK\n"); + } + return 0; } From b218b83b61e58a17fdd4508c1e16fff3f551478e Mon Sep 17 00:00:00 2001 From: Anton Tsvetkov Date: Fri, 29 May 2026 19:33:50 +0100 Subject: [PATCH 07/12] feat(capi): install rules for static lib and header Co-Authored-By: Claude Sonnet 4.6 --- CMakeLists.txt | 3 +++ 1 file changed, 3 insertions(+) diff --git a/CMakeLists.txt b/CMakeLists.txt index f17fece..2113e88 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -197,6 +197,9 @@ if(VANEDB_BUILD_CAPI) set_target_properties(test_capi PROPERTIES LINKER_LANGUAGE CXX) add_test(NAME capi COMMAND test_capi) endif() + + install(TARGETS vanedb_cpp_capi ARCHIVE DESTINATION lib) + install(FILES capi/vanedb_capi.h DESTINATION include/vanedb) endif() # Install From 322c4281b4ecc64541ea7aff7f85bfa69bcfa2ec Mon Sep 17 00:00:00 2001 From: Anton Tsvetkov Date: Fri, 29 May 2026 19:44:38 +0100 Subject: [PATCH 08/12] fix(capi): make C smoke test effective in Release builds The test put load-bearing calls inside assert(), which are compiled out under -DNDEBUG. 
The project strips NDEBUG from CXX release flags but not C, so the C test silently no-op'd in Release. Fix: set CMAKE_C_FLAGS_RELEASE without NDEBUG (matching CXX), and hoist all calls out of assert() so they execute regardless of NDEBUG. Also document the hnsw seed narrowing. Co-Authored-By: Claude Opus 4.8 (1M context) --- CMakeLists.txt | 3 +++ capi/vanedb_capi.cpp | 1 + tests/capi/test_capi.c | 42 ++++++++++++++++++++++++++++-------------- 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 2113e88..e29825b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -21,6 +21,9 @@ if(NOT MSVC) set(CMAKE_CXX_FLAGS_RELEASE "-O3") set(CMAKE_CXX_FLAGS_DEBUG "-O0 -g") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "-O2 -g") + set(CMAKE_C_FLAGS_RELEASE "-O3") + set(CMAKE_C_FLAGS_DEBUG "-O0 -g") + set(CMAKE_C_FLAGS_RELWITHDEBINFO "-O2 -g") endif() # Detect x86_64 for AVX2 support diff --git a/capi/vanedb_capi.cpp b/capi/vanedb_capi.cpp index 329b3ff..c6cd0ba 100644 --- a/capi/vanedb_capi.cpp +++ b/capi/vanedb_capi.cpp @@ -54,6 +54,7 @@ vanedb_cpp_hnsw* vanedb_cpp_hnsw_new(size_t dim, vanedb_metric metric, size_t ca size_t M, size_t ef_construction, uint64_t seed) { try { return reinterpret_cast( + // seed is uint64_t in the ABI (Rust parity) but the core takes uint32_t; high bits are dropped. new HNSWIndex(dim, to_metric(metric), capacity, M, ef_construction, static_cast(seed))); } catch (...) 
{ return nullptr; } diff --git a/tests/capi/test_capi.c b/tests/capi/test_capi.c index ea977d2..cc059cc 100644 --- a/tests/capi/test_capi.c +++ b/tests/capi/test_capi.c @@ -26,8 +26,10 @@ int main(void) { float q[2] = {0.1f, 0.1f}; vanedb_cpp_store* s = vanedb_cpp_store_new(2, VANEDB_L2); assert(s != NULL); - assert(vanedb_cpp_store_add(s, 10, v0) == 0); - assert(vanedb_cpp_store_add(s, 20, v1) == 0); + int rc_add0 = vanedb_cpp_store_add(s, 10, v0); + assert(rc_add0 == 0); + int rc_add1 = vanedb_cpp_store_add(s, 20, v1); + assert(rc_add1 == 0); uint64_t ids[2]; float ds[2]; size_t n = vanedb_cpp_store_search(s, q, 2, ids, ds); assert(n == 2); @@ -35,9 +37,12 @@ int main(void) { assert(ds[0] <= ds[1]); /* sorted ascending */ /* negative paths */ - assert(vanedb_cpp_store_new(0, VANEDB_L2) == NULL); /* dim=0 => ctor throws => NULL */ - assert(vanedb_cpp_store_add(NULL, 1, v0) == 1); /* null handle guarded */ - assert(vanedb_cpp_store_search(NULL, q, 2, ids, ds) == 0); /* null handle guarded */ + vanedb_cpp_store* s_bad = vanedb_cpp_store_new(0, VANEDB_L2); /* dim=0 => ctor throws => NULL */ + assert(s_bad == NULL); + int rc_null_add = vanedb_cpp_store_add(NULL, 1, v0); /* null handle guarded */ + assert(rc_null_add == 1); + size_t n_null = vanedb_cpp_store_search(NULL, q, 2, ids, ds); /* null handle guarded */ + assert(n_null == 0); vanedb_cpp_store_free(s); printf("capi: store OK\n"); @@ -49,13 +54,16 @@ int main(void) { float q[2] = {0.1f, 0.1f}; vanedb_cpp_hnsw* h = vanedb_cpp_hnsw_new(2, VANEDB_L2, 100, 16, 200, 42); assert(h != NULL); - assert(vanedb_cpp_hnsw_add(h, 10, v0) == 0); - assert(vanedb_cpp_hnsw_add(h, 20, v1) == 0); + int rc_hadd0 = vanedb_cpp_hnsw_add(h, 10, v0); + assert(rc_hadd0 == 0); + int rc_hadd1 = vanedb_cpp_hnsw_add(h, 20, v1); + assert(rc_hadd1 == 0); uint64_t ids[2]; float ds[2]; size_t n = vanedb_cpp_hnsw_search(h, q, 2, 50, ids, ds); assert(n == 2); assert(ids[0] == 10); - assert(vanedb_cpp_hnsw_save(h, "capi_hnsw.bin") == 0); + int 
rc_save = vanedb_cpp_hnsw_save(h, "capi_hnsw.bin"); + assert(rc_save == 0); vanedb_cpp_hnsw_free(h); vanedb_cpp_hnsw* h2 = vanedb_cpp_hnsw_load("capi_hnsw.bin"); @@ -65,10 +73,14 @@ int main(void) { assert(n2 == 1 && ids2[0] == 10); vanedb_cpp_hnsw_free(h2); /* negative paths */ - assert(vanedb_cpp_hnsw_new(0, VANEDB_L2, 100, 16, 200, 42) == NULL); /* dim=0 throws => NULL */ - assert(vanedb_cpp_hnsw_add(NULL, 1, v0) == 1); /* null handle guarded */ - assert(vanedb_cpp_hnsw_search(NULL, q, 1, 50, ids2, ds2) == 0); /* null handle guarded */ - assert(vanedb_cpp_hnsw_save(NULL, "x.bin") == 1); /* null handle guarded */ + vanedb_cpp_hnsw* h_bad = vanedb_cpp_hnsw_new(0, VANEDB_L2, 100, 16, 200, 42); /* dim=0 throws => NULL */ + assert(h_bad == NULL); + int rc_null_hadd = vanedb_cpp_hnsw_add(NULL, 1, v0); /* null handle guarded */ + assert(rc_null_hadd == 1); + size_t n_null_h = vanedb_cpp_hnsw_search(NULL, q, 1, 50, ids2, ds2); /* null handle guarded */ + assert(n_null_h == 0); + int rc_null_save = vanedb_cpp_hnsw_save(NULL, "x.bin"); /* null handle guarded */ + assert(rc_null_save == 1); printf("capi: hnsw OK\n"); } @@ -76,7 +88,8 @@ int main(void) { uint64_t ids_in[2] = {10, 20}; float vecs[4] = {0.f, 0.f, 1.f, 1.f}; /* row-major: id10=(0,0), id20=(1,1) */ float q[2] = {0.1f, 0.1f}; - assert(vanedb_cpp_mmap_build("capi_mmap.bin", 2, VANEDB_L2, ids_in, vecs, 2) == 0); + int rc_build = vanedb_cpp_mmap_build("capi_mmap.bin", 2, VANEDB_L2, ids_in, vecs, 2); + assert(rc_build == 0); vanedb_cpp_mmap* m = vanedb_cpp_mmap_open("capi_mmap.bin"); assert(m != NULL); uint64_t ids[2]; float ds[2]; @@ -84,7 +97,8 @@ int main(void) { assert(n == 2 && ids[0] == 10); vanedb_cpp_mmap_free(m); /* negative path */ - assert(vanedb_cpp_mmap_search(NULL, q, 2, ids, ds) == 0); /* null handle guarded */ + size_t n_null_m = vanedb_cpp_mmap_search(NULL, q, 2, ids, ds); /* null handle guarded */ + assert(n_null_m == 0); printf("capi: mmap OK\n"); } From 386175794b4b78cd70018ffce21330632ce078e4 
Mon Sep 17 00:00:00 2001 From: Anton Tsvetkov Date: Sat, 30 May 2026 08:44:21 +0100 Subject: [PATCH 09/12] fix(capi): guard null paths and cap search output (review) - null const char* path args reached fstream/mmap before any guard; a null path raises SIGSEGV (a signal, not a C++ exception) which catch(...) cannot intercept. Guard path on hnsw_save/load and mmap_build/open. - cap _search copy loops at k (defense-in-depth; the core already caps, but the wrapper now enforces it too). - document to_metric's L2-default and the hnsw seed narrowing in the header. Co-Authored-By: Claude Opus 4.8 (1M context) --- capi/vanedb_capi.cpp | 19 +++++++++++++------ capi/vanedb_capi.h | 2 ++ tests/capi/test_capi.c | 8 ++++++++ 3 files changed, 23 insertions(+), 6 deletions(-) diff --git a/capi/vanedb_capi.cpp b/capi/vanedb_capi.cpp index c6cd0ba..2d6fe18 100644 --- a/capi/vanedb_capi.cpp +++ b/capi/vanedb_capi.cpp @@ -42,8 +42,9 @@ size_t vanedb_cpp_store_search(vanedb_cpp_store* s, const float* q, size_t k, if (!s) return 0; try { auto res = reinterpret_cast(s)->search(q, k); - for (size_t i = 0; i < res.size(); ++i) { out_ids[i] = res[i].id; out_dists[i] = res[i].distance; } - return res.size(); + size_t n = res.size() < k ? res.size() : k; + for (size_t i = 0; i < n; ++i) { out_ids[i] = res[i].id; out_dists[i] = res[i].distance; } + return n; } catch (...) { return 0; } } void vanedb_cpp_store_free(vanedb_cpp_store* s) { @@ -71,16 +72,19 @@ size_t vanedb_cpp_hnsw_search(vanedb_cpp_hnsw* h, const float* q, size_t k, size auto* idx = reinterpret_cast(h); idx->set_ef_search(ef_search); auto res = idx->search(q, k); - for (size_t i = 0; i < res.size(); ++i) { out_ids[i] = res[i].id; out_dists[i] = res[i].distance; } - return res.size(); + size_t n = res.size() < k ? res.size() : k; + for (size_t i = 0; i < n; ++i) { out_ids[i] = res[i].id; out_dists[i] = res[i].distance; } + return n; } catch (...) 
{ return 0; } } int vanedb_cpp_hnsw_save(vanedb_cpp_hnsw* h, const char* path) { if (!h) return 1; + if (!path) return 1; try { reinterpret_cast(h)->save(path); return 0; } catch (...) { return 1; } } vanedb_cpp_hnsw* vanedb_cpp_hnsw_load(const char* path) { + if (!path) return nullptr; try { return reinterpret_cast(HNSWIndex::load(path).release()); } catch (...) { return nullptr; } } @@ -90,6 +94,7 @@ void vanedb_cpp_hnsw_free(vanedb_cpp_hnsw* h) { int vanedb_cpp_mmap_build(const char* path, size_t dim, vanedb_metric metric, const uint64_t* ids, const float* vecs, size_t n) { + if (!path) return 1; try { MMapVectorStoreBuilder b(dim, to_metric(metric)); for (size_t i = 0; i < n; ++i) b.add(ids[i], vecs + i * dim); @@ -98,6 +103,7 @@ int vanedb_cpp_mmap_build(const char* path, size_t dim, vanedb_metric metric, } catch (...) { return 1; } } vanedb_cpp_mmap* vanedb_cpp_mmap_open(const char* path) { + if (!path) return nullptr; try { return reinterpret_cast(new MMapVectorStore(path)); } catch (...) { return nullptr; } } @@ -106,8 +112,9 @@ size_t vanedb_cpp_mmap_search(vanedb_cpp_mmap* m, const float* q, size_t k, if (!m) return 0; try { auto res = reinterpret_cast(m)->search(q, k); - for (size_t i = 0; i < res.size(); ++i) { out_ids[i] = res[i].id; out_dists[i] = res[i].distance; } - return res.size(); + size_t n = res.size() < k ? res.size() : k; + for (size_t i = 0; i < n; ++i) { out_ids[i] = res[i].id; out_dists[i] = res[i].distance; } + return n; } catch (...) { return 0; } } void vanedb_cpp_mmap_free(vanedb_cpp_mmap* m) { diff --git a/capi/vanedb_capi.h b/capi/vanedb_capi.h index 80a0376..94df65a 100644 --- a/capi/vanedb_capi.h +++ b/capi/vanedb_capi.h @@ -22,6 +22,8 @@ typedef enum { VANEDB_L2 = 0, VANEDB_COSINE = 1, VANEDB_DOT = 2 } vanedb_metric; * then searches). This mirrors the Rust ABI; it is not thread-safe to call * concurrently with different ef_search values on the same handle, which the * single-threaded benchmark consumer never does. 
Do not remove this parameter. + * - to_metric maps any unrecognized metric value to L2 (no error). + * - vanedb_cpp_hnsw_new: seed is uint64_t for ABI parity; only the low 32 bits are used. */ /* Distance (stateless) */ diff --git a/tests/capi/test_capi.c b/tests/capi/test_capi.c index cc059cc..0dbdf4f 100644 --- a/tests/capi/test_capi.c +++ b/tests/capi/test_capi.c @@ -102,5 +102,13 @@ int main(void) { printf("capi: mmap OK\n"); } + { + int rc_ns = vanedb_cpp_hnsw_save(NULL, NULL); assert(rc_ns == 1); /* null handle */ + vanedb_cpp_hnsw* h_np = vanedb_cpp_hnsw_load(NULL); assert(h_np == NULL); + int rc_mb = vanedb_cpp_mmap_build(NULL, 2, VANEDB_L2, NULL, NULL, 0); assert(rc_mb == 1); + vanedb_cpp_mmap* m_np = vanedb_cpp_mmap_open(NULL); assert(m_np == NULL); + printf("capi: null-path guards OK\n"); + } + return 0; } From b7512ac9b57bed443e315ae9dbe0349d6e0037d6 Mon Sep 17 00:00:00 2001 From: Anton Tsvetkov Date: Sat, 30 May 2026 09:40:13 +0100 Subject: [PATCH 10/12] =?UTF-8?q?chore(capi):=20address=20review=20?= =?UTF-8?q?=E2=80=94=20CI=20coverage,=20header=20notes,=20test=20cleanup,?= =?UTF-8?q?=20project(C)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - add a dedicated 'C API' CI job (configures VANEDB_BUILD_CAPI=ON, builds+runs the C smoke test) - header: note per-thread handles for concurrent hnsw_search; clarify seed low-32 behavior - declare C at project() level (LANGUAGES CXX C); drop the nested enable_language(C) - test: remove capi_hnsw.bin/capi_mmap.bin artifacts after use - plan doc: fix stale PUBLIC->PRIVATE snippet; mark as internal artifact --- .github/workflows/build-and-test.yml | 24 +++++++++++++++++++ CMakeLists.txt | 4 +--- capi/vanedb_capi.h | 5 +++- .../superpowers/plans/2026-05-28-cpp-c-api.md | 4 +++- tests/capi/test_capi.c | 2 ++ 5 files changed, 34 insertions(+), 5 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 7270423..4d06282 
100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -361,3 +361,27 @@ jobs: - name: Verify Android x86_64 binary run: | echo "Android x86_64 build completed successfully" + + c-api: + name: C API (extern "C") + runs-on: ubuntu-latest + timeout-minutes: 15 + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Cache CMake deps + uses: actions/cache@v5 + with: + path: build-capi/_deps + key: ${{ runner.os }}-capi-deps-${{ hashFiles('CMakeLists.txt') }} + restore-keys: ${{ runner.os }}-capi-deps- + + - name: Configure CMake (C API) + run: cmake -B build-capi -DCMAKE_BUILD_TYPE=Release -DVANEDB_BUILD_CAPI=ON -DVANEDB_BUILD_BENCHMARKS=OFF -DVANEDB_BUILD_EXAMPLES=OFF -DVANEDB_BUILD_PYTHON=OFF + + - name: Build C API smoke test + run: cmake --build build-capi --target test_capi --parallel + + - name: Run C API test + run: ctest --test-dir build-capi -R capi --output-on-failure diff --git a/CMakeLists.txt b/CMakeLists.txt index e29825b..54a8f24 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.20) -project(vanedb VERSION 0.1.0 LANGUAGES CXX) +project(vanedb VERSION 0.1.0 LANGUAGES CXX C) set(CMAKE_CXX_STANDARD 20) set(CMAKE_CXX_STANDARD_REQUIRED ON) @@ -191,8 +191,6 @@ if(VANEDB_BUILD_CAPI) target_compile_features(vanedb_cpp_capi PRIVATE cxx_std_20) if(VANEDB_BUILD_TESTS) - # Enable C so CMake can compile test_capi.c (project declares LANGUAGES CXX only). - enable_language(C) add_executable(test_capi tests/capi/test_capi.c) target_include_directories(test_capi PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}) target_link_libraries(test_capi PRIVATE vanedb_cpp_capi) diff --git a/capi/vanedb_capi.h b/capi/vanedb_capi.h index 94df65a..5138909 100644 --- a/capi/vanedb_capi.h +++ b/capi/vanedb_capi.h @@ -22,8 +22,11 @@ typedef enum { VANEDB_L2 = 0, VANEDB_COSINE = 1, VANEDB_DOT = 2 } vanedb_metric; * then searches). 
This mirrors the Rust ABI; it is not thread-safe to call * concurrently with different ef_search values on the same handle, which the * single-threaded benchmark consumer never does. Do not remove this parameter. + * Callers needing concurrent search must use a separate handle per thread. * - to_metric maps any unrecognized metric value to L2 (no error). - * - vanedb_cpp_hnsw_new: seed is uint64_t for ABI parity; only the low 32 bits are used. + * - vanedb_cpp_hnsw_new: seed is uint64_t for ABI parity but only the low 32 bits are used + * (the C++ core takes uint32_t); cross-implementation graphs differ by RNG regardless, + * so this does not affect the recall-based comparison. */ /* Distance (stateless) */ diff --git a/docs/superpowers/plans/2026-05-28-cpp-c-api.md b/docs/superpowers/plans/2026-05-28-cpp-c-api.md index 7f11636..c01fe57 100644 --- a/docs/superpowers/plans/2026-05-28-cpp-c-api.md +++ b/docs/superpowers/plans/2026-05-28-cpp-c-api.md @@ -1,5 +1,7 @@ # vanedb-cpp C API Implementation Plan +> **Internal planning artifact** — historical record of how this feature was implemented; not user-facing documentation. + > **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. **Goal:** Ship a shippable C API for vanedb-cpp exposing distance, VectorStore, HNSW, and MMap through `extern "C"` (symbol prefix `vanedb_cpp_`), built as a static library `libvanedb_cpp_capi.a` with public header `capi/vanedb_capi.h`, verified by a C smoke test. 
@@ -149,7 +151,7 @@ if(VANEDB_BUILD_CAPI) target_include_directories(vanedb_cpp_capi PUBLIC ${CMAKE_CURRENT_SOURCE_DIR}/src ${CMAKE_CURRENT_SOURCE_DIR}) - target_compile_features(vanedb_cpp_capi PUBLIC cxx_std_20) + target_compile_features(vanedb_cpp_capi PRIVATE cxx_std_20) if(VANEDB_BUILD_TESTS) add_executable(test_capi tests/capi/test_capi.c) diff --git a/tests/capi/test_capi.c b/tests/capi/test_capi.c index 0dbdf4f..46fac63 100644 --- a/tests/capi/test_capi.c +++ b/tests/capi/test_capi.c @@ -82,6 +82,7 @@ int main(void) { int rc_null_save = vanedb_cpp_hnsw_save(NULL, "x.bin"); /* null handle guarded */ assert(rc_null_save == 1); printf("capi: hnsw OK\n"); + remove("capi_hnsw.bin"); } { @@ -100,6 +101,7 @@ int main(void) { size_t n_null_m = vanedb_cpp_mmap_search(NULL, q, 2, ids, ds); /* null handle guarded */ assert(n_null_m == 0); printf("capi: mmap OK\n"); + remove("capi_mmap.bin"); } { From d139078c683bab818cb045ad5cb4e70e92d6fa1f Mon Sep 17 00:00:00 2001 From: Anton Tsvetkov Date: Sat, 30 May 2026 10:05:32 +0100 Subject: [PATCH 11/12] ci(capi): run C API job on ubuntu, windows, macos MSVC is exactly where the C API's C-source-linked-against-C++-static-lib pattern (LINKER_LANGUAGE CXX) can diverge, so a Linux-only smoke test gives false cross-platform confidence. Matrix it; --config Release for the MSVC multi-config generator. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/build-and-test.yml | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index 4d06282..eb0ccbf 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -363,9 +363,13 @@ jobs: echo "Android x86_64 build completed successfully" c-api: - name: C API (extern "C") - runs-on: ubuntu-latest + name: C API (extern "C") - ${{ matrix.os }} + runs-on: ${{ matrix.os }} timeout-minutes: 15 + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, windows-latest, macos-latest] steps: - name: Checkout code uses: actions/checkout@v4 @@ -380,8 +384,9 @@ jobs: - name: Configure CMake (C API) run: cmake -B build-capi -DCMAKE_BUILD_TYPE=Release -DVANEDB_BUILD_CAPI=ON -DVANEDB_BUILD_BENCHMARKS=OFF -DVANEDB_BUILD_EXAMPLES=OFF -DVANEDB_BUILD_PYTHON=OFF + # --config Release is required for MSVC (multi-config generator); harmless on single-config Unix generators. - name: Build C API smoke test - run: cmake --build build-capi --target test_capi --parallel + run: cmake --build build-capi --target test_capi --config Release --parallel - name: Run C API test - run: ctest --test-dir build-capi -R capi --output-on-failure + run: ctest --test-dir build-capi -R capi --config Release --output-on-failure From e97f1775f8653ef292e7f9213e553109c45f0e22 Mon Sep 17 00:00:00 2001 From: Anton Tsvetkov Date: Sat, 30 May 2026 10:10:16 +0100 Subject: [PATCH 12/12] ci(capi): fix ctest config flag (--build-config, not --config) ctest uses -C/--build-config; --config is a cmake --build flag. The build step was correct; the ctest step errored 'Unknown argument: --config' on all three OSes. --build-config is needed by MSVC's multi-config generator and ignored by single-config Unix generators. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/build-and-test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/build-and-test.yml b/.github/workflows/build-and-test.yml index eb0ccbf..acc4946 100644 --- a/.github/workflows/build-and-test.yml +++ b/.github/workflows/build-and-test.yml @@ -389,4 +389,4 @@ jobs: run: cmake --build build-capi --target test_capi --config Release --parallel - name: Run C API test - run: ctest --test-dir build-capi -R capi --config Release --output-on-failure + run: ctest --test-dir build-capi -R capi --build-config Release --output-on-failure