Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
VORTEX_EXPERIMENTAL_PATCHED_ARRAY: "1"
FLAT_LAYOUT_INLINE_ARRAY_NODE: "1"
run: |
bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json
bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json --gh-json-v3 results.v3.jsonl

- name: Setup AWS CLI
uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 # v6
Expand All @@ -105,6 +105,19 @@ jobs:
run: |
bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json

- name: Ingest results to v3 server
if: vars.V3_INGEST_URL != ''
continue-on-error: true
shell: bash
env:
INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
run: |
python3 scripts/post-ingest.py results.v3.jsonl \
--server "${{ vars.V3_INGEST_URL }}" \
--commit-sha "${{ github.sha }}" \
--benchmark-id "${{ matrix.benchmark.id }}" \
--repo-url "${{ github.server_url }}/${{ github.repository }}"

- name: Alert incident.io
if: failure()
uses: ./.github/actions/alert-incident-io
Expand Down
15 changes: 15 additions & 0 deletions .github/workflows/sql-benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ jobs:
bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \
--targets-json '${{ steps.targets.outputs.targets_json }}' \
--output results.json \
--gh-json-v3 results.v3.jsonl \
--no-build \
--runner "ec2_${{ inputs.machine_type }}" \
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
Expand All @@ -395,6 +396,7 @@ jobs:
bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \
--targets-json '${{ steps.targets.outputs.targets_json }}' \
--output results.json \
--gh-json-v3 results.v3.jsonl \
--no-build \
--runner "ec2_${{ inputs.machine_type }}" \
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
Expand Down Expand Up @@ -499,6 +501,19 @@ jobs:
run: |
bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json

- name: Ingest results to v3 server
if: inputs.mode == 'develop' && vars.V3_INGEST_URL != ''
continue-on-error: true
shell: bash
env:
INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
run: |
python3 scripts/post-ingest.py results.v3.jsonl \
--server "${{ vars.V3_INGEST_URL }}" \
--commit-sha "${{ github.sha }}" \
--benchmark-id "${{ matrix.id }}" \
--repo-url "${{ github.server_url }}/${{ github.repository }}"

- name: Upload File Sizes
if: inputs.mode == 'develop' && matrix.remote_storage == null
shell: bash
Expand Down
35 changes: 35 additions & 0 deletions .github/workflows/v3-commit-metadata.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Posts a v3 ingest envelope with no records on every push to develop, so the
# `commits` dim stays populated even when no benchmark ran.

name: v3 commit metadata

on:
  push:
    branches: [develop]
  workflow_dispatch: { }

# Read-only token is enough: the job only checks out sources and POSTs outward.
permissions:
  contents: read

jobs:
  commit-metadata:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v6
        with:
          # NOTE(review): depth 2 presumably lets post-ingest.py see the parent
          # commit as well — confirm against the script before changing.
          fetch-depth: 2

      - name: Ingest commit metadata to v3 server
        # Skip entirely when no ingest server is configured for this repo.
        if: vars.V3_INGEST_URL != ''
        # Best-effort: metadata ingestion must never fail the workflow.
        continue-on-error: true
        shell: bash
        env:
          INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
        run: |
          # An empty JSONL file yields an envelope with zero records.
          echo -n > empty.jsonl
          python3 scripts/post-ingest.py empty.jsonl \
            --server "${{ vars.V3_INGEST_URL }}" \
            --commit-sha "${{ github.sha }}" \
            --benchmark-id "commit-metadata" \
            --repo-url "${{ github.server_url }}/${{ github.repository }}"
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 51 additions & 2 deletions bench-orchestrator/bench_orchestrator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from contextlib import contextmanager
from datetime import datetime, timedelta
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Annotated

import pandas as pd
Expand Down Expand Up @@ -115,6 +116,38 @@ def open_results_output(path: Path | None):
yield handle


@contextmanager
def temporary_v3_output_dir(enabled: bool):
    """Yield a scratch directory for per-backend v3 JSONL files, or None when disabled.

    The directory and its contents are removed when the context exits.
    """
    if enabled:
        with TemporaryDirectory(prefix="vx-bench-v3-") as scratch:
            yield Path(scratch)
    else:
        yield None


def backend_v3_output_path(temp_dir: Path | None, index: int, backend: Engine) -> Path | None:
    """Return the v3 JSONL path a backend should write, if v3 output is enabled.

    The zero-padded index keeps the per-backend files in execution order.
    """
    return None if temp_dir is None else temp_dir / f"{index:02d}-{backend.value}.jsonl"


def write_combined_v3_output(output_path: Path, input_paths: list[Path]) -> None:
    """Concatenate successful per-backend v3 JSONL files into the requested output.

    Args:
        output_path: Destination JSONL file; parent directories are created
            as needed (a bare filename in the current directory is left alone).
        input_paths: Per-backend JSONL files, concatenated in order.

    Raises:
        RuntimeError: If any input file is missing. All inputs are checked
            *before* the output is opened, so a failure never truncates or
            partially overwrites an existing output file.
    """
    # Fail fast: the previous implementation truncated the output and then
    # raised mid-stream on the first missing input, leaving a corrupt artifact.
    for input_path in input_paths:
        if not input_path.exists():
            raise RuntimeError(f"v3 output was not written by benchmark backend: {input_path}")

    # Path() is the current directory, which always exists; only mkdir real parents.
    if output_path.parent != Path():
        output_path.parent.mkdir(parents=True, exist_ok=True)

    with output_path.open("w", encoding="utf-8") as output:
        for input_path in input_paths:
            with input_path.open("r", encoding="utf-8") as input_file:
                # Line-wise copy preserves inputs that lack a trailing newline.
                for line in input_file:
                    output.write(line)


def write_result_line(line: str, store_writer, compatibility_file) -> None:
"""Write a raw result line to the run store and optional compatibility output."""
store_writer(line)
Expand Down Expand Up @@ -210,6 +243,10 @@ def run(
Path | None,
typer.Option("--output", help="Optional path for compatibility JSONL output"),
] = None,
gh_json_v3: Annotated[
Path | None,
typer.Option("--gh-json-v3", help="Optional path for v3 JSONL records emitted by the benchmark binary"),
] = None,
options: Annotated[list[str] | None, typer.Option("--opt", help="Engine or benchmark specific options")] = None,
) -> None:
"""Run benchmarks with specified configuration."""
Expand Down Expand Up @@ -276,10 +313,16 @@ def run(
soft_failures: list[str] = []

try:
with store.create_run(config, build_config) as ctx, open_results_output(output) as compatibility_file:
for backend, backend_targets in backend_groups.items():
with (
store.create_run(config, build_config) as ctx,
open_results_output(output) as compatibility_file,
temporary_v3_output_dir(gh_json_v3 is not None) as v3_temp_dir,
):
v3_output_parts: list[Path] = []
for backend_idx, (backend, backend_targets) in enumerate(backend_groups.items()):
executor = BenchmarkExecutor(binary_paths[backend], backend, verbose=verbose)
backend_formats = [target.format for target in backend_targets]
backend_gh_json_v3 = backend_v3_output_path(v3_temp_dir, backend_idx, backend)

try:
results = executor.run(
Expand All @@ -294,6 +337,7 @@ def run(
sample_rate=sample_rate,
tracing=tracing,
runner=runner,
gh_json_v3=backend_gh_json_v3,
on_result=lambda line, store_writer=ctx.write_raw_json, compatibility=compatibility_file: (
write_result_line(
line,
Expand All @@ -302,6 +346,8 @@ def run(
)
),
)
if backend_gh_json_v3 is not None:
v3_output_parts.append(backend_gh_json_v3)
console.print(f"[green]{backend.value}: {len(results)} results[/green]")
except RuntimeError as exc:
ctx.metadata.partial = True
Expand All @@ -310,6 +356,9 @@ def run(
console.print(f"[red]{backend.value} failed: {exc}[/red]")
soft_failures.append(str(exc))

if gh_json_v3 is not None:
write_combined_v3_output(gh_json_v3, v3_output_parts)

ctx.metadata.binaries = {backend.value: str(path) for backend, path in binary_paths.items()}
except RuntimeError as exc:
console.print(f"[red]{exc}[/red]")
Expand Down
5 changes: 5 additions & 0 deletions bench-orchestrator/bench_orchestrator/runner/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def build_command(
sample_rate: int | None = None,
tracing: bool = False,
runner: str | None = None,
gh_json_v3: Path | None = None,
) -> list[str]:
"""Build the command used to execute a benchmark binary."""
cmd = [
Expand Down Expand Up @@ -67,6 +68,8 @@ def build_command(
cmd.append("--tracing")
if runner:
cmd.extend(["--runner", runner])
if gh_json_v3 is not None:
cmd.extend(["--gh-json-v3", str(gh_json_v3)])
if options:
for key, value in options.items():
cmd.extend(["--opt", f"{key}={value}"])
Expand Down Expand Up @@ -98,6 +101,7 @@ def run(
sample_rate: int | None = None,
tracing: bool = False,
runner: str | None = None,
gh_json_v3: Path | None = None,
on_result: Callable[[str], None] | None = None,
) -> list[str]:
"""
Expand Down Expand Up @@ -128,6 +132,7 @@ def run(
sample_rate=sample_rate,
tracing=tracing,
runner=runner,
gh_json_v3=gh_json_v3,
)

if self.verbose:
Expand Down
44 changes: 44 additions & 0 deletions bench-orchestrator/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,47 @@ def fake_run(self, **kwargs):
metadata = json.loads((run_dirs[0] / "metadata.json").read_text(encoding="utf-8"))
assert metadata["targets"] == [{"engine": "datafusion", "format": "parquet"}]
assert metadata["binaries"] == {"datafusion": str(binary_path)}


def test_run_combines_gh_json_v3_output_per_backend(tmp_path, monkeypatch) -> None:
    """Each backend writes its own v3 file; the CLI concatenates them in backend order."""
    store = ResultStore(base_dir=tmp_path / "runs")
    combined_path = tmp_path / "artifacts" / "results.v3.jsonl"
    binaries = {
        cli_module.Engine.DATAFUSION: tmp_path / "datafusion-bench",
        cli_module.Engine.DUCKDB: tmp_path / "duckdb-bench",
    }
    for fake_binary in binaries.values():
        fake_binary.write_text("", encoding="utf-8")

    monkeypatch.setattr(cli_module, "ResultStore", lambda: store)
    monkeypatch.setattr(cli_module.BenchmarkBuilder, "get_binary_path", lambda self, backend: binaries[backend])

    recorded_paths = []

    def fake_run(self, **kwargs):
        per_backend_path = kwargs["gh_json_v3"]
        # Every backend must receive its own temp file, never the final output.
        assert per_backend_path is not None
        assert per_backend_path != combined_path
        per_backend_path.write_text(f"{self.backend.value}-v3\n", encoding="utf-8")
        recorded_paths.append(per_backend_path)
        return []

    monkeypatch.setattr(BenchmarkExecutor, "run", fake_run)

    result = runner.invoke(
        cli_module.app,
        [
            "run",
            "tpch",
            "--targets-json",
            '[{"engine":"datafusion","format":"parquet"},{"engine":"duckdb","format":"parquet"}]',
            "--no-build",
            "--gh-json-v3",
            str(combined_path),
        ],
    )

    assert result.exit_code == 0
    assert combined_path.read_text(encoding="utf-8") == "datafusion-v3\nduckdb-v3\n"
    assert len(recorded_paths) == 2
    assert recorded_paths[0] != recorded_paths[1]
25 changes: 25 additions & 0 deletions bench-orchestrator/tests/test_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,31 @@ def test_build_command_omits_formats_for_lance_backend() -> None:
assert "1,3" in cmd


def test_build_command_includes_gh_json_v3_when_set() -> None:
    """The flag and its path are emitted as an adjacent pair when a v3 path is given."""
    cmd = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB).build_command(
        benchmark=Benchmark.TPCH,
        formats=[Format.PARQUET],
        gh_json_v3=Path("results.v3.jsonl"),
    )

    assert "--gh-json-v3" in cmd
    assert cmd[cmd.index("--gh-json-v3") + 1] == "results.v3.jsonl"


def test_build_command_omits_gh_json_v3_when_unset() -> None:
    """Without a v3 path, the flag must not appear in the command at all."""
    cmd = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB).build_command(
        benchmark=Benchmark.TPCH,
        formats=[Format.PARQUET],
    )

    assert "--gh-json-v3" not in cmd


def test_run_streams_logs_without_counting_them(tmp_path: Path) -> None:
script = tmp_path / "fake-bench.py"
script.write_text(
Expand Down
Loading
Loading