Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 14 additions & 1 deletion .github/workflows/bench.yml
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,7 @@ jobs:
VORTEX_EXPERIMENTAL_PATCHED_ARRAY: "1"
FLAT_LAYOUT_INLINE_ARRAY_NODE: "1"
run: |
bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json
bash scripts/bench-taskset.sh target/release_debug/${{ matrix.benchmark.id }} --formats ${{ matrix.benchmark.formats }} -d gh-json -o results.json --gh-json-v3 results.v3.jsonl

- name: Setup AWS CLI
uses: aws-actions/configure-aws-credentials@ec61189d14ec14c8efccab744f656cffd0e33f37 # v6
Expand All @@ -105,6 +105,19 @@ jobs:
run: |
bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json

- name: Ingest results to v3 server
if: vars.V3_INGEST_URL != ''
continue-on-error: true
shell: bash
env:
INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
run: |
python3 scripts/post-ingest.py results.v3.jsonl \
--server "${{ vars.V3_INGEST_URL }}" \
--commit-sha "${{ github.sha }}" \
--benchmark-id "${{ matrix.benchmark.id }}" \
--repo-url "${{ github.server_url }}/${{ github.repository }}"

- name: Alert incident.io
if: failure()
uses: ./.github/actions/alert-incident-io
Expand Down
15 changes: 15 additions & 0 deletions .github/workflows/sql-benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -376,6 +376,7 @@ jobs:
bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \
--targets-json '${{ steps.targets.outputs.targets_json }}' \
--output results.json \
--gh-json-v3 results.v3.jsonl \
--no-build \
--runner "ec2_${{ inputs.machine_type }}" \
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
Expand All @@ -395,6 +396,7 @@ jobs:
bash scripts/bench-taskset.sh uv run --project bench-orchestrator vx-bench run "${{ matrix.subcommand }}" \
--targets-json '${{ steps.targets.outputs.targets_json }}' \
--output results.json \
--gh-json-v3 results.v3.jsonl \
--no-build \
--runner "ec2_${{ inputs.machine_type }}" \
${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \
Expand Down Expand Up @@ -499,6 +501,19 @@ jobs:
run: |
bash scripts/cat-s3.sh vortex-ci-benchmark-results data.json.gz results.json

- name: Ingest results to v3 server
if: inputs.mode == 'develop' && vars.V3_INGEST_URL != ''
continue-on-error: true
shell: bash
env:
INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
run: |
python3 scripts/post-ingest.py results.v3.jsonl \
--server "${{ vars.V3_INGEST_URL }}" \
--commit-sha "${{ github.sha }}" \
--benchmark-id "${{ matrix.id }}" \
--repo-url "${{ github.server_url }}/${{ github.repository }}"

- name: Upload File Sizes
if: inputs.mode == 'develop' && matrix.remote_storage == null
shell: bash
Expand Down
35 changes: 35 additions & 0 deletions .github/workflows/v3-commit-metadata.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# Posts a v3 ingest envelope with no records on every push to develop, so the
# `commits` dim stays populated even when no benchmark ran.

name: v3 commit metadata

on:
  push:
    branches: [develop]
  workflow_dispatch: { }

# Read-only token is enough: the job only checks out sources and POSTs outward.
permissions:
  contents: read

jobs:
  commit-metadata:
    runs-on: ubuntu-latest
    timeout-minutes: 10
    steps:
      - uses: actions/checkout@v6
        with:
          # NOTE(review): depth 2 presumably lets post-ingest.py see the parent
          # commit as well — confirm against the script before changing.
          fetch-depth: 2

      - name: Ingest commit metadata to v3 server
        # Skip entirely when no ingest server is configured for this repo.
        if: vars.V3_INGEST_URL != ''
        # Best-effort: metadata ingestion must never fail the workflow.
        continue-on-error: true
        shell: bash
        env:
          INGEST_BEARER_TOKEN: ${{ secrets.INGEST_BEARER_TOKEN }}
        run: |
          # An empty JSONL file yields an envelope with zero records.
          echo -n > empty.jsonl
          python3 scripts/post-ingest.py empty.jsonl \
            --server "${{ vars.V3_INGEST_URL }}" \
            --commit-sha "${{ github.sha }}" \
            --benchmark-id "commit-metadata" \
            --repo-url "${{ github.server_url }}/${{ github.repository }}"
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

53 changes: 51 additions & 2 deletions bench-orchestrator/bench_orchestrator/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from contextlib import contextmanager
from datetime import datetime, timedelta
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Annotated

import pandas as pd
Expand Down Expand Up @@ -115,6 +116,38 @@ def open_results_output(path: Path | None):
yield handle


@contextmanager
def temporary_v3_output_dir(enabled: bool):
    """Yield a scratch directory for per-backend v3 JSONL files, or None when disabled.

    The directory and its contents are removed when the context exits.
    """
    if enabled:
        with TemporaryDirectory(prefix="vx-bench-v3-") as scratch:
            yield Path(scratch)
    else:
        yield None


def backend_v3_output_path(temp_dir: Path | None, index: int, backend: Engine) -> Path | None:
    """Return the v3 JSONL path a backend should write, if v3 output is enabled.

    The zero-padded index keeps the per-backend files in execution order.
    """
    return None if temp_dir is None else temp_dir / f"{index:02d}-{backend.value}.jsonl"


def write_combined_v3_output(output_path: Path, input_paths: list[Path]) -> None:
    """Concatenate successful per-backend v3 JSONL files into the requested output.

    Args:
        output_path: Destination JSONL file; parent directories are created
            as needed (a bare filename in the current directory is left alone).
        input_paths: Per-backend JSONL files, concatenated in order.

    Raises:
        RuntimeError: If any input file is missing. All inputs are checked
            *before* the output is opened, so a failure never truncates or
            partially overwrites an existing output file.
    """
    # Fail fast: the previous implementation truncated the output and then
    # raised mid-stream on the first missing input, leaving a corrupt artifact.
    for input_path in input_paths:
        if not input_path.exists():
            raise RuntimeError(f"v3 output was not written by benchmark backend: {input_path}")

    # Path() is the current directory, which always exists; only mkdir real parents.
    if output_path.parent != Path():
        output_path.parent.mkdir(parents=True, exist_ok=True)

    with output_path.open("w", encoding="utf-8") as output:
        for input_path in input_paths:
            with input_path.open("r", encoding="utf-8") as input_file:
                # Line-wise copy preserves inputs that lack a trailing newline.
                for line in input_file:
                    output.write(line)


def write_result_line(line: str, store_writer, compatibility_file) -> None:
"""Write a raw result line to the run store and optional compatibility output."""
store_writer(line)
Expand Down Expand Up @@ -210,6 +243,10 @@ def run(
Path | None,
typer.Option("--output", help="Optional path for compatibility JSONL output"),
] = None,
gh_json_v3: Annotated[
Path | None,
typer.Option("--gh-json-v3", help="Optional path for v3 JSONL records emitted by the benchmark binary"),
] = None,
options: Annotated[list[str] | None, typer.Option("--opt", help="Engine or benchmark specific options")] = None,
) -> None:
"""Run benchmarks with specified configuration."""
Expand Down Expand Up @@ -276,10 +313,16 @@ def run(
soft_failures: list[str] = []

try:
with store.create_run(config, build_config) as ctx, open_results_output(output) as compatibility_file:
for backend, backend_targets in backend_groups.items():
with (
store.create_run(config, build_config) as ctx,
open_results_output(output) as compatibility_file,
temporary_v3_output_dir(gh_json_v3 is not None) as v3_temp_dir,
):
v3_output_parts: list[Path] = []
for backend_idx, (backend, backend_targets) in enumerate(backend_groups.items()):
executor = BenchmarkExecutor(binary_paths[backend], backend, verbose=verbose)
backend_formats = [target.format for target in backend_targets]
backend_gh_json_v3 = backend_v3_output_path(v3_temp_dir, backend_idx, backend)

try:
results = executor.run(
Expand All @@ -294,6 +337,7 @@ def run(
sample_rate=sample_rate,
tracing=tracing,
runner=runner,
gh_json_v3=backend_gh_json_v3,
on_result=lambda line, store_writer=ctx.write_raw_json, compatibility=compatibility_file: (
write_result_line(
line,
Expand All @@ -302,6 +346,8 @@ def run(
)
),
)
if backend_gh_json_v3 is not None:
v3_output_parts.append(backend_gh_json_v3)
console.print(f"[green]{backend.value}: {len(results)} results[/green]")
except RuntimeError as exc:
ctx.metadata.partial = True
Expand All @@ -310,6 +356,9 @@ def run(
console.print(f"[red]{backend.value} failed: {exc}[/red]")
soft_failures.append(str(exc))

if gh_json_v3 is not None:
write_combined_v3_output(gh_json_v3, v3_output_parts)

ctx.metadata.binaries = {backend.value: str(path) for backend, path in binary_paths.items()}
except RuntimeError as exc:
console.print(f"[red]{exc}[/red]")
Expand Down
5 changes: 5 additions & 0 deletions bench-orchestrator/bench_orchestrator/runner/executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ def build_command(
sample_rate: int | None = None,
tracing: bool = False,
runner: str | None = None,
gh_json_v3: Path | None = None,
) -> list[str]:
"""Build the command used to execute a benchmark binary."""
cmd = [
Expand Down Expand Up @@ -67,6 +68,8 @@ def build_command(
cmd.append("--tracing")
if runner:
cmd.extend(["--runner", runner])
if gh_json_v3 is not None:
cmd.extend(["--gh-json-v3", str(gh_json_v3)])
if options:
for key, value in options.items():
cmd.extend(["--opt", f"{key}={value}"])
Expand Down Expand Up @@ -98,6 +101,7 @@ def run(
sample_rate: int | None = None,
tracing: bool = False,
runner: str | None = None,
gh_json_v3: Path | None = None,
on_result: Callable[[str], None] | None = None,
) -> list[str]:
"""
Expand Down Expand Up @@ -128,6 +132,7 @@ def run(
sample_rate=sample_rate,
tracing=tracing,
runner=runner,
gh_json_v3=gh_json_v3,
)

if self.verbose:
Expand Down
44 changes: 44 additions & 0 deletions bench-orchestrator/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,3 +105,47 @@ def fake_run(self, **kwargs):
metadata = json.loads((run_dirs[0] / "metadata.json").read_text(encoding="utf-8"))
assert metadata["targets"] == [{"engine": "datafusion", "format": "parquet"}]
assert metadata["binaries"] == {"datafusion": str(binary_path)}


def test_run_combines_gh_json_v3_output_per_backend(tmp_path, monkeypatch) -> None:
    """Each backend writes its own v3 file; the CLI concatenates them in backend order."""
    store = ResultStore(base_dir=tmp_path / "runs")
    combined_path = tmp_path / "artifacts" / "results.v3.jsonl"
    binaries = {
        cli_module.Engine.DATAFUSION: tmp_path / "datafusion-bench",
        cli_module.Engine.DUCKDB: tmp_path / "duckdb-bench",
    }
    for fake_binary in binaries.values():
        fake_binary.write_text("", encoding="utf-8")

    monkeypatch.setattr(cli_module, "ResultStore", lambda: store)
    monkeypatch.setattr(cli_module.BenchmarkBuilder, "get_binary_path", lambda self, backend: binaries[backend])

    recorded_paths = []

    def fake_run(self, **kwargs):
        per_backend_path = kwargs["gh_json_v3"]
        # Every backend must receive its own temp file, never the final output.
        assert per_backend_path is not None
        assert per_backend_path != combined_path
        per_backend_path.write_text(f"{self.backend.value}-v3\n", encoding="utf-8")
        recorded_paths.append(per_backend_path)
        return []

    monkeypatch.setattr(BenchmarkExecutor, "run", fake_run)

    result = runner.invoke(
        cli_module.app,
        [
            "run",
            "tpch",
            "--targets-json",
            '[{"engine":"datafusion","format":"parquet"},{"engine":"duckdb","format":"parquet"}]',
            "--no-build",
            "--gh-json-v3",
            str(combined_path),
        ],
    )

    assert result.exit_code == 0
    assert combined_path.read_text(encoding="utf-8") == "datafusion-v3\nduckdb-v3\n"
    assert len(recorded_paths) == 2
    assert recorded_paths[0] != recorded_paths[1]
25 changes: 25 additions & 0 deletions bench-orchestrator/tests/test_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,31 @@ def test_build_command_omits_formats_for_lance_backend() -> None:
assert "1,3" in cmd


def test_build_command_includes_gh_json_v3_when_set() -> None:
    """The flag and its path are emitted as an adjacent pair when a v3 path is given."""
    cmd = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB).build_command(
        benchmark=Benchmark.TPCH,
        formats=[Format.PARQUET],
        gh_json_v3=Path("results.v3.jsonl"),
    )

    assert "--gh-json-v3" in cmd
    assert cmd[cmd.index("--gh-json-v3") + 1] == "results.v3.jsonl"


def test_build_command_omits_gh_json_v3_when_unset() -> None:
    """Without a v3 path, the flag must not appear in the command at all."""
    cmd = BenchmarkExecutor(Path("/tmp/duckdb-bench"), Engine.DUCKDB).build_command(
        benchmark=Benchmark.TPCH,
        formats=[Format.PARQUET],
    )

    assert "--gh-json-v3" not in cmd


def test_run_streams_logs_without_counting_them(tmp_path: Path) -> None:
script = tmp_path / "fake-bench.py"
script.write_text(
Expand Down
Loading
Loading