From f000e1b05d821910eba601eab282a0da4f38725b Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 24 Apr 2026 13:40:59 +0100 Subject: [PATCH 1/2] feat: add a ec2_runner field to all runners Signed-off-by: Joe Isaacs --- .github/workflows/sql-benchmarks.yml | 2 + bench-orchestrator/bench_orchestrator/cli.py | 5 +++ .../bench_orchestrator/runner/executor.py | 5 +++ benchmarks/datafusion-bench/src/main.rs | 4 ++ benchmarks/duckdb-bench/src/main.rs | 4 ++ benchmarks/lance-bench/src/main.rs | 4 ++ vortex-bench/src/datasets/mod.rs | 11 +++++ vortex-bench/src/measurements.rs | 8 ++++ vortex-bench/src/runner.rs | 41 +++++++++++++++++++ 9 files changed, 84 insertions(+) diff --git a/.github/workflows/sql-benchmarks.yml b/.github/workflows/sql-benchmarks.yml index 5f57862937a..e2da59838e9 100644 --- a/.github/workflows/sql-benchmarks.yml +++ b/.github/workflows/sql-benchmarks.yml @@ -377,6 +377,7 @@ jobs: --targets-json '${{ steps.targets.outputs.targets_json }}' \ --output results.json \ --no-build \ + --runner "ec2_${{ inputs.machine_type }}" \ ${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \ ${{ matrix.scale_factor && format('--opt scale-factor={0}', matrix.scale_factor) || '' }} @@ -395,6 +396,7 @@ jobs: --targets-json '${{ steps.targets.outputs.targets_json }}' \ --output results.json \ --no-build \ + --runner "ec2_${{ inputs.machine_type }}" \ ${{ matrix.iterations && format('--iterations {0}', matrix.iterations) || '' }} \ --opt remote-data-dir=${{ matrix.remote_storage }} \ ${{ matrix.scale_factor && format('--opt scale-factor={0}', matrix.scale_factor) || '' }} diff --git a/bench-orchestrator/bench_orchestrator/cli.py b/bench-orchestrator/bench_orchestrator/cli.py index 8ee8a7ef055..d497d85ed13 100644 --- a/bench-orchestrator/bench_orchestrator/cli.py +++ b/bench-orchestrator/bench_orchestrator/cli.py @@ -202,6 +202,10 @@ def run( str | None, typer.Option("--targets-json", help="Exact benchmark targets as a JSON array"), ] = None, + runner: Annotated[ + str | None, + typer.Option("--runner", help="Benchmark runner ID (e.g., ec2_c6id.8xlarge)"), + ] = None, output: Annotated[ Path | None, typer.Option("--output", help="Optional path for compatibility JSONL output"), @@ -289,6 +293,7 @@ def run( samply=samply, sample_rate=sample_rate, tracing=tracing, + runner=runner, on_result=lambda line, store_writer=ctx.write_raw_json, compatibility=compatibility_file: ( write_result_line( line, diff --git a/bench-orchestrator/bench_orchestrator/runner/executor.py b/bench-orchestrator/bench_orchestrator/runner/executor.py index 4fa87076d64..b895afdc2e1 100644 --- a/bench-orchestrator/bench_orchestrator/runner/executor.py +++ b/bench-orchestrator/bench_orchestrator/runner/executor.py @@ -39,6 +39,7 @@ def build_command( samply: bool = False, sample_rate: int | None = None, tracing: bool = False, + runner: str | None = None, ) -> list[str]: """Build the command used to execute a benchmark binary.""" cmd = [ @@ -64,6 +65,8 @@ def build_command( cmd.append("--track-memory") if tracing: cmd.append("--tracing") + if runner: + cmd.extend(["--runner", runner]) if options: for key, value in options.items(): cmd.extend(["--opt", f"{key}={value}"]) @@ -94,6 +97,7 @@ def run( samply: bool = False, sample_rate: int | None = None, tracing: bool = False, + runner: str | None = None, on_result: Callable[[str], None] | None = None, ) -> list[str]: """ @@ -123,6 +127,7 @@ def run( samply=samply, sample_rate=sample_rate, tracing=tracing, + runner=runner, ) if self.verbose: diff --git a/benchmarks/datafusion-bench/src/main.rs b/benchmarks/datafusion-bench/src/main.rs index ad0df8ea74a..745d8371303 100644 --- a/benchmarks/datafusion-bench/src/main.rs +++ b/benchmarks/datafusion-bench/src/main.rs @@ -94,6 +94,9 @@ struct Args { #[arg(long, default_value_t = false)] track_memory: bool, + #[arg(long, default_value = "unknown")] + runner: String, + #[arg(long, default_value_t = false)] explain: bool, @@ -149,6 +152,7 @@ async fn main() -> anyhow::Result<()> { let mut runner = SqlBenchmarkRunner::new( &*benchmark, Engine::DataFusion, + args.runner.clone(), args.formats.clone(), args.track_memory, args.hide_progress_bar, diff --git a/benchmarks/duckdb-bench/src/main.rs b/benchmarks/duckdb-bench/src/main.rs index 7ab8f1ac7ab..d8a3306b224 100644 --- a/benchmarks/duckdb-bench/src/main.rs +++ b/benchmarks/duckdb-bench/src/main.rs @@ -64,6 +64,9 @@ struct Args { #[arg(long, default_value_t = false)] hide_progress_bar: bool, + #[arg(long, default_value = "unknown")] + runner: String, + #[arg(long, value_delimiter = ',', value_parser = value_parser!(Format))] formats: Vec, @@ -142,6 +145,7 @@ fn main() -> anyhow::Result<()> { let mut runner = SqlBenchmarkRunner::new( &*benchmark, Engine::DuckDB, + args.runner.clone(), args.formats.clone(), args.track_memory, args.hide_progress_bar, diff --git a/benchmarks/lance-bench/src/main.rs b/benchmarks/lance-bench/src/main.rs index 73fa8426ffe..6cce97d2548 100644 --- a/benchmarks/lance-bench/src/main.rs +++ b/benchmarks/lance-bench/src/main.rs @@ -65,6 +65,9 @@ struct Args { #[arg(long, default_value_t = false)] track_memory: bool, + #[arg(long, default_value = "unknown")] + runner: String, + #[arg(long = "opt", value_delimiter = ',', value_parser = value_parser!(Opt))] options: Vec, } @@ -93,6 +96,7 @@ async fn main() -> anyhow::Result<()> { let mut runner = SqlBenchmarkRunner::new( &*benchmark, Engine::DataFusion, + args.runner.clone(), vec![Format::Lance], args.track_memory, args.hide_progress_bar, diff --git a/vortex-bench/src/datasets/mod.rs b/vortex-bench/src/datasets/mod.rs index 9429e06fd8f..7136d5451e1 100644 --- a/vortex-bench/src/datasets/mod.rs +++ b/vortex-bench/src/datasets/mod.rs @@ -22,6 +22,17 @@ pub mod tpch_l_comment; use std::path::PathBuf; +pub(crate) const DEFAULT_BENCHMARK_RUNNER_ID: &str = "unknown"; + +pub(crate) fn normalize_benchmark_runner_id(benchmark_runner: &str) -> String { + let benchmark_runner = benchmark_runner.trim().replace('/', "_"); + if benchmark_runner.is_empty() { + DEFAULT_BENCHMARK_RUNNER_ID.to_string() + } else { + benchmark_runner + } +} + #[async_trait] pub trait Dataset { fn name(&self) -> &str; diff --git a/vortex-bench/src/measurements.rs b/vortex-bench/src/measurements.rs index f49349cd95e..186e380a98c 100644 --- a/vortex-bench/src/measurements.rs +++ b/vortex-bench/src/measurements.rs @@ -243,6 +243,7 @@ pub struct QueryMeasurement { pub query_idx: usize, pub target: Target, pub benchmark_dataset: BenchmarkDataset, + pub benchmark_runner: String, /// The storage backend against which this test was run. One of: s3, gcs, nvme. pub storage: String, pub runs: Vec, @@ -279,6 +280,7 @@ pub struct QueryMeasurementJson { pub name: String, pub storage: String, pub dataset: BenchmarkDataset, + pub ec2_runner: String, pub unit: String, pub value: u128, pub all_runtimes: Vec, @@ -310,6 +312,7 @@ impl ToJson for QueryMeasurement { name, storage: self.storage.clone(), dataset: self.benchmark_dataset.clone(), + ec2_runner: self.benchmark_runner.clone(), unit: "ns".to_string(), value: self.median_run().as_nanos(), all_runtimes: self.runs.iter().map(|r| r.as_nanos()).collect_vec(), @@ -430,6 +433,7 @@ pub struct MemoryMeasurement { pub query_idx: usize, pub target: Target, pub benchmark_dataset: BenchmarkDataset, + pub benchmark_runner: String, pub storage: String, pub physical_memory_delta: i64, pub virtual_memory_delta: i64, @@ -442,6 +446,7 @@ impl MemoryMeasurement { query_idx: usize, target: Target, benchmark_dataset: BenchmarkDataset, + benchmark_runner: String, storage: String, memory_result: MemoryMeasurementResult, ) -> Self { @@ -449,6 +454,7 @@ impl MemoryMeasurement { query_idx, target, benchmark_dataset, + benchmark_runner, storage, physical_memory_delta: memory_result.physical_memory_delta, virtual_memory_delta: memory_result.virtual_memory_delta, @@ -474,6 +480,7 @@ impl ToJson for MemoryMeasurement { name, storage: self.storage.clone(), dataset: self.benchmark_dataset.clone(), + ec2_runner: self.benchmark_runner.clone(), physical_memory_delta: self.physical_memory_delta, virtual_memory_delta: self.virtual_memory_delta, peak_physical_memory: self.peak_physical_memory, @@ -507,6 +514,7 @@ pub struct MemoryMeasurementJson { pub name: String, pub storage: String, pub dataset: BenchmarkDataset, + pub ec2_runner: String, pub physical_memory_delta: i64, pub virtual_memory_delta: i64, pub peak_physical_memory: u64, diff --git a/vortex-bench/src/runner.rs b/vortex-bench/src/runner.rs index dc7d729232e..3885bace2af 100644 --- a/vortex-bench/src/runner.rs +++ b/vortex-bench/src/runner.rs @@ -20,6 +20,8 @@ use crate::BenchmarkDataset; use crate::Engine; use crate::Format; use crate::Target; +use crate::datasets::DEFAULT_BENCHMARK_RUNNER_ID; +use crate::datasets::normalize_benchmark_runner_id; /// Controls whether queries are benchmarked or explained. pub enum BenchmarkMode { @@ -66,6 +68,7 @@ pub struct BenchmarkResults { pub struct SqlBenchmarkRunner { engine: Engine, benchmark_dataset: BenchmarkDataset, + benchmark_runner: String, storage: String, expected_row_counts: Option>, /// Deduplicated, preserving insertion order. @@ -81,6 +84,7 @@ impl SqlBenchmarkRunner { pub fn new( benchmark: &B, engine: Engine, + benchmark_runner: String, formats: impl IntoIterator, track_memory: bool, hide_progress_bar: bool, @@ -88,12 +92,15 @@ impl SqlBenchmarkRunner { let mut seen = HashSet::new(); let formats: Vec = formats.into_iter().filter(|f| seen.insert(*f)).collect(); let storage = url_scheme_to_storage(benchmark.data_url())?; + let benchmark_runner = normalize_benchmark_runner_id(&benchmark_runner); + validate_benchmark_runner_id(&benchmark_runner, is_ci())?; let memory_tracker = track_memory.then(BenchmarkMemoryTracker::new); Ok(Self { engine, benchmark_dataset: benchmark.dataset(), + benchmark_runner, storage, expected_row_counts: benchmark.expected_row_counts().map(|s| s.to_vec()), formats, @@ -168,6 +175,7 @@ impl SqlBenchmarkRunner { query_idx, target, benchmark_dataset: self.benchmark_dataset.clone(), + benchmark_runner: self.benchmark_runner.clone(), storage: self.storage.clone(), runs, }); @@ -192,6 +200,7 @@ impl SqlBenchmarkRunner { query_idx, target, self.benchmark_dataset.clone(), + self.benchmark_runner.clone(), self.storage.clone(), memory_result, )); @@ -411,6 +420,18 @@ impl SqlBenchmarkRunner { } } +fn is_ci() -> bool { + matches!(std::env::var("CI").as_deref(), Ok("true")) +} + +fn validate_benchmark_runner_id(benchmark_runner: &str, is_ci: bool) -> anyhow::Result<()> { + anyhow::ensure!( + !is_ci || benchmark_runner != DEFAULT_BENCHMARK_RUNNER_ID, + "benchmark runner must not be unknown in CI; pass --runner" + ); + Ok(()) +} + pub fn export_results( queries: Vec, memory: Vec, @@ -460,3 +481,23 @@ pub fn filter_queries( }) .collect() } + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn ci_rejects_unknown_benchmark_runner() { + assert!(validate_benchmark_runner_id("unknown", true).is_err()); + } + + #[test] + fn ci_accepts_explicit_benchmark_runner() { + assert!(validate_benchmark_runner_id("ec2_c6id.8xlarge", true).is_ok()); + } + + #[test] + fn local_accepts_unknown_benchmark_runner() { + assert!(validate_benchmark_runner_id("unknown", false).is_ok()); + } +} From fd78e96a41393cd94aa3692a2c7dd7c4092c4a1e Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 24 Apr 2026 14:06:52 +0100 Subject: [PATCH 2/2] feat: add a ec2_runner field to all runners Signed-off-by: Joe Isaacs --- vortex-bench/src/measurements.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/vortex-bench/src/measurements.rs b/vortex-bench/src/measurements.rs index 186e380a98c..91036db3f5e 100644 --- a/vortex-bench/src/measurements.rs +++ b/vortex-bench/src/measurements.rs @@ -280,7 +280,8 @@ pub struct QueryMeasurementJson { pub name: String, pub storage: String, pub dataset: BenchmarkDataset, - pub ec2_runner: String, + /// The cloud runner used to run this + pub runner: String, pub unit: String, pub value: u128, pub all_runtimes: Vec, @@ -312,7 +313,7 @@ impl ToJson for QueryMeasurement { name, storage: self.storage.clone(), dataset: self.benchmark_dataset.clone(), - ec2_runner: self.benchmark_runner.clone(), + runner: self.benchmark_runner.clone(), unit: "ns".to_string(), value: self.median_run().as_nanos(), all_runtimes: self.runs.iter().map(|r| r.as_nanos()).collect_vec(), @@ -480,7 +481,7 @@ impl ToJson for MemoryMeasurement { name, storage: self.storage.clone(), dataset: self.benchmark_dataset.clone(), - ec2_runner: self.benchmark_runner.clone(), + runner: self.benchmark_runner.clone(), physical_memory_delta: self.physical_memory_delta, virtual_memory_delta: self.virtual_memory_delta, peak_physical_memory: self.peak_physical_memory, @@ -514,7 +515,7 @@ pub struct MemoryMeasurementJson { pub name: String, pub storage: String, pub dataset: BenchmarkDataset, - pub ec2_runner: String, + pub runner: String, pub physical_memory_delta: i64, pub virtual_memory_delta: i64, pub peak_physical_memory: u64,