diff --git a/benchmarks-website/server/src/api.rs b/benchmarks-website/server/src/api.rs index 6fa10d04f48..42c3ab6c83c 100644 --- a/benchmarks-website/server/src/api.rs +++ b/benchmarks-website/server/src/api.rs @@ -31,10 +31,10 @@ use crate::error::ApiError; use crate::slug::ChartKey; use crate::slug::GroupKey; -/// Default cap on the number of commits returned per chart. +/// Default cap on the number of commits returned per chart when no `?n=` is +/// supplied. The HTML routes override this with their own per-page defaults +/// (see [`crate::html`]). pub const DEFAULT_COMMIT_WINDOW: u32 = 100; -/// Hard server-side ceiling on `?n=NNN`. -pub const MAX_COMMIT_WINDOW: u32 = 1000; /// Canonical group ordering, ported from the v2 site's hard-coded list at /// `origin/ct/vfvb:benchmarks-website/index.html`. Group names not in this @@ -90,7 +90,11 @@ impl Default for CommitWindow { impl CommitWindow { /// Parse the `?n=...` query string parameter. `None` and malformed values /// fall back to [`CommitWindow::default`]. `"all"` (any case) means - /// unbounded. Numeric values are clamped to `[1, MAX_COMMIT_WINDOW]`. + /// unbounded. Numeric values are floored to `1` so `?n=0` becomes + /// `?n=1`; there is no upper bound — large histories are kept as-is. + /// Any further reduction in rendered point count happens client-side + /// (see `static/chart-init.js` for the LTTB pass on the visible + /// commit range). 
pub fn parse(raw: Option<&str>) -> Self { let Some(s) = raw else { return Self::default(); @@ -102,7 +106,7 @@ impl CommitWindow { trimmed .parse::() .ok() - .map(|v| v.clamp(1, MAX_COMMIT_WINDOW)) + .map(|v| v.max(1)) .and_then(NonZeroU32::new) .map(Self::Last) .unwrap_or_default() @@ -276,7 +280,7 @@ pub struct ChartLink { pub slug: String, } -#[derive(Debug, Serialize)] +#[derive(Debug, Clone, Serialize)] pub struct ChartResponse { pub display_name: String, pub unit: &'static str, @@ -296,7 +300,7 @@ pub struct ChartResponse { /// engine + format, while `compression_*` and `random_access_times` only /// carry format. Vector-search series have neither and are omitted from the /// map entirely. -#[derive(Debug, Default, Serialize)] +#[derive(Debug, Default, Clone, Serialize)] pub struct SeriesTag { #[serde(skip_serializing_if = "Option::is_none")] pub engine: Option, @@ -313,7 +317,7 @@ pub struct FilterUniverse { pub formats: Vec, } -#[derive(Debug, Serialize)] +#[derive(Debug, Clone, Serialize)] pub struct CommitPoint { pub sha: String, pub timestamp: String, @@ -1644,13 +1648,18 @@ mod tests { } #[test] - fn commit_window_parse_clamps() { + fn commit_window_parse_floors_zero_but_keeps_large_values() { + // Large values are kept as-is — full history is no longer clamped + // server-side. Visual downsampling happens client-side in + // `static/chart-init.js`, on the currently visible commit range. let CommitWindow::Last(n) = CommitWindow::parse(Some("99999")) else { panic!() }; - assert_eq!(n.get(), MAX_COMMIT_WINDOW); + assert_eq!(n.get(), 99_999); + + // 0 floors to 1 since the underlying type is `NonZeroU32`. 
let CommitWindow::Last(n) = CommitWindow::parse(Some("0")) else { - panic!("clamp of 0 should round to 1") + panic!("floor of 0 should round to 1") }; assert_eq!(n.get(), 1); } diff --git a/benchmarks-website/server/src/html.rs b/benchmarks-website/server/src/html.rs index d9408e1e901..9eebedc2e07 100644 --- a/benchmarks-website/server/src/html.rs +++ b/benchmarks-website/server/src/html.rs @@ -4,25 +4,33 @@ //! HTML routes for the bench.vortex.dev v3 web UI. //! //! Three pages, all backed by the same per-chart UX: -//! - `GET /` — landing page. Every group is a collapsible `
`. The -//! first group is open by default and its charts pre-inline their JSON -//! payload for a fast first paint; closed groups carry only the chart-card -//! shell and their payloads are fetched on first toggle (`details.open`). +//! - `GET /` — landing page. Every group is a collapsible `
`, +//! all collapsed by default; the user picks which to expand. The +//! *first* group's chart payloads are still pre-inlined in the HTML +//! so opening it skips the JS fetch round-trip; every other group +//! ships only chart-card shells and is fetched on first toggle. //! - `GET /chart/{slug}` — single chart page; permalink for sharing. //! - `GET /group/{slug}` — every chart in one group on a single page. //! //! Each chart card owns its own compact toolbar (scope slider + Y-axis). There //! is no page-level toolbar — every chart is independent. Scope is -//! **zoom-as-scope**: each chart fetches up to [`api::MAX_COMMIT_WINDOW`] -//! commits once, then the toolbar manipulates `chart.options.scales.x.min`/ -//! `max` to set the visible window. No refetches on scope change. +//! **zoom-as-scope**: each chart fetches a generous window once, then the +//! toolbar manipulates `chart.options.scales.x.min`/`max` to set the visible +//! window. No refetches on scope change. //! -//! URL query params (`?n=`) are accepted as power-user overrides on the -//! initial fetch but are not written back from the toolbar. Per-chart UI +//! Every HTML route defaults to the unbounded commit window +//! ([`CommitWindow::All`]) so users can pan/zoom all the way back to the +//! very first commit. The chart payload is sent **raw** — any visual +//! downsampling happens client-side in `chart-init.js`, applied only to +//! the currently visible commit range. The common case (a chart zoomed in +//! to the last ~100 commits) renders raw with no LTTB at all. +//! +//! URL query param `?n=` is accepted as a power-user override on the +//! initial fetch but is not written back from the toolbar. Per-chart UI //! state is intentionally not persisted in the URL — the user feedback //! emphasised that this UX should feel local-and-immediate, not "share a -//! perfect view via URL". Permalinks (`/chart/{slug}`, `/group/{slug}`) are -//! the sharing mechanism, not query strings. +//! 
perfect view via URL". Permalinks (`/chart/{slug}`, `/group/{slug}`) +//! are the sharing mechanism, not query strings. //! //! Slugs are opaque strings the server received from `/api/groups`; the //! handler echoes them straight into [`crate::slug::ChartKey::from_slug`] @@ -32,8 +40,6 @@ //! script) are served from `/static/...` via [`include_bytes!`] so the //! binary is fully self-contained. -use std::num::NonZeroU32; - use anyhow::Result; use axum::Router; use axum::extract::Path; @@ -62,10 +68,11 @@ use crate::db; use crate::slug::ChartKey; use crate::slug::GroupKey; -/// How many commits each chart pre-fetches. The toolbar's scope slider zooms -/// into smaller windows of this slice; we never refetch on scope change. -/// Capped at the API ceiling so a future bigger ceiling is picked up here too. -const PER_CHART_FETCH_N: u32 = api::MAX_COMMIT_WINDOW; +// All HTML routes default to the unbounded commit window. The wire payload +// is the raw `(commits, series)` data; visual downsampling (LTTB on the +// currently visible commit range) happens client-side in +// `static/chart-init.js`. `?n=` remains a power-user override on the +// commit window itself (not on the rendered point count). const CHART_JS: &[u8] = include_bytes!("../static/chart.umd.js"); const CHART_ZOOM_JS: &[u8] = include_bytes!("../static/chartjs-plugin-zoom.umd.min.js"); @@ -73,7 +80,7 @@ const CHART_INIT_JS: &[u8] = include_bytes!("../static/chart-init.js"); const STYLE_CSS: &[u8] = include_bytes!("../static/style.css"); const VORTEX_BLACK_SVG: &[u8] = include_bytes!("../../public/vortex_black_nobg.svg"); const VORTEX_WHITE_SVG: &[u8] = include_bytes!("../../public/vortex_white_nobg.svg"); -const STATIC_ASSET_VERSION: &str = "bench-v3-ui-10"; +const STATIC_ASSET_VERSION: &str = "bench-v3-ui-15"; /// HTML routes mounted under `/`. 
pub fn router() -> Router { @@ -92,15 +99,14 @@ pub fn router() -> Router { .route("/vortex_white_nobg.svg", get(serve_vortex_white_svg)) } -/// Query string for HTML routes. `?n=` overrides the per-chart fetch size; +/// Query string for HTML routes. `?n=` overrides the commit window; /// `?engine=` and `?format=` carry the global filter bar's selection so a /// shared link or refresh preserves which engines/formats are visible. The /// per-chart toolbar (Y axis, scope slider) remains local-only — its state /// is intentionally not in the URL. #[derive(Debug, Default, Deserialize)] pub struct UiQuery { - /// Override for the per-chart fetch size. Defaults to `PER_CHART_FETCH_N`. - /// Accepts `25|50|100|250|all`. + /// Override for the commit window. Accepts a commit count or `all` (see [`CommitWindow::parse`]). pub n: Option, /// Comma-separated list of engines to keep visible across every chart. /// Empty / unset means no engine filter is active. Unknown engines are @@ -113,14 +119,15 @@ pub struct UiQuery { } impl UiQuery { - /// Resolve the [`CommitWindow`] for the initial fetch. When `?n=` is - /// unset, falls back to [`PER_CHART_FETCH_N`]. + /// Resolve the [`CommitWindow`] for HTML routes. Defaults to + /// [`CommitWindow::All`] so users can pan/zoom all the way back to + /// the very first commit on every chart, including the first + /// (pre-inlined) group on the landing page. Visual downsampling + /// happens client-side on the visible commit range only. fn fetch_window(&self) -> CommitWindow { match self.n.as_deref() { Some(_) => CommitWindow::parse(self.n.as_deref()), - None => { - CommitWindow::Last(NonZeroU32::new(PER_CHART_FETCH_N).expect("non-zero default")) - } + None => CommitWindow::All, } } @@ -190,8 +197,9 @@ async fn landing(State(state): State, Query(ui): Query) -> Re /// One group's worth of data for the landing page. /// -/// The first group (in canonical order) ships with `charts` populated so the -/// open-by-default `
` paints immediately. Subsequent groups ship +/// The first group (in canonical order) ships with `charts` populated so +/// the moment the user expands it the chart hydrates from the inline +/// JSON without a network round-trip. Every other group ships /// with `charts` empty and only their chart-card shells — payloads are /// fetched client-side on first `details.toggle` to keep the cold landing /// HTML small. @@ -220,8 +228,9 @@ fn collect_landing_groups(conn: &Connection, window: &CommitWindow) -> Result Markup { // `

unit: ns · 2 series · 3 commits

Show
Y
+tpch sf=1 Q1 [nvme] — bench.vortex.dev

unit: ns · 2 series · 3 commits

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/group_page_query.snap b/benchmarks-website/server/tests/snapshots/group_page_query.snap index 7bdf3a8847f..9e4a3d2f5bc 100644 --- a/benchmarks-website/server/tests/snapshots/group_page_query.snap +++ b/benchmarks-website/server/tests/snapshots/group_page_query.snap @@ -2,4 +2,4 @@ source: benchmarks-website/server/tests/web_ui.rs expression: body --- -TPC-H (NVMe) (SF=1) — bench.vortex.dev

2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
+TPC-H (NVMe) (SF=1) — bench.vortex.dev

2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
diff --git a/benchmarks-website/server/tests/snapshots/landing_page.snap b/benchmarks-website/server/tests/snapshots/landing_page.snap index d42416fde8e..8c5a360d8d5 100644 --- a/benchmarks-website/server/tests/snapshots/landing_page.snap +++ b/benchmarks-website/server/tests/snapshots/landing_page.snap @@ -2,4 +2,4 @@ source: benchmarks-website/server/tests/web_ui.rs expression: body --- -bench.vortex.dev
Random Access1 chart

Random Access Performance

#1vortex-file-compressed100.50 us1.00x
#2parquet201.00 us2.00x
Random access time | Ratio to fastest (lower is better)

taxi

Show
Y
Compression1 chart

Compression Throughput vs Parquet

Write Speed (Compression)2.00x
📤Scan Speed (Decompression)2.00x
Inverse geomean of Vortex/Parquet ratios (higher is better)

tpch-lineitem

Show
Y
Compression Size1 chart

Compression Size Summary

⬇️Min Size Ratio0.50x
📊Mean Size Ratio0.50x
⬆️Max Size Ratio0.50x
Geomean of Vortex/Parquet size ratios (lower is better)

tpch-lineitem

Show
Y
TPC-H (NVMe) (SF=1)2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
cohere-large-10m / partitioned1 chart

threshold=0.75

Show
Y
+bench.vortex.dev
Random Access1 chart

Random Access Performance

#1vortex-file-compressed100.50 us1.00x
#2parquet201.00 us2.00x
Random access time | Ratio to fastest (lower is better)

taxi

Show
Y
Compression1 chart

Compression Throughput vs Parquet

Write Speed (Compression)2.00x
📤Scan Speed (Decompression)2.00x
Inverse geomean of Vortex/Parquet ratios (higher is better)

tpch-lineitem

Show
Y
Compression Size1 chart

Compression Size Summary

⬇️Min Size Ratio0.50x
📊Mean Size Ratio0.50x
⬆️Max Size Ratio0.50x
Geomean of Vortex/Parquet size ratios (lower is better)

tpch-lineitem

Show
Y
TPC-H (NVMe) (SF=1)2 charts

Performance Summary

#1datafusion:vortex-file-compressed1.11x1.80 ms
#2duckdb:parquet1.60x900.00 us
Geomean of query time ratio to fastest (lower is better)

Q1

Show
Y

Q2

Show
Y
cohere-large-10m / partitioned1 chart

threshold=0.75

Show
Y
diff --git a/benchmarks-website/server/tests/web_ui.rs b/benchmarks-website/server/tests/web_ui.rs index a0a95f1ec92..e09249ef950 100644 --- a/benchmarks-website/server/tests/web_ui.rs +++ b/benchmarks-website/server/tests/web_ui.rs @@ -240,6 +240,90 @@ async fn seed(server: &Server) -> Result<()> { Ok(()) } +/// Slim ingest envelope carrying just a `random_access_time` pair so we can +/// drive a long-history fixture cheaply (the full envelope is ~12 records; +/// this is two). Used by the long-history tests. +fn ra_envelope_for(sha: &str, ts: &str, msg: &str, bias: i64) -> Value { + json!({ + "run_meta": { + "benchmark_id": "downsample-fixture", + "schema_version": 1, + "started_at": ts + }, + "commit": { + "sha": sha, + "timestamp": ts, + "message": msg, + "author_name": "Test Author", + "author_email": "author@example.com", + "committer_name": "Test Committer", + "committer_email": "committer@example.com", + "tree_sha": "fedcba9876543210fedcba9876543210fedcba98", + "url": format!("https://github.com/vortex-data/vortex/commit/{sha}") + }, + "records": [ + { + "kind": "random_access_time", + "commit_sha": sha, + "dataset": "taxi", + "format": "vortex-file-compressed", + "value_ns": 500 + bias, + "all_runtimes_ns": [500 + bias] + }, + { + "kind": "random_access_time", + "commit_sha": sha, + "dataset": "taxi", + "format": "parquet", + "value_ns": 1_000 + (2 * bias), + "all_runtimes_ns": [1_000 + (2 * bias)] + } + ] + }) +} + +/// Seed a `Random Access` chart with `n` synthetic commits so the +/// long-history tests have something to chew on. SHAs are deterministic +/// `{i:040x}`; timestamps are 1 minute apart starting 2025-01-01 and wrap +/// after one day, so commits only sort stably for `n <= 1440`. 
+async fn seed_long_history(server: &Server, n: usize) -> Result<()> { + let client = reqwest::Client::new(); + for i in 0..n { + let sha = format!("{i:040x}"); + let minutes = i; + let ts = format!( + "2025-01-01T{:02}:{:02}:00Z", + (minutes / 60) % 24, + minutes % 60 + ); + // Sinusoidal bias so the series has interior peaks LTTB will retain. + let bias = ((i as f64).sin() * 1_000.0) as i64 + i as i64 * 10; + let resp = client + .post(server.url("/api/ingest")) + .bearer_auth(TOKEN) + .json(&ra_envelope_for(&sha, &ts, "synthetic", bias)) + .send() + .await?; + anyhow::ensure!( + resp.status().is_success(), + "long-history ingest #{i} failed: {}", + resp.status() + ); + } + Ok(()) +} + +/// Pull the inline `` JSON out of an +/// HTML body. Returns `None` if the script tag isn't present. +fn extract_chart_data(body: &str, idx: usize) -> Option { + let needle = format!(r#"")? + start; + // Reverse the ` insta::Settings { let mut s = insta::Settings::clone_current(); s.set_snapshot_path("snapshots"); @@ -374,11 +458,12 @@ async fn landing_page_snapshot() -> Result<()> { Ok(()) } -/// The first group disclosure is rendered with the `open` attribute; every -/// other group lacks it, so the user sees only the first group's charts on -/// first paint. +/// All group disclosures render closed by default — the user picks which +/// to expand. The first group's chart payloads are still inlined in the +/// HTML (so opening it skips the JS fetch), but the disclosure itself +/// stays collapsed until clicked. #[tokio::test] -async fn details_first_group_open_others_closed() -> Result<()> { +async fn details_all_groups_closed_by_default() -> Result<()> { let server = Server::start().await?; seed(&server).await?; @@ -393,10 +478,15 @@ async fn details_first_group_open_others_closed() -> Result<()> { }) .collect(); assert!(!opens.is_empty(), "landing page must render
"); - assert!(opens[0], "first group must be open"); - for (i, is_open) in opens.iter().enumerate().skip(1) { + for (i, is_open) in opens.iter().enumerate() { assert!(!is_open, "group #{i} must be closed by default"); } + // The first group's chart payload should still be inlined — fast + // hydration on toggle without a network round-trip. + assert!( + body.contains(r#"id="chart-data-0""#), + "first group's chart payload should be inlined for fast on-toggle hydration", + ); Ok(()) } @@ -795,7 +885,8 @@ async fn chart_page_window_caps_commits() -> Result<()> { let slug = pick_chart_slug(&server, |s| s.starts_with("TPC-H")).await?; let client = reqwest::Client::new(); - // Without ?n, default is the 1000-commit per-chart cap — fixture has 3. + // Without `?n`, the API default is `Last(DEFAULT_COMMIT_WINDOW)`. The + // fixture has 3 commits which fits comfortably. let full: Value = client .get(server.url(&format!("/api/chart/{slug}"))) .send() @@ -815,7 +906,7 @@ async fn chart_page_window_caps_commits() -> Result<()> { let one_count = one["commits"].as_array().map(|a| a.len()).unwrap_or(0); assert_eq!(one_count, 1, "?n=1 should keep exactly one commit"); - // ?n=all bypasses the cap. + // ?n=all returns the unbounded view (the per-chart hard cap is gone). let all: Value = client .get(server.url(&format!("/api/chart/{slug}?n=all"))) .send() @@ -825,6 +916,19 @@ async fn chart_page_window_caps_commits() -> Result<()> { let all_count = all["commits"].as_array().map(|a| a.len()).unwrap_or(0); assert_eq!(all_count, full_count, "?n=all should match unbounded view"); + // Even very large `?n` survives without being clamped. + let huge: Value = client + .get(server.url(&format!("/api/chart/{slug}?n=99999"))) + .send() + .await? + .json() + .await?; + let huge_count = huge["commits"].as_array().map(|a| a.len()).unwrap_or(0); + assert_eq!( + huge_count, full_count, + "?n=99999 should no longer be clamped to 1000" + ); + // Malformed ?n gracefully falls back to default. 
let bad = client .get(server.url(&format!("/api/chart/{slug}?n=banana"))) @@ -834,6 +938,79 @@ async fn chart_page_window_caps_commits() -> Result<()> { Ok(()) } +/// `/chart/{slug}` and `/group/{slug}` permalinks default to the unbounded +/// commit window, and the inlined JSON payload contains the full raw +/// history (no server-side downsampling). Visual downsampling now lives in +/// `chart-init.js` and runs on the *visible* commit range only. +#[tokio::test] +async fn permalink_pages_inline_full_raw_history() -> Result<()> { + let server = Server::start().await?; + seed_long_history(&server, 200).await?; + + let chart_slug = pick_chart_slug(&server, |s| s == "Random Access").await?; + let group_slug = pick_group_slug(&server, |s| s == "Random Access").await?; + let client = reqwest::Client::new(); + + let chart_body = client + .get(server.url(&format!("/chart/{chart_slug}"))) + .send() + .await? + .text() + .await?; + let chart_payload = + extract_chart_data(&chart_body, 0).context("chart inline payload present")?; + assert_eq!( + chart_payload["commits"] + .as_array() + .context("commits is array")? + .len(), + 200, + "/chart permalink should inline the full raw history", + ); + + let group_body = client + .get(server.url(&format!("/group/{group_slug}"))) + .send() + .await? + .text() + .await?; + let group_payload = + extract_chart_data(&group_body, 0).context("group inline payload present")?; + assert_eq!( + group_payload["commits"] + .as_array() + .context("commits is array")? + .len(), + 200, + "/group permalink should inline the full raw history", + ); + + Ok(()) +} + +/// The wire payload no longer carries a `raw_commit_count` field — visual +/// downsampling moved to the client, so the server has no opinion on +/// rendered point count. 
+#[tokio::test] +async fn chart_payload_does_not_carry_raw_commit_count() -> Result<()> { + let server = Server::start().await?; + seed_long_history(&server, 50).await?; + + let slug = pick_chart_slug(&server, |s| s == "Random Access").await?; + let client = reqwest::Client::new(); + let body: Value = client + .get(server.url(&format!("/api/chart/{slug}"))) + .send() + .await? + .json() + .await?; + assert!( + body.get("raw_commit_count").is_none(), + "raw_commit_count should not appear on the wire; got {body:?}" + ); + Ok(()) +} + #[tokio::test] async fn chart_page_round_trips_every_slug() -> Result<()> { let server = Server::start().await?;