diff --git a/Cargo.lock b/Cargo.lock
index 2819f2bacd0..9e5005a5132 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -10898,6 +10898,7 @@ dependencies = [
  "async-stream",
  "async-trait",
  "bit-vec",
+ "codspeed-divan-compat",
  "flatbuffers",
  "futures",
  "itertools 0.14.0",
diff --git a/vortex-array/Cargo.toml b/vortex-array/Cargo.toml
index 78a3bb0481f..12e6c6b704d 100644
--- a/vortex-array/Cargo.toml
+++ b/vortex-array/Cargo.toml
@@ -210,3 +210,7 @@ harness = false
 [[bench]]
 name = "to_arrow"
 harness = false
+
+[[bench]]
+name = "buffer_lengths"
+harness = false
diff --git a/vortex-array/benches/buffer_lengths.rs b/vortex-array/benches/buffer_lengths.rs
new file mode 100644
index 00000000000..1e02fa906de
--- /dev/null
+++ b/vortex-array/benches/buffer_lengths.rs
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Benchmarks `SerializedArray::buffer_lengths()` against the old `root::<fba::Array>`-based path.
+//!
+//! Motivation: `buffer_lengths()` is called per `SerializedArray` (and called multiple times in
+//! the display path). The previous implementation re-ran the full FlatBuffer verifier on every
+//! call, even though the buffer was already validated at construction time. This bench measures
+//! the actual saving.
+
+#![expect(clippy::cast_possible_truncation)]
+#![expect(clippy::unwrap_used)]
+
+use divan::Bencher;
+use flatbuffers::FlatBufferBuilder;
+use flatbuffers::root;
+use vortex_array::serde::SerializedArray;
+use vortex_buffer::Alignment;
+use vortex_buffer::ByteBuffer;
+use vortex_flatbuffers::array as fba;
+
+fn main() {
+    divan::main();
+}
+
+fn build_array_node<'a>(
+    fbb: &mut FlatBufferBuilder<'a>,
+    n_buffers: usize,
+    children: Vec<flatbuffers::WIPOffset<fba::ArrayNode<'a>>>,
+) -> flatbuffers::WIPOffset<fba::ArrayNode<'a>> {
+    let buffers: Vec<u16> = (0..n_buffers as u16).collect();
+    let buffers = fbb.create_vector(&buffers);
+    let children = if children.is_empty() {
+        None
+    } else {
+        Some(fbb.create_vector(&children))
+    };
+    fba::ArrayNode::create(
+        fbb,
+        &fba::ArrayNodeArgs {
+            encoding: 1,
+            metadata: None,
+            children,
+            buffers: Some(buffers),
+            stats: None,
+        },
+    )
+}
+
+/// Build an Array tree: struct of `n_fields` flat leaves, each owning one buffer.
+fn build_struct_array_bytes(n_fields: usize) -> ByteBuffer {
+    let mut fbb = FlatBufferBuilder::with_capacity(1 << 16);
+    let leaves: Vec<_> = (0..n_fields)
+        .map(|_| build_array_node(&mut fbb, 1, vec![]))
+        .collect();
+    let root_node = build_array_node(&mut fbb, 0, leaves);
+    let bufs: Vec<_> = (0..n_fields)
+        .map(|_| fba::Buffer::new(0, 0, fba::Compression::None, 1024))
+        .collect();
+    let buffers = fbb.create_vector(&bufs);
+    let array = fba::Array::create(
+        &mut fbb,
+        &fba::ArrayArgs {
+            root: Some(root_node),
+            buffers: Some(buffers),
+        },
+    );
+    fbb.finish_minimal(array);
+    ByteBuffer::from(fbb.finished_data().to_vec()).aligned(Alignment::none())
+}
+
+const ARRAY_FIELDS: &[usize] = &[1, 8, 32, 100, 1000];
+
+/// Post-fix implementation: uses `root_as_array_unchecked` on an already-validated buffer.
+#[divan::bench(args = ARRAY_FIELDS)]
+fn buffer_lengths_fixed(bencher: Bencher, n_fields: usize) {
+    let bytes = build_struct_array_bytes(n_fields);
+    let sa = SerializedArray::from_array_tree(bytes).unwrap();
+    bencher.bench_local(|| {
+        let lengths = sa.buffer_lengths();
+        divan::black_box(lengths);
+    });
+}
+
+/// Legacy implementation: re-runs the FlatBuffer verifier on every call.
+/// Replicates the pre-fix `buffer_lengths()` body (`unwrap` standing in for `vortex_expect`)
+/// against the same payload so we can compare apples-to-apples.
+#[divan::bench(args = ARRAY_FIELDS)]
+fn buffer_lengths_legacy_root(bencher: Bencher, n_fields: usize) {
+    let bytes = build_struct_array_bytes(n_fields);
+    bencher.bench_local(|| {
+        let fb_array = root::<fba::Array>(bytes.as_ref()).unwrap();
+        let lengths: Vec<usize> = fb_array
+            .buffers()
+            .map(|buffers| buffers.iter().map(|b| b.length() as usize).collect())
+            .unwrap_or_default();
+        divan::black_box(lengths);
+    });
+}
diff --git a/vortex-array/src/serde.rs b/vortex-array/src/serde.rs
index 637b57324c0..19083672fbf 100644
--- a/vortex-array/src/serde.rs
+++ b/vortex-array/src/serde.rs
@@ -512,8 +512,9 @@ impl SerializedArray {
     /// serialized length of each buffer. This is useful for displaying buffer sizes
     /// without needing to access the actual buffer data.
     pub fn buffer_lengths(&self) -> Vec<usize> {
-        let fb_array = root::<fba::Array>(self.flatbuffer.as_ref())
-            .vortex_expect("SerializedArray flatbuffer must be a valid Array");
+        // SAFETY: `self.flatbuffer` was verified by `validate_array_tree` at construction
+        // time. Same invariant as `from_flatbuffer_and_segment_with_overrides` relies on.
+        let fb_array = unsafe { fba::root_as_array_unchecked(self.flatbuffer.as_ref()) };
         fb_array
             .buffers()
             .map(|buffers| buffers.iter().map(|b| b.length() as usize).collect())
diff --git a/vortex-layout/Cargo.toml b/vortex-layout/Cargo.toml
index 61b1253ef43..2a2d7a610d5 100644
--- a/vortex-layout/Cargo.toml
+++ b/vortex-layout/Cargo.toml
@@ -54,6 +54,7 @@ vortex-session = { workspace = true }
 vortex-utils = { workspace = true, features = ["dashmap"] }
 
 [dev-dependencies]
+divan = { workspace = true }
 futures = { workspace = true, features = ["executor"] }
 rstest = { workspace = true }
 temp-env = { workspace = true }
@@ -68,5 +69,9 @@ tokio = ["dep:tokio", "vortex-error/tokio"]
 [lints]
 workspace = true
 
+[[bench]]
+name = "flatbuffer_verify"
+harness = false
+
 [package.metadata.cargo-machete]
 ignored = ["uuid"]
diff --git a/vortex-layout/benches/flatbuffer_verify.rs b/vortex-layout/benches/flatbuffer_verify.rs
new file mode 100644
index 00000000000..efaaa6d2644
--- /dev/null
+++ b/vortex-layout/benches/flatbuffer_verify.rs
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: Apache-2.0
+// SPDX-FileCopyrightText: Copyright the Vortex contributors
+
+//! Benchmarks the cost of FlatBuffers verification for Layout and Array messages.
+//!
+//! Compares three modes for each shape:
+//! - `root::<T>()` — full verification (current default for footer/layout/array decode).
+//! - `root_with_opts::<T>()` — verification with the Vortex Layout `VerifierOptions`.
+//! - `root_unchecked::<T>() + first field touch` — the unsafe lower bound.
+//!
+//! The shapes mirror what Vortex actually serializes:
+//! - chunked-of-flat (deep, narrow): models row groups.
+//! - struct-of-flat (wide): models a wide schema.
+//! - chunked-of-struct (both): models a wide schema with row groups.
+
+#![expect(clippy::cast_possible_truncation)]
+#![expect(clippy::unwrap_used)]
+
+use std::env;
+use std::sync::LazyLock;
+
+use divan::Bencher;
+use flatbuffers::FlatBufferBuilder;
+use flatbuffers::VerifierOptions;
+use flatbuffers::root;
+use flatbuffers::root_unchecked;
+use flatbuffers::root_with_opts;
+use vortex_flatbuffers::layout as fbl;
+
+fn main() {
+    divan::main();
+}
+
+static LAYOUT_VERIFIER: LazyLock<VerifierOptions> = LazyLock::new(|| VerifierOptions {
+    max_tables: env::var("VORTEX_MAX_LAYOUT_TABLES")
+        .ok()
+        .and_then(|lmt| lmt.parse::<usize>().ok())
+        .unwrap_or(1_000_000),
+    max_depth: env::var("VORTEX_MAX_LAYOUT_DEPTH")
+        .ok()
+        .and_then(|lmt| lmt.parse::<usize>().ok())
+        .unwrap_or(64),
+    max_apparent_size: 1 << 31,
+    ignore_missing_null_terminator: false,
+});
+
+// ----- Layout flatbuffer builders -----
+
+/// Build a flat leaf layout: one segment, no children, small metadata.
+fn build_flat_leaf<'a>(fbb: &mut FlatBufferBuilder<'a>) -> flatbuffers::WIPOffset<fbl::Layout<'a>> {
+    let segments = fbb.create_vector(&[0u32]);
+    fbl::Layout::create(
+        fbb,
+        &fbl::LayoutArgs {
+            encoding: 1,
+            row_count: 1024,
+            metadata: None,
+            children: None,
+            segments: Some(segments),
+        },
+    )
+}
+
+/// Build a struct layout with `n_fields` flat children.
+fn build_struct<'a>(
+    fbb: &mut FlatBufferBuilder<'a>,
+    n_fields: usize,
+) -> flatbuffers::WIPOffset<fbl::Layout<'a>> {
+    let children: Vec<_> = (0..n_fields).map(|_| build_flat_leaf(fbb)).collect();
+    let children = fbb.create_vector(&children);
+    fbl::Layout::create(
+        fbb,
+        &fbl::LayoutArgs {
+            encoding: 3, // Columnar
+            row_count: 1024,
+            metadata: None,
+            children: Some(children),
+            segments: None,
+        },
+    )
+}
+
+/// Build `n_chunks` chunks of (struct of `n_fields` flat).
+fn build_chunked_of_struct(n_chunks: usize, n_fields: usize) -> Vec<u8> {
+    let mut fbb = FlatBufferBuilder::with_capacity(1 << 16);
+    let chunks: Vec<_> = (0..n_chunks)
+        .map(|_| build_struct(&mut fbb, n_fields))
+        .collect();
+    let children = fbb.create_vector(&chunks);
+    let root = fbl::Layout::create(
+        &mut fbb,
+        &fbl::LayoutArgs {
+            encoding: 2, // Chunked
+            row_count: 1024 * n_chunks as u64,
+            metadata: None,
+            children: Some(children),
+            segments: None,
+        },
+    );
+    fbb.finish_minimal(root);
+    fbb.finished_data().to_vec()
+}
+
+// ----- Array flatbuffer builders -----
+
+use vortex_flatbuffers::array as fba;
+
+fn build_array_node<'a>(
+    fbb: &mut FlatBufferBuilder<'a>,
+    n_buffers: usize,
+    children: Vec<flatbuffers::WIPOffset<fba::ArrayNode<'a>>>,
+) -> flatbuffers::WIPOffset<fba::ArrayNode<'a>> {
+    let buffers: Vec<u16> = (0..n_buffers as u16).collect();
+    let buffers = fbb.create_vector(&buffers);
+    let children = if children.is_empty() {
+        None
+    } else {
+        Some(fbb.create_vector(&children))
+    };
+    fba::ArrayNode::create(
+        fbb,
+        &fba::ArrayNodeArgs {
+            encoding: 1,
+            metadata: None,
+            children,
+            buffers: Some(buffers),
+            stats: None,
+        },
+    )
+}
+
+/// Build an Array tree: top-level struct with `n_fields` flat primitive children.
+fn build_struct_array(n_fields: usize) -> Vec<u8> {
+    let mut fbb = FlatBufferBuilder::with_capacity(1 << 16);
+    let leaves: Vec<_> = (0..n_fields)
+        .map(|_| build_array_node(&mut fbb, 1, vec![]))
+        .collect();
+    let root_node = build_array_node(&mut fbb, 0, leaves);
+    // Build a vector of buffer descriptors, one per child.
+    let bufs: Vec<_> = (0..n_fields)
+        .map(|_| fba::Buffer::new(0, 0, fba::Compression::None, 1024))
+        .collect();
+    let buffers = fbb.create_vector(&bufs);
+    let array = fba::Array::create(
+        &mut fbb,
+        &fba::ArrayArgs {
+            root: Some(root_node),
+            buffers: Some(buffers),
+        },
+    );
+    fbb.finish_minimal(array);
+    fbb.finished_data().to_vec()
+}
+
+// ----- Benchmarks: Layout -----
+
+/// Tuples are (n_chunks, n_fields). Picked to cover small/medium/large/very-wide.
+const LAYOUT_SHAPES: &[(usize, usize)] = &[
+    (1, 8),     // single chunk, narrow struct — small footer-like
+    (1, 100),   // single chunk, wide struct
+    (16, 32),   // 16 chunks of 32-field struct — medium
+    (128, 32),  // 128 chunks of 32-field struct — large
+    (1024, 32), // 1024 chunks — very large
+    (1, 1000),  // single chunk, very wide — wide-only
+];
+
+#[divan::bench(args = LAYOUT_SHAPES)]
+fn layout_root_checked(bencher: Bencher, shape: &(usize, usize)) {
+    let bytes = build_chunked_of_struct(shape.0, shape.1);
+    bencher.bench(|| {
+        let layout = root::<fbl::Layout>(divan::black_box(&bytes)).unwrap();
+        divan::black_box(layout.row_count());
+    });
+}
+
+#[divan::bench(args = LAYOUT_SHAPES)]
+fn layout_root_with_opts(bencher: Bencher, shape: &(usize, usize)) {
+    let bytes = build_chunked_of_struct(shape.0, shape.1);
+    bencher.bench(|| {
+        let layout =
+            root_with_opts::<fbl::Layout>(&LAYOUT_VERIFIER, divan::black_box(&bytes)).unwrap();
+        divan::black_box(layout.row_count());
+    });
+}
+
+#[divan::bench(args = LAYOUT_SHAPES)]
+fn layout_root_unchecked(bencher: Bencher, shape: &(usize, usize)) {
+    let bytes = build_chunked_of_struct(shape.0, shape.1);
+    bencher.bench(|| {
+        // SAFETY: bytes were produced by our own builder above.
+        let layout = unsafe { root_unchecked::<fbl::Layout>(divan::black_box(&bytes)) };
+        divan::black_box(layout.row_count());
+    });
+}
+
+/// Context bench: times building the layout flatbuffer and returns its byte length.
+#[divan::bench(args = LAYOUT_SHAPES)]
+fn layout_buffer_size(shape: &(usize, usize)) -> usize {
+    build_chunked_of_struct(shape.0, shape.1).len()
+}
+
+// ----- Benchmarks: Array -----
+
+const ARRAY_FIELDS: &[usize] = &[1, 8, 32, 100, 1000];
+
+#[divan::bench(args = ARRAY_FIELDS)]
+fn array_root_checked(bencher: Bencher, n_fields: usize) {
+    let bytes = build_struct_array(n_fields);
+    bencher.bench(|| {
+        let array = root::<fba::Array>(divan::black_box(&bytes)).unwrap();
+        divan::black_box(array.buffers().map(|b| b.len()));
+    });
+}
+
+#[divan::bench(args = ARRAY_FIELDS)]
+fn array_root_unchecked(bencher: Bencher, n_fields: usize) {
+    let bytes = build_struct_array(n_fields);
+    bencher.bench(|| {
+        // SAFETY: bytes were produced by our own builder above.
+        let array = unsafe { root_unchecked::<fba::Array>(divan::black_box(&bytes)) };
+        divan::black_box(array.buffers().map(|b| b.len()));
+    });
+}
+
+#[divan::bench(args = ARRAY_FIELDS)]
+fn array_buffer_size(n_fields: usize) -> usize {
+    build_struct_array(n_fields).len()
+}