From e0977f6f6e56d962b83c03e87868ddf8120198a6 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 11:55:37 -0400 Subject: [PATCH 1/4] perf Signed-off-by: Joe Isaacs --- .../src/arrays/varbinview/build_views.rs | 83 +++++++++++++------ vortex-array/src/arrays/varbinview/view.rs | 83 +++++-------------- 2 files changed, 77 insertions(+), 89 deletions(-) diff --git a/vortex-array/src/arrays/varbinview/build_views.rs b/vortex-array/src/arrays/varbinview/build_views.rs index 87f5c84706b..6760fcd673e 100644 --- a/vortex-array/src/arrays/varbinview/build_views.rs +++ b/vortex-array/src/arrays/varbinview/build_views.rs @@ -24,7 +24,12 @@ pub fn offsets_to_lengths(offsets: &[P]) -> Buffer

{ /// Maximum number of buffer bytes that can be referenced by a single `BinaryView` pub const MAX_BUFFER_LEN: usize = i32::MAX as usize; -/// Split a large buffer of input `bytes` holding string data +/// Build `BinaryView`s from a contiguous byte buffer and per-element lengths. +/// +/// When total data exceeds `max_buffer_len` (2 GiB), buffers are split to ensure +/// offsets fit in `u32`. When data fits in a single buffer, per-element split checks +/// are skipped entirely. +#[allow(clippy::cast_possible_truncation)] pub fn build_views>( start_buf_index: u32, max_buffer_len: usize, @@ -33,35 +38,63 @@ pub fn build_views>( ) -> (Vec, Buffer) { let mut views = BufferMut::::with_capacity(lens.len()); - let mut buffers = Vec::new(); - let mut buf_index = start_buf_index; - - let mut offset = 0; - for &len in lens { - let len = len.as_(); - assert!(len <= max_buffer_len, "values cannot exceed max_buffer_len"); - - if (offset + len) > max_buffer_len { - // Roll the buffer every 2GiB, to avoid overflowing VarBinView offset field - let rest = bytes.split_off(offset); + if bytes.len() <= max_buffer_len { + // Fast path: all data fits in a single buffer. No split checks needed per element + // and offsets are guaranteed to fit in u32 (max_buffer_len <= i32::MAX < u32::MAX). + let bytes_ptr = bytes.as_slice().as_ptr(); + + // Write views directly via pointer to avoid per-element push_unchecked overhead. + let views_dst = views.spare_capacity_mut().as_mut_ptr() as *mut BinaryView; + + let mut offset: usize = 0; + for (i, &len) in lens.iter().enumerate() { + let len: usize = len.as_(); + // SAFETY: the sum of all lengths equals bytes.len() and we process them + // sequentially, so offset + len <= bytes.len() at every iteration. + let value = unsafe { std::slice::from_raw_parts(bytes_ptr.add(offset), len) }; + let view = BinaryView::make_view(value, start_buf_index, offset as u32); + // SAFETY: we reserved capacity for lens.len() views and i < lens.len(). + unsafe { views_dst.add(i).write(view) }; + offset += len; + } + // SAFETY: we wrote exactly lens.len() views above. + unsafe { views.set_len(lens.len()) }; + + let buffers = if bytes.is_empty() { + Vec::new() + } else { + vec![bytes.freeze()] + }; + + (buffers, views.freeze()) + } else { + // Slow path: may need to split across multiple 2 GiB buffers. + let mut buffers = Vec::new(); + let mut buf_index = start_buf_index; + let mut offset = 0; + + for &len in lens { + let len = len.as_(); + assert!(len <= max_buffer_len, "values cannot exceed max_buffer_len"); + + if (offset + len) > max_buffer_len { + let rest = bytes.split_off(offset); + buffers.push(bytes.freeze()); + buf_index += 1; + offset = 0; + bytes = rest; + } + let view = BinaryView::make_view(&bytes[offset..][..len], buf_index, offset.as_()); + unsafe { views.push_unchecked(view) }; + offset += len; + } + if !bytes.is_empty() { buffers.push(bytes.freeze()); - buf_index += 1; - offset = 0; - - bytes = rest; } - let view = BinaryView::make_view(&bytes[offset..][..len], buf_index, offset.as_()); - // SAFETY: we reserved the right capacity beforehand - unsafe { views.push_unchecked(view) }; - offset += len; - } - if !bytes.is_empty() { - buffers.push(bytes.freeze()); + (buffers, views.freeze()) } - - (buffers, views.freeze()) } #[cfg(test)] diff --git a/vortex-array/src/arrays/varbinview/view.rs b/vortex-array/src/arrays/varbinview/view.rs index 38cbd2798c6..0da933ed0a5 100644 --- a/vortex-array/src/arrays/varbinview/view.rs +++ b/vortex-array/src/arrays/varbinview/view.rs @@ -10,7 +10,6 @@ use std::ops::Range; use static_assertions::assert_eq_align; use static_assertions::assert_eq_size; -use vortex_error::VortexExpect; /// A view over a variable-length binary value. /// @@ -46,17 +45,6 @@ pub struct Inlined { } impl Inlined { - /// Creates a new inlined representation from the provided value of constant size. - fn new(value: &[u8]) -> Self { - debug_assert_eq!(value.len(), N); - let mut inlined = Self { - size: N.try_into().vortex_expect("inlined size must fit in u32"), - data: [0u8; BinaryView::MAX_INLINED_SIZE], - }; - inlined.data[..N].copy_from_slice(&value[..N]); - inlined - } - /// Returns the full inlined value. #[inline] pub fn value(&self) -> &[u8] { @@ -101,66 +89,33 @@ impl BinaryView { /// Maximum size of an inlined binary value. pub const MAX_INLINED_SIZE: usize = 12; - /// Create a view from a value, block and offset + /// Create a view from a value, block and offset. /// /// Depending on the length of the provided value either a new inlined /// or a reference view will be constructed. - /// - /// Adapted from arrow-rs - /// Explicitly enumerating inlined view produces code that avoids calling generic `ptr::copy_non_interleave` that's slower than explicit stores - #[inline(never)] + #[inline] + #[allow(clippy::cast_possible_truncation)] pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self { - match value.len() { - 0 => Self { - inlined: Inlined::new::<0>(value), - }, - 1 => Self { - inlined: Inlined::new::<1>(value), - }, - 2 => Self { - inlined: Inlined::new::<2>(value), - }, - 3 => Self { - inlined: Inlined::new::<3>(value), - }, - 4 => Self { - inlined: Inlined::new::<4>(value), - }, - 5 => Self { - inlined: Inlined::new::<5>(value), - }, - 6 => Self { - inlined: Inlined::new::<6>(value), - }, - 7 => Self { - inlined: Inlined::new::<7>(value), - }, - 8 => Self { - inlined: Inlined::new::<8>(value), - }, - 9 => Self { - inlined: Inlined::new::<9>(value), - }, - 10 => Self { - inlined: Inlined::new::<10>(value), - }, - 11 => Self { - inlined: Inlined::new::<11>(value), - }, - 12 => Self { - inlined: Inlined::new::<12>(value), - }, - _ => Self { + let len = value.len(); + if len <= Self::MAX_INLINED_SIZE { + let mut view = Self { + le_bytes: [0u8; 16], + }; + unsafe { + view.inlined.size = len as u32; + std::ptr::copy_nonoverlapping(value.as_ptr(), view.inlined.data.as_mut_ptr(), len); + } + view + } else { + Self { _ref: Ref { - size: u32::try_from(value.len()).vortex_expect("value length must fit in u32"), - prefix: value[0..4] - .try_into() - .ok() - .vortex_expect("prefix must be exactly 4 bytes"), + size: len as u32, + // SAFETY: len >= 13, so reading 4 bytes from the start is always valid. + prefix: unsafe { (value.as_ptr() as *const [u8; 4]).read_unaligned() }, buffer_index: block, offset, }, - }, + } } } From a298dc755cace3e80e78612d6f6eada9ba66777c Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 15:16:46 -0400 Subject: [PATCH 2/4] perf Signed-off-by: Joe Isaacs --- .../src/arrays/varbinview/build_views.rs | 2 +- vortex-array/src/arrays/varbinview/view.rs | 83 ++++++++++++++----- 2 files changed, 65 insertions(+), 20 deletions(-) diff --git a/vortex-array/src/arrays/varbinview/build_views.rs b/vortex-array/src/arrays/varbinview/build_views.rs index 6760fcd673e..f75ba20a897 100644 --- a/vortex-array/src/arrays/varbinview/build_views.rs +++ b/vortex-array/src/arrays/varbinview/build_views.rs @@ -29,7 +29,6 @@ pub const MAX_BUFFER_LEN: usize = i32::MAX as usize; /// When total data exceeds `max_buffer_len` (2 GiB), buffers are split to ensure /// offsets fit in `u32`. When data fits in a single buffer, per-element split checks /// are skipped entirely. -#[allow(clippy::cast_possible_truncation)] pub fn build_views>( start_buf_index: u32, max_buffer_len: usize, @@ -52,6 +51,7 @@ pub fn build_views>( // SAFETY: the sum of all lengths equals bytes.len() and we process them // sequentially, so offset + len <= bytes.len() at every iteration. let value = unsafe { std::slice::from_raw_parts(bytes_ptr.add(offset), len) }; + #[allow(clippy::cast_possible_truncation)] let view = BinaryView::make_view(value, start_buf_index, offset as u32); // SAFETY: we reserved capacity for lens.len() views and i < lens.len(). unsafe { views_dst.add(i).write(view) }; diff --git a/vortex-array/src/arrays/varbinview/view.rs b/vortex-array/src/arrays/varbinview/view.rs index 0da933ed0a5..38cbd2798c6 100644 --- a/vortex-array/src/arrays/varbinview/view.rs +++ b/vortex-array/src/arrays/varbinview/view.rs @@ -10,6 +10,7 @@ use std::ops::Range; use static_assertions::assert_eq_align; use static_assertions::assert_eq_size; +use vortex_error::VortexExpect; /// A view over a variable-length binary value. /// @@ -45,6 +46,17 @@ pub struct Inlined { } impl Inlined { + /// Creates a new inlined representation from the provided value of constant size. + fn new(value: &[u8]) -> Self { + debug_assert_eq!(value.len(), N); + let mut inlined = Self { + size: N.try_into().vortex_expect("inlined size must fit in u32"), + data: [0u8; BinaryView::MAX_INLINED_SIZE], + }; + inlined.data[..N].copy_from_slice(&value[..N]); + inlined + } + /// Returns the full inlined value. #[inline] pub fn value(&self) -> &[u8] { @@ -89,33 +101,66 @@ impl BinaryView { /// Maximum size of an inlined binary value. pub const MAX_INLINED_SIZE: usize = 12; - /// Create a view from a value, block and offset. + /// Create a view from a value, block and offset /// /// Depending on the length of the provided value either a new inlined /// or a reference view will be constructed. - #[inline] - #[allow(clippy::cast_possible_truncation)] + /// + /// Adapted from arrow-rs + /// Explicitly enumerating inlined view produces code that avoids calling generic `ptr::copy_non_interleave` that's slower than explicit stores + #[inline(never)] pub fn make_view(value: &[u8], block: u32, offset: u32) -> Self { - let len = value.len(); - if len <= Self::MAX_INLINED_SIZE { - let mut view = Self { - le_bytes: [0u8; 16], - }; - unsafe { - view.inlined.size = len as u32; - std::ptr::copy_nonoverlapping(value.as_ptr(), view.inlined.data.as_mut_ptr(), len); - } - view - } else { - Self { + match value.len() { + 0 => Self { + inlined: Inlined::new::<0>(value), + }, + 1 => Self { + inlined: Inlined::new::<1>(value), + }, + 2 => Self { + inlined: Inlined::new::<2>(value), + }, + 3 => Self { + inlined: Inlined::new::<3>(value), + }, + 4 => Self { + inlined: Inlined::new::<4>(value), + }, + 5 => Self { + inlined: Inlined::new::<5>(value), + }, + 6 => Self { + inlined: Inlined::new::<6>(value), + }, + 7 => Self { + inlined: Inlined::new::<7>(value), + }, + 8 => Self { + inlined: Inlined::new::<8>(value), + }, + 9 => Self { + inlined: Inlined::new::<9>(value), + }, + 10 => Self { + inlined: Inlined::new::<10>(value), + }, + 11 => Self { + inlined: Inlined::new::<11>(value), + }, + 12 => Self { + inlined: Inlined::new::<12>(value), + }, + _ => Self { _ref: Ref { - size: len as u32, - // SAFETY: len >= 13, so reading 4 bytes from the start is always valid. - prefix: unsafe { (value.as_ptr() as *const [u8; 4]).read_unaligned() }, + size: u32::try_from(value.len()).vortex_expect("value length must fit in u32"), + prefix: value[0..4] + .try_into() + .ok() + .vortex_expect("prefix must be exactly 4 bytes"), buffer_index: block, offset, }, - } + }, } } From 249d6918252ca2b900be58afcb6a8f4536a0c76a Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 15:44:22 -0400 Subject: [PATCH 3/4] perf Signed-off-by: Joe Isaacs --- .../src/arrays/varbinview/build_views.rs | 33 +++++++------------ 1 file changed, 11 insertions(+), 22 deletions(-) diff --git a/vortex-array/src/arrays/varbinview/build_views.rs b/vortex-array/src/arrays/varbinview/build_views.rs index f75ba20a897..4ee8915db0a 100644 --- a/vortex-array/src/arrays/varbinview/build_views.rs +++ b/vortex-array/src/arrays/varbinview/build_views.rs @@ -37,36 +37,23 @@ pub fn build_views>( ) -> (Vec, Buffer) { let mut views = BufferMut::::with_capacity(lens.len()); - if bytes.len() <= max_buffer_len { + let buffers = if bytes.len() <= max_buffer_len { // Fast path: all data fits in a single buffer. No split checks needed per element // and offsets are guaranteed to fit in u32 (max_buffer_len <= i32::MAX < u32::MAX). - let bytes_ptr = bytes.as_slice().as_ptr(); - - // Write views directly via pointer to avoid per-element push_unchecked overhead. - let views_dst = views.spare_capacity_mut().as_mut_ptr() as *mut BinaryView; - let mut offset: usize = 0; - for (i, &len) in lens.iter().enumerate() { + for &len in lens { let len: usize = len.as_(); - // SAFETY: the sum of all lengths equals bytes.len() and we process them - // sequentially, so offset + len <= bytes.len() at every iteration. - let value = unsafe { std::slice::from_raw_parts(bytes_ptr.add(offset), len) }; - #[allow(clippy::cast_possible_truncation)] - let view = BinaryView::make_view(value, start_buf_index, offset as u32); - // SAFETY: we reserved capacity for lens.len() views and i < lens.len(). - unsafe { views_dst.add(i).write(view) }; + let view = + BinaryView::make_view(&bytes[offset..][..len], start_buf_index, offset.as_()); + unsafe { views.push_unchecked(view) }; offset += len; } - // SAFETY: we wrote exactly lens.len() views above. - unsafe { views.set_len(lens.len()) }; - let buffers = if bytes.is_empty() { + if bytes.is_empty() { Vec::new() } else { vec![bytes.freeze()] - }; - - (buffers, views.freeze()) + } } else { // Slow path: may need to split across multiple 2 GiB buffers. let mut buffers = Vec::new(); @@ -93,8 +80,10 @@ pub fn build_views>( buffers.push(bytes.freeze()); } - (buffers, views.freeze()) - } + buffers + }; + + (buffers, views.freeze()) } #[cfg(test)] From edd6b0d237f86681043ab71098af6851d2966f91 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 15:59:36 -0400 Subject: [PATCH 4/4] perf Signed-off-by: Joe Isaacs --- .../src/arrays/varbinview/build_views.rs | 84 +++++++++---------- 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/vortex-array/src/arrays/varbinview/build_views.rs b/vortex-array/src/arrays/varbinview/build_views.rs index 4ee8915db0a..2325185b0e9 100644 --- a/vortex-array/src/arrays/varbinview/build_views.rs +++ b/vortex-array/src/arrays/varbinview/build_views.rs @@ -27,8 +27,7 @@ pub const MAX_BUFFER_LEN: usize = i32::MAX as usize; /// Build `BinaryView`s from a contiguous byte buffer and per-element lengths. /// /// When total data exceeds `max_buffer_len` (2 GiB), buffers are split to ensure -/// offsets fit in `u32`. When data fits in a single buffer, per-element split checks -/// are skipped entirely. +/// offsets fit in `u32`. pub fn build_views>( start_buf_index: u32, max_buffer_len: usize, @@ -36,56 +35,57 @@ pub fn build_views>( lens: &[P], ) -> (Vec, Buffer) { let mut views = BufferMut::::with_capacity(lens.len()); + let views_dst = views.spare_capacity_mut().as_mut_ptr().cast::(); - let buffers = if bytes.len() <= max_buffer_len { - // Fast path: all data fits in a single buffer. No split checks needed per element - // and offsets are guaranteed to fit in u32 (max_buffer_len <= i32::MAX < u32::MAX). - let mut offset: usize = 0; - for &len in lens { - let len: usize = len.as_(); - let view = - BinaryView::make_view(&bytes[offset..][..len], start_buf_index, offset.as_()); - unsafe { views.push_unchecked(view) }; - offset += len; - } + let mut buffers = Vec::new(); + let mut buf_index = start_buf_index; + let mut offset = 0; + let mut bytes_ptr = bytes.as_slice().as_ptr(); - if bytes.is_empty() { - Vec::new() - } else { - vec![bytes.freeze()] - } - } else { - // Slow path: may need to split across multiple 2 GiB buffers. - let mut buffers = Vec::new(); - let mut buf_index = start_buf_index; - let mut offset = 0; - - for &len in lens { - let len = len.as_(); - assert!(len <= max_buffer_len, "values cannot exceed max_buffer_len"); - - if (offset + len) > max_buffer_len { - let rest = bytes.split_off(offset); - buffers.push(bytes.freeze()); - buf_index += 1; - offset = 0; - bytes = rest; - } - let view = BinaryView::make_view(&bytes[offset..][..len], buf_index, offset.as_()); - unsafe { views.push_unchecked(view) }; - offset += len; - } + for (i, &len) in lens.iter().enumerate() { + let len = len.as_(); + assert!(len <= max_buffer_len, "values cannot exceed max_buffer_len"); - if !bytes.is_empty() { + if (offset + len) > max_buffer_len { + let rest = bytes.split_off(offset); buffers.push(bytes.freeze()); + buf_index += 1; + offset = 0; + bytes = rest; + bytes_ptr = bytes.as_slice().as_ptr(); } - buffers - }; + // SAFETY: we reserved capacity for lens.len() views and i < lens.len(). + // The split check above keeps offsets within max_buffer_len; the lengths + // describe bytes sequentially, so offset + len fits in the current byte buffer. + unsafe { write_view(views_dst, i, bytes_ptr, len, buf_index, offset) }; + offset += len; + } + + if !bytes.is_empty() { + buffers.push(bytes.freeze()); + } + + // SAFETY: the loop writes exactly lens.len() views into the reserved capacity. + unsafe { views.set_len(lens.len()) }; (buffers, views.freeze()) } +#[inline(always)] +unsafe fn write_view( + views_dst: *mut BinaryView, + view_index: usize, + bytes_ptr: *const u8, + len: usize, + buf_index: u32, + offset: usize, +) { + let value = unsafe { std::slice::from_raw_parts(bytes_ptr.add(offset), len) }; + let view = BinaryView::make_view(value, buf_index, offset.as_()); + unsafe { views_dst.add(view_index).write(view) }; +} + #[cfg(test)] mod tests { use vortex_buffer::ByteBuffer;