From 408386b489caea974425ff89807bc5b90d944112 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 16:32:16 -0400 Subject: [PATCH 1/2] perf: optimize iterative exe Signed-off-by: Joe Isaacs --- .../src/arrays/chunked/vtable/canonical.rs | 16 +++++++--------- vortex-array/src/arrays/chunked/vtable/mod.rs | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/vortex-array/src/arrays/chunked/vtable/canonical.rs b/vortex-array/src/arrays/chunked/vtable/canonical.rs index 1e32f7295bc..2fc45e6544a 100644 --- a/vortex-array/src/arrays/chunked/vtable/canonical.rs +++ b/vortex-array/src/arrays/chunked/vtable/canonical.rs @@ -5,12 +5,12 @@ use vortex_buffer::Buffer; use vortex_error::VortexExpect; use vortex_error::VortexResult; -use crate::ArrayRef; +use crate::{AnyCanonical, ArrayRef}; use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; -use crate::arrays::Chunked; +use crate::arrays::{Chunked, ListView, Struct}; use crate::arrays::ChunkedArray; use crate::arrays::ListViewArray; use crate::arrays::PrimitiveArray; @@ -36,7 +36,7 @@ pub(super) fn _canonicalize( return Ok(Canonical::empty(array.dtype())); } if array.nchunks() == 1 { - return array.chunk(0).clone().execute::(ctx); + return Ok(Canonical::from(array.chunk(0).as_::())); } let owned_chunks: Vec = array.iter_chunks().cloned().collect(); @@ -46,7 +46,6 @@ pub(super) fn _canonicalize( &owned_chunks, Validity::copy_from_array(array.array())?, struct_dtype, - ctx, )?; Canonical::Struct(struct_array) } @@ -72,15 +71,14 @@ fn pack_struct_chunks( chunks: &[ArrayRef], validity: Validity, struct_dtype: &StructFields, - ctx: &mut ExecutionCtx, ) -> VortexResult { let len = chunks.iter().map(|chunk| chunk.len()).sum(); let mut field_arrays = Vec::new(); - let executed_chunks: Vec = chunks + let executed_chunks = chunks .iter() - .map(|c| c.clone().execute::(ctx)) - .collect::>()?; + .map(|c| c.clone().downcast::()) + .collect::>(); for (field_idx, field_dtype) in struct_dtype.fields().enumerate() { let mut field_chunks = Vec::with_capacity(chunks.len()); @@ -140,7 +138,7 @@ fn swizzle_list_chunks( let mut next_list = 0usize; for chunk in chunks { - let chunk_array = chunk.clone().execute::(ctx)?; + let chunk_array = chunk.clone().downcast::(); // By rebuilding as zero-copy to `List` and trimming all elements (to prevent gaps), we make // the final output `ListView` also zero-copyable to `List`. let chunk_array = chunk_array.rebuild(ListViewRebuildMode::MakeExact)?; diff --git a/vortex-array/src/arrays/chunked/vtable/mod.rs b/vortex-array/src/arrays/chunked/vtable/mod.rs index 5e9c4a4d817..42209aa1730 100644 --- a/vortex-array/src/arrays/chunked/vtable/mod.rs +++ b/vortex-array/src/arrays/chunked/vtable/mod.rs @@ -13,7 +13,7 @@ use vortex_error::vortex_panic; use vortex_session::VortexSession; use vortex_session::registry::CachedId; -use crate::ArrayEq; +use crate::{AnyCanonical, ArrayEq}; use crate::ArrayHash; use crate::ArrayRef; use crate::Canonical; @@ -239,6 +239,19 @@ impl VTable for Chunked { } fn execute(array: Array, ctx: &mut ExecutionCtx) -> VortexResult { + // Iteratively request execution of each chunk until it reaches canonical form. + // This gives the scheduler visibility into child execution and enables + // cross-step optimizations between chunk decoding steps. + for i in 0..array.nchunks() { + if !array.chunk(i).is::() { + return Ok(ExecutionResult::execute_slot::( + array, + i + CHUNKS_OFFSET, + )); + } + } + + // All chunks are now canonical — combine them. Ok(ExecutionResult::done(_canonicalize(array.as_view(), ctx)?)) } From 9e7ca16b823feb3ffdc7e9211b9137719ff9984f Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Fri, 17 Apr 2026 09:39:19 -0400 Subject: [PATCH 2/2] add faster take/put methods on arrays Signed-off-by: Joe Isaacs --- vortex-array/src/array/erased.rs | 48 +++++++++++++++++++ vortex-array/src/array/mod.rs | 7 +++ .../src/arrays/chunked/vtable/canonical.rs | 7 ++- vortex-array/src/arrays/chunked/vtable/mod.rs | 3 +- vortex-array/src/executor.rs | 15 +++--- 5 files changed, 71 insertions(+), 9 deletions(-) diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index 84367076314..a4ac4cfd00d 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -431,6 +431,54 @@ impl ArrayRef { self.with_slots(slots) } + /// Take a slot for executor-owned physical rewrites. This has the result that the array may + /// either be taken or cloned from the parent. + /// + /// The array can be put back with [`put_slot_unchecked`]. + /// + /// # Safety + /// The caller must put back a slot with the same logical dtype and length before exposing the + /// parent array, and must only use this for physical rewrites. + pub(crate) unsafe fn take_slot_unchecked( + mut self, + slot_idx: usize, + ) -> VortexResult<(ArrayRef, ArrayRef)> { + let child = if let Some(inner) = Arc::get_mut(&mut self.0) { + unsafe { inner.slots_mut()[slot_idx].take() } + .vortex_expect("take_slot_unchecked cannot take an absent slot") + } else { + self.slots()[slot_idx] + .as_ref() + .vortex_expect("take_slot_unchecked cannot take an absent slot") + .clone() + }; + + Ok((self, child)) + } + + /// Puts an array into `slot_idx` by either, cloning the inner array if the Arc is not exclusive + /// or replacing the slot in this `ArrayRef`. + /// This is the mirror of [`take_slot_unchecked`]. + /// + /// # Safety + /// The replacement must have the same logical dtype and length as the taken slot, and this + /// must only be used for physical rewrites. + pub(crate) unsafe fn put_slot_unchecked( + mut self, + slot_idx: usize, + replacement: ArrayRef, + ) -> VortexResult { + if let Some(inner) = Arc::get_mut(&mut self.0) { + unsafe { inner.slots_mut()[slot_idx] = Some(replacement) }; + return Ok(self); + } + + let mut slots = self.slots().to_vec(); + slots[slot_idx] = Some(replacement); + let inner = Arc::clone(&self.0); + inner.with_slots(self, slots) + } + /// Returns a new array with the provided slots. /// /// This is only valid for physical rewrites: slot count, presence, logical `DType`, and diff --git a/vortex-array/src/array/mod.rs b/vortex-array/src/array/mod.rs index 8193c128b1e..d4ed9096f82 100644 --- a/vortex-array/src/array/mod.rs +++ b/vortex-array/src/array/mod.rs @@ -68,6 +68,9 @@ pub(crate) trait DynArray: 'static + private::Sealed + Send + Sync + Debug { /// Returns the slots of the array. fn slots(&self) -> &[Option]; + /// Returns mutable slots of the array. + unsafe fn slots_mut(&mut self) -> &mut [Option]; + /// Returns the encoding ID of the array. fn encoding_id(&self) -> ArrayId; @@ -212,6 +215,10 @@ impl DynArray for ArrayInner { &self.slots } + unsafe fn slots_mut(&mut self) -> &mut [Option] { + &mut self.slots + } + fn encoding_id(&self) -> ArrayId { self.vtable.id() } diff --git a/vortex-array/src/arrays/chunked/vtable/canonical.rs b/vortex-array/src/arrays/chunked/vtable/canonical.rs index 2fc45e6544a..2364e57f514 100644 --- a/vortex-array/src/arrays/chunked/vtable/canonical.rs +++ b/vortex-array/src/arrays/chunked/vtable/canonical.rs @@ -5,15 +5,18 @@ use vortex_buffer::Buffer; use vortex_error::VortexExpect; use vortex_error::VortexResult; -use crate::{AnyCanonical, ArrayRef}; +use crate::AnyCanonical; +use crate::ArrayRef; use crate::Canonical; use crate::ExecutionCtx; use crate::IntoArray; use crate::array::ArrayView; -use crate::arrays::{Chunked, ListView, Struct}; +use crate::arrays::Chunked; use crate::arrays::ChunkedArray; +use crate::arrays::ListView; use crate::arrays::ListViewArray; use crate::arrays::PrimitiveArray; +use crate::arrays::Struct; use crate::arrays::StructArray; use crate::arrays::chunked::ChunkedArrayExt; use crate::arrays::listview::ListViewArrayExt; diff --git a/vortex-array/src/arrays/chunked/vtable/mod.rs b/vortex-array/src/arrays/chunked/vtable/mod.rs index 42209aa1730..1171f40d230 100644 --- a/vortex-array/src/arrays/chunked/vtable/mod.rs +++ b/vortex-array/src/arrays/chunked/vtable/mod.rs @@ -13,7 +13,8 @@ use vortex_error::vortex_panic; use vortex_session::VortexSession; use vortex_session::registry::CachedId; -use crate::{AnyCanonical, ArrayEq}; +use crate::AnyCanonical; +use crate::ArrayEq; use crate::ArrayHash; use crate::ArrayRef; use crate::Canonical; diff --git a/vortex-array/src/executor.rs b/vortex-array/src/executor.rs index 8e0c86ff44f..35d638487c8 100644 --- a/vortex-array/src/executor.rs +++ b/vortex-array/src/executor.rs @@ -122,7 +122,8 @@ impl ArrayRef { return Ok(current); } Some((parent, slot_idx, _)) => { - current = parent.with_slot(slot_idx, current)?.optimize()?; + current = + unsafe { parent.put_slot_unchecked(slot_idx, current)? }.optimize()?; continue; } } @@ -140,7 +141,8 @@ impl ArrayRef { )); current = rewritten.optimize()?; if let Some((parent, slot_idx, _)) = stack.pop() { - current = parent.with_slot(slot_idx, current)?.optimize()?; + current = + unsafe { parent.put_slot_unchecked(slot_idx, current)? }.optimize()?; } continue; } @@ -151,13 +153,14 @@ impl ArrayRef { match step { ExecutionStep::ExecuteSlot(i, done) => { let child = array.slots()[i] - .clone() + .as_ref() .vortex_expect("ExecuteSlot index in bounds"); ctx.log(format_args!( "ExecuteSlot({i}): pushing {}, focusing on {}", array, child )); - stack.push((array, i, done)); + let (parent, child) = unsafe { array.take_slot_unchecked(i)? }; + stack.push((parent, i, done)); current = child.optimize()?; } ExecutionStep::Done => { @@ -327,9 +330,9 @@ impl Executable for ArrayRef { ExecutionStep::ExecuteSlot(i, _) => { // For single-step execution, handle ExecuteSlot by executing the slot, // replacing it, and returning the updated array. - let child = array.slots()[i].clone().vortex_expect("valid slot index"); + let (array, child) = unsafe { array.take_slot_unchecked(i)? }; let executed_child = child.execute::(ctx)?; - array.with_slot(i, executed_child) + unsafe { array.put_slot_unchecked(i, executed_child) } } } }