-
Notifications
You must be signed in to change notification settings - Fork 156
perf: remove implicit ListViewArray rebuild during take and filter operations
#8048
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: develop
Are you sure you want to change the base?
Changes from all commits
faccc95
c8b117f
54df45a
3196d0f
a2aa05f
9164418
f4d3e79
9b91eab
52c3916
94cf0b5
15dd4d3
14b76c9
cd7c97e
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -7,14 +7,17 @@ use std::sync::Arc; | |
|
|
||
| use num_traits::AsPrimitive; | ||
| use smallvec::smallvec; | ||
| use vortex_buffer::BitBufferMut; | ||
| use vortex_error::VortexExpect; | ||
| use vortex_error::VortexResult; | ||
| use vortex_error::vortex_bail; | ||
| use vortex_error::vortex_ensure; | ||
| use vortex_error::vortex_err; | ||
| use vortex_mask::Mask; | ||
|
|
||
| use crate::ArrayRef; | ||
| use crate::ArraySlots; | ||
| use crate::ExecutionCtx; | ||
| use crate::LEGACY_SESSION; | ||
| #[expect(deprecated)] | ||
| use crate::ToCanonical as _; | ||
|
|
@@ -30,6 +33,7 @@ use crate::arrays::PrimitiveArray; | |
| use crate::arrays::bool; | ||
| use crate::dtype::DType; | ||
| use crate::dtype::IntegerPType; | ||
| use crate::expr::stats::Stat; | ||
| use crate::match_each_integer_ptype; | ||
| use crate::validity::Validity; | ||
|
|
||
|
|
@@ -396,6 +400,91 @@ pub trait ListViewArrayExt: TypedArrayRef<ListView> { | |
| let sizes_primitive = self.sizes().to_primitive(); | ||
| validate_zctl(self.elements(), offsets_primitive, sizes_primitive).is_ok() | ||
| } | ||
|
|
||
| /// Returns a [`Mask`] of length `elements.len()` where each bit is set iff that | ||
| /// position in `elements` is referenced by at least one view. | ||
| /// | ||
| /// Walks every `(offset, size)` pair, canonicalizes both `offsets` and `sizes`, | ||
| /// and allocates a `BitBuffer` of length `elements.len()`, so it is extremely costly. | ||
| /// | ||
| /// Returns `Ok(None)` when `elements` is empty. | ||
| #[allow(clippy::cognitive_complexity, clippy::unnecessary_fallible_conversions)] | ||
| fn compute_referenced_elements_mask( | ||
| &self, | ||
| ctx: &mut ExecutionCtx, | ||
| ) -> VortexResult<Option<Mask>> { | ||
| let len = self.elements().len(); | ||
| if len == 0 { | ||
| return Ok(None); | ||
| } | ||
|
|
||
| let offsets_primitive = self.offsets().clone().execute::<PrimitiveArray>(ctx)?; | ||
| let sizes_primitive = self.sizes().clone().execute::<PrimitiveArray>(ctx)?; | ||
|
|
||
| let mut buf = BitBufferMut::new_unset(len); | ||
| let offset_len = self.as_ref().len(); | ||
|
|
||
| match_each_integer_ptype!(offsets_primitive.ptype(), |O| { | ||
| match_each_integer_ptype!(sizes_primitive.ptype(), |S| { | ||
| let offsets_slice = offsets_primitive.as_slice::<O>(); | ||
| let sizes_slice = sizes_primitive.as_slice::<S>(); | ||
|
|
||
| for i in 0..offset_len { | ||
| let start = | ||
| usize::try_from(offsets_slice[i]).vortex_expect("offset must fit in usize"); | ||
| let size = | ||
| usize::try_from(sizes_slice[i]).vortex_expect("size must fit in usize"); | ||
| buf.fill_range(start, start + size, true); | ||
| } | ||
| }) | ||
| }); | ||
|
|
||
| Ok(Some(Mask::from_buffer(buf.freeze()))) | ||
| } | ||
|
|
||
| /// Exact fraction of `elements` referenced by some view, in `[0.0, 1.0]`. Extremely costly. | ||
| /// | ||
| /// Returns `Ok(None)` when `elements` is empty. | ||
| fn compute_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<Option<f32>> { | ||
| Ok(self | ||
| .compute_referenced_elements_mask(ctx)? | ||
| .map(|mask| match mask { | ||
| Mask::AllTrue(_) => 1.0, | ||
| Mask::AllFalse(_) => 0.0, | ||
| Mask::Values(values) => values.true_count() as f32 / self.elements().len() as f32, | ||
| })) | ||
| } | ||
|
|
||
| /// Upper-bound estimate of [`compute_density`](Self::compute_density) via | ||
| /// `sum(sizes) / elements.len()`, clamped to `[0.0, 1.0]`. | ||
| /// | ||
| /// Exact for non-overlapping views, but overcounts when multiple views share the same elements. | ||
| /// | ||
| /// Returns `Ok(None)` when `elements` is empty. | ||
| fn estimate_density(&self, ctx: &mut ExecutionCtx) -> VortexResult<Option<f32>> { | ||
| let n_elts = self.elements().len(); | ||
| if n_elts == 0 { | ||
| return Ok(None); | ||
| } | ||
|
|
||
| let sizes = self.sizes(); | ||
| if sizes.is_empty() { | ||
| return Ok(Some(0.0)); | ||
| } | ||
|
|
||
| // compute_stat short-circuits on a cached exact Sum and otherwise computes-and-caches. | ||
| let sizes_sum = sizes | ||
| .statistics() | ||
| .compute_stat(Stat::Sum, ctx)? | ||
| .ok_or_else(|| vortex_err!("Sum stat unavailable for sizes"))? | ||
| .as_primitive() | ||
| .as_::<u64>() | ||
| .ok_or_else(|| vortex_err!("could not cast sum of sizes to u64"))?; | ||
|
|
||
| let estimate = (sizes_sum as f32 / n_elts as f32).clamp(0.0, 1.0); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how can this go above or below 0,1 maybe a panic here. or debug assert
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would prefer a debug assert here |
||
|
|
||
| Ok(Some(estimate)) | ||
| } | ||
| } | ||
| impl<T: TypedArrayRef<ListView>> ListViewArrayExt for T {} | ||
|
|
||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -6,6 +6,7 @@ use vortex_buffer::BufferMut; | |
| use vortex_error::VortexExpect; | ||
| use vortex_error::VortexResult; | ||
|
|
||
| use crate::ExecutionCtx; | ||
| use crate::IntoArray; | ||
| use crate::LEGACY_SESSION; | ||
| #[expect(deprecated)] | ||
|
|
@@ -25,6 +26,13 @@ use crate::match_each_integer_ptype; | |
| use crate::scalar::Scalar; | ||
| use crate::scalar_fn::fns::operators::Operator; | ||
|
|
||
| /// Density threshold to decide whether to rebuild a sparse `ListViewArray`. | ||
| /// | ||
| /// A `ListViewArray` can accumulate unreferenced bytes in its `elements` buffer after | ||
| /// metadata-only operations like `take` and `filter`. When density (referenced fraction of `elements`) | ||
| /// falls below this threshold, the benefits of a rebuild may outweigh its cost. | ||
| const REBUILD_DENSITY_THRESHOLD: f32 = 0.1; | ||
|
mhk197 marked this conversation as resolved.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. explain it's a guess and likely different for different element dtypes and different users of the list |
||
|
|
||
| /// Modes for rebuilding a [`ListViewArray`]. | ||
| pub enum ListViewRebuildMode { | ||
| /// Removes all unused data and flattens out all list data, such that the array is zero-copyable | ||
|
|
@@ -376,6 +384,16 @@ impl ListViewArray { | |
| self.rebuild_zero_copy_to_list() | ||
| } | ||
| } | ||
|
|
||
| pub fn should_rebuild(&self, exact: bool, ctx: &mut ExecutionCtx) -> VortexResult<bool> { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. comment. explain this. also this looks like a footgun. add the docstr with a detail message here or point to the threshold!
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Like Joe said, this seems like a footgun. I think that the caller should just decide, and the |
||
| let density = if exact { | ||
| self.compute_density(ctx)? | ||
| } else { | ||
| self.estimate_density(ctx)? | ||
| }; | ||
|
|
||
| Ok(density.unwrap_or(1.0) < REBUILD_DENSITY_THRESHOLD) | ||
| } | ||
| } | ||
|
|
||
| #[cfg(test)] | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
don't use a fallible convert use the numtrait infallible one.
less branching