Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions encodings/sparse/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,7 @@ vortex-array = { workspace = true, features = ["_test-harness"] }
[[bench]]
name = "sparse_canonical"
harness = false

[[bench]]
name = "sparse_pushdown"
harness = false
144 changes: 144 additions & 0 deletions encodings/sparse/benches/sparse_pushdown.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

//! Benchmarks for the Sparse pushdown kernels (`is_constant`, `sum`, `min_max`,
//! `null_count`, compare).
//!
//! Each benchmark exercises the registered kernel path on a single representative
//! sparse `i32` array. All are `O(num_patches)`; the patch counts below are sized so
//! each lands in the ~10-100µs range for a stable CodSpeed signal. `between`/`fill_null`/
//! `nan_count` are omitted since they mirror the compare/null_count cost profiles.

#![expect(clippy::cast_possible_truncation)]

use std::sync::LazyLock;

use divan::Bencher;
use vortex_array::ArrayRef;
use vortex_array::Canonical;
use vortex_array::ExecutionCtx;
use vortex_array::IntoArray;
use vortex_array::VortexSessionExecute;
use vortex_array::aggregate_fn::fns::is_constant::is_constant;
use vortex_array::aggregate_fn::fns::min_max::min_max;
use vortex_array::aggregate_fn::fns::null_count::null_count;
use vortex_array::aggregate_fn::fns::sum::sum;
use vortex_array::arrays::ConstantArray;
use vortex_array::arrays::PrimitiveArray;
use vortex_array::builtins::ArrayBuiltins;
use vortex_array::dtype::DType;
use vortex_array::dtype::Nullability;
use vortex_array::dtype::PType;
use vortex_array::scalar::Scalar;
use vortex_array::scalar_fn::fns::operators::Operator;
use vortex_array::session::ArraySession;
use vortex_buffer::Buffer;
use vortex_error::VortexExpect;
use vortex_session::VortexSession;
use vortex_sparse::Sparse;

/// Benchmark binary entry point.
///
/// Does nothing itself beyond delegating to `divan::main()`; the functions annotated with
/// `#[divan::bench]` in this file are presumably picked up by that call (divan behavior,
/// not visible here).
fn main() {
divan::main();
}

/// Logical length handed to `Sparse::try_new` for every array built in this file; also the
/// numerator used to derive the spacing between patch indices.
const LEN: usize = 1_000_000;

/// Creates an empty session carrying `ArraySession`, then runs `vortex_sparse::initialize`
/// on it so Sparse and its pushdown kernels are registered.
fn sparse_session() -> VortexSession {
    let session = VortexSession::empty().with::<ArraySession>();
    vortex_sparse::initialize(&session);
    session
}

/// Lazily-built session shared by every benchmark in this file.
static SESSION: LazyLock<VortexSession> = LazyLock::new(sparse_session);

/// Construct a sparse `i32` array of logical length `LEN` whose fill value is 1.
///
/// Patch `i` sits at index `i * (LEN / num_patches)`, giving `num_patches` evenly spaced
/// patches. With `constant == true` every patch value is 1 as well, so the array as a whole
/// is constant (the worst case for `is_constant`: it must scan all patches to confirm).
/// Otherwise patch `i` holds `2 + i`.
///
/// `num_patches` must be non-zero (it is a divisor) and should not exceed `LEN`, since a
/// zero stride would place every patch at index 0.
fn make_sparse(num_patches: usize, constant: bool) -> ArrayRef {
    let step = LEN / num_patches;
    let positions: Buffer<u32> = (0..num_patches).map(|i| (i * step) as u32).collect();

    // Decide once which value sequence to emit rather than branching per element.
    let patch_values: Buffer<i32> = if constant {
        std::iter::repeat(1i32).take(num_patches).collect()
    } else {
        (0..num_patches).map(|i| 2 + i as i32).collect()
    };

    let fill = Scalar::from(1i32);
    Sparse::try_new(positions.into_array(), patch_values.into_array(), LEN, fill)
        .vortex_expect("valid sparse")
        .into_array()
}

/// Construct a sparse nullable `i32` array of logical length `LEN` whose fill is null.
///
/// Patch `i` sits at index `i * (LEN / num_patches)`. Patches whose ordinal is a multiple
/// of three are null and the rest hold `i`, so `null_count` does real `O(P)` work over the
/// patch validity.
///
/// `num_patches` must be non-zero because it is used as a divisor.
fn make_sparse_nullable(num_patches: usize) -> ArrayRef {
    let step = LEN / num_patches;
    let positions: Buffer<u32> = (0..num_patches).map(|i| (i * step) as u32).collect();

    // `then_some` yields `None` exactly for the ordinals divisible by three.
    let patch_values = PrimitiveArray::from_option_iter(
        (0..num_patches).map(|i| (i % 3 != 0).then_some(i as i32)),
    )
    .into_array();

    let null_fill = Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable));
    Sparse::try_new(positions.into_array(), patch_values, LEN, null_fill)
        .vortex_expect("valid sparse")
        .into_array()
}

/// Benchmarks `is_constant` on a fully constant sparse array with 100,000 patches.
///
/// Each input pairs a freshly built array with a new execution context from `SESSION`.
#[divan::bench]
fn sparse_is_constant(bencher: Bencher) {
    let setup = || {
        let array = make_sparse(100_000, true);
        (array, SESSION.create_execution_ctx())
    };

    bencher.with_inputs(setup).bench_values(|(array, mut ctx)| {
        let verdict = is_constant(&array, &mut ctx).vortex_expect("is_constant");
        divan::black_box(verdict)
    });
}

/// Benchmarks `sum` on a non-constant sparse array with 100,000 patches.
///
/// Each input pairs a freshly built array with a new execution context from `SESSION`.
#[divan::bench]
fn sparse_sum(bencher: Bencher) {
    let setup = || {
        let array = make_sparse(100_000, false);
        (array, SESSION.create_execution_ctx())
    };

    bencher.with_inputs(setup).bench_values(|(array, mut ctx)| {
        let total = sum(&array, &mut ctx).vortex_expect("sum");
        divan::black_box(total)
    });
}

/// Benchmarks `min_max` on a non-constant sparse array with 40,000 patches.
///
/// Each input pairs a freshly built array with a new execution context from `SESSION`.
#[divan::bench]
fn sparse_min_max(bencher: Bencher) {
    let setup = || {
        let array = make_sparse(40_000, false);
        (array, SESSION.create_execution_ctx())
    };

    bencher.with_inputs(setup).bench_values(|(array, mut ctx)| {
        let extremes = min_max(&array, &mut ctx).vortex_expect("min_max");
        divan::black_box(extremes)
    });
}

/// Benchmarks `null_count` on a null-filled sparse array with 130,000 nullable patches.
///
/// Each input pairs a freshly built array with a new execution context from `SESSION`.
#[divan::bench]
fn sparse_null_count(bencher: Bencher) {
    let setup = || {
        let array = make_sparse_nullable(130_000);
        (array, SESSION.create_execution_ctx())
    };

    bencher.with_inputs(setup).bench_values(|(array, mut ctx)| {
        let nulls = null_count(&array, &mut ctx).vortex_expect("null_count");
        divan::black_box(nulls)
    });
}

/// Benchmarks an equality comparison between a non-constant sparse array (10,000 patches)
/// and a constant array of ones, executing the result to canonical form.
///
/// The constant right-hand side is built inside the benchmarked closure, and the comparison
/// result is passed through `materialize` before being handed to `black_box`.
#[divan::bench]
fn sparse_compare(bencher: Bencher) {
    let setup = || {
        let array = make_sparse(10_000, false);
        (array, SESSION.create_execution_ctx())
    };

    bencher.with_inputs(setup).bench_values(|(array, mut ctx)| {
        let ones = ConstantArray::new(Scalar::from(1i32), array.len()).into_array();
        let equality = array.binary(ones, Operator::Eq).vortex_expect("binary");
        let executed = materialize(equality, &mut ctx);
        divan::black_box(executed)
    });
}

/// Executes `array` to its `Canonical` form with `ctx` and returns the result as an
/// `ArrayRef`.
///
/// Panics (via `vortex_expect`) if execution fails.
fn materialize(array: ArrayRef, ctx: &mut ExecutionCtx) -> ArrayRef {
    let canonical = array.execute::<Canonical>(ctx).vortex_expect("execute");
    canonical.into_array()
}
14 changes: 14 additions & 0 deletions encodings/sparse/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -68,10 +68,22 @@ impl vortex_array::arrays::slice::SliceKernel for vortex_sparse::Sparse

pub fn vortex_sparse::Sparse::slice(vortex_array::array::view::ArrayView<'_, Self>, core::ops::range::Range<usize>, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::erased::ArrayRef>>

impl vortex_array::scalar_fn::fns::between::kernel::BetweenKernel for vortex_sparse::Sparse

pub fn vortex_sparse::Sparse::between(vortex_array::array::view::ArrayView<'_, Self>, &vortex_array::array::erased::ArrayRef, &vortex_array::array::erased::ArrayRef, &vortex_array::scalar_fn::fns::between::BetweenOptions, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::erased::ArrayRef>>

impl vortex_array::scalar_fn::fns::binary::compare::CompareKernel for vortex_sparse::Sparse

pub fn vortex_sparse::Sparse::compare(vortex_array::array::view::ArrayView<'_, Self>, &vortex_array::array::erased::ArrayRef, vortex_array::scalar_fn::fns::operators::CompareOperator, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::erased::ArrayRef>>

impl vortex_array::scalar_fn::fns::cast::kernel::CastReduce for vortex_sparse::Sparse

pub fn vortex_sparse::Sparse::cast(vortex_array::array::view::ArrayView<'_, Self>, &vortex_array::dtype::DType) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::erased::ArrayRef>>

impl vortex_array::scalar_fn::fns::fill_null::kernel::FillNullKernel for vortex_sparse::Sparse

pub fn vortex_sparse::Sparse::fill_null(vortex_array::array::view::ArrayView<'_, Self>, &vortex_array::scalar::Scalar, &mut vortex_array::executor::ExecutionCtx) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::erased::ArrayRef>>

impl vortex_array::scalar_fn::fns::not::kernel::NotReduce for vortex_sparse::Sparse

pub fn vortex_sparse::Sparse::invert(vortex_array::array::view::ArrayView<'_, Self>) -> vortex_error::VortexResult<core::option::Option<vortex_array::array::erased::ArrayRef>>
Expand Down Expand Up @@ -222,4 +234,6 @@ pub fn vortex_array::array::view::ArrayView<'_, vortex_sparse::Sparse>::patches(

pub fn vortex_array::array::view::ArrayView<'_, vortex_sparse::Sparse>::resolved_patches(&self) -> vortex_error::VortexResult<vortex_array::patches::Patches>

pub fn vortex_sparse::initialize(&vortex_session::VortexSession)

pub type vortex_sparse::SparseArray = vortex_array::array::typed::Array<vortex_sparse::Sparse>
137 changes: 137 additions & 0 deletions encodings/sparse/src/compute/between.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,137 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use vortex_array::ArrayRef;
use vortex_array::ArrayView;
use vortex_array::ExecutionCtx;
use vortex_array::IntoArray;
use vortex_array::arrays::ConstantArray;
use vortex_array::builtins::ArrayBuiltins;
use vortex_array::scalar_fn::fns::between::BetweenKernel;
use vortex_array::scalar_fn::fns::between::BetweenOptions;
use vortex_error::VortexResult;

use crate::Sparse;
use crate::SparseExt as _;

/// `between` pushdown for the Sparse encoding.
///
/// With constant bounds, `lower <= x <= upper` (strictness per bound taken from `options`)
/// over a Sparse column stays sparse: unpatched positions all evaluate to
/// `between(fill, lo, hi)` and each patched position evaluates to `between(patch, lo, hi)`.
/// The kernel therefore runs the range check on the fill value and on the patch values
/// separately and reassembles a `Sparse<Bool>` from the two, preserving downstream sparsity.
///
/// Returns `Ok(None)` (falling back to canonical) unless both bounds are constants.
impl BetweenKernel for Sparse {
    fn between(
        array: ArrayView<'_, Self>,
        lower: &ArrayRef,
        upper: &ArrayRef,
        options: &BetweenOptions,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Option<ArrayRef>> {
        // Both bounds are inspected; a non-constant bound on either side declines.
        let Some((lo, hi)) = lower.as_constant().zip(upper.as_constant()) else {
            return Ok(None);
        };

        // Expands the two bound scalars into constant arrays of the requested length.
        let broadcast_bounds = |len: usize| {
            (
                ConstantArray::new(lo.clone(), len).into_array(),
                ConstantArray::new(hi.clone(), len).into_array(),
            )
        };

        let source_patches = array.patches();

        // Evaluate the fill value once through a length-1 array and read back the scalar.
        let (fill_lo, fill_hi) = broadcast_bounds(1);
        let fill_probe = ConstantArray::new(array.fill_scalar().clone(), 1).into_array();
        let new_fill = fill_probe
            .between(fill_lo, fill_hi, options.clone())?
            .execute_scalar(0, ctx)?;

        // Apply the same range check to the patch values only.
        let bool_patches = source_patches.map_values(|values| {
            let (patch_lo, patch_hi) = broadcast_bounds(values.len());
            values.between(patch_lo, patch_hi, options.clone())
        })?;

        let rebuilt = Sparse::try_new_from_patches(bool_patches, new_fill)?;
        Ok(Some(rebuilt.into_array()))
    }
}

#[cfg(test)]
mod tests {
    use std::sync::LazyLock;

    use rstest::rstest;
    use vortex_array::Canonical;
    use vortex_array::IntoArray;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::ConstantArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::builtins::ArrayBuiltins;
    use vortex_array::scalar::Scalar;
    use vortex_array::scalar_fn::fns::between::BetweenOptions;
    use vortex_array::scalar_fn::fns::between::StrictComparison;
    use vortex_array::session::ArraySession;
    use vortex_buffer::buffer;
    use vortex_session::VortexSession;

    use crate::Sparse;
    use crate::initialize;

    /// Session used by the tests, with `initialize` applied so the Sparse kernels are
    /// registered.
    static SESSION: LazyLock<VortexSession> = LazyLock::new(|| {
        let registered = VortexSession::empty().with::<ArraySession>();
        initialize(&registered);
        registered
    });

    /// `between` on a Sparse array must agree with `between` on the same data after it has
    /// been canonicalized, for every combination of bounds and strictness below.
    #[rstest]
    #[case(0i32, 100i32, StrictComparison::NonStrict, StrictComparison::NonStrict)]
    #[case(5i32, 25i32, StrictComparison::Strict, StrictComparison::Strict)]
    #[case(1i32, 20i32, StrictComparison::NonStrict, StrictComparison::Strict)]
    fn between_matches_canonical(
        #[case] lo: i32,
        #[case] hi: i32,
        #[case] lower_strict: StrictComparison,
        #[case] upper_strict: StrictComparison,
    ) {
        let mut ctx = SESSION.create_execution_ctx();

        // Eight rows filled with 1, patched at positions 1, 3 and 5.
        let sparse = Sparse::try_new(
            buffer![1u64, 3, 5].into_array(),
            buffer![10i32, 20, 30].into_array(),
            8,
            Scalar::from(1i32),
        )
        .unwrap()
        .into_array();

        let rows = sparse.len();
        let bound = |value: i32| ConstantArray::new(Scalar::from(value), rows).into_array();
        let (lower, upper) = (bound(lo), bound(hi));
        let options = BetweenOptions {
            lower_strict,
            upper_strict,
        };

        // Pushdown path: `between` is applied while the input is still Sparse.
        let pushed_down = sparse
            .clone()
            .between(lower.clone(), upper.clone(), options.clone())
            .unwrap()
            .execute::<Canonical>(&mut ctx)
            .unwrap();

        // Reference path: canonicalize first so `between` runs on a PrimitiveArray.
        let dense = sparse.execute::<Canonical>(&mut ctx).unwrap().into_array();
        let expected = dense
            .between(lower, upper, options)
            .unwrap()
            .execute::<Canonical>(&mut ctx)
            .unwrap();

        assert_arrays_eq!(pushed_down, expected);
    }
}
Loading
Loading