diff --git a/vortex-array/src/array/erased.rs b/vortex-array/src/array/erased.rs index f9dea4e0eef..f72653558d0 100644 --- a/vortex-array/src/array/erased.rs +++ b/vortex-array/src/array/erased.rs @@ -38,6 +38,7 @@ use crate::arrays::DictArray; use crate::arrays::FilterArray; use crate::arrays::Null; use crate::arrays::Primitive; +use crate::arrays::ReversedArray; use crate::arrays::SliceArray; use crate::arrays::VarBin; use crate::arrays::VarBinView; @@ -212,6 +213,19 @@ impl ArrayRef { .optimize() } + /// Wraps the array in a [`ReversedArray`] so that it is logically reversed. + /// + /// The optimizer is applied immediately, eliminating the wrapper for known encodings: + /// + /// * `Reversed(Reversed(x)) → x` — double reversal cancels out. + /// * `Reversed(Dict(codes, values)) → Dict(Reversed(codes), values)` — only the + /// codes array is reversed; the values dictionary is reused unchanged. + pub fn reverse(&self) -> VortexResult { + ReversedArray::try_new(self.clone())? + .into_array() + .optimize() + } + /// Fetch the scalar at the given index. #[deprecated( note = "Use `execute_scalar` instead, which allows passing an execution context for more \ diff --git a/vortex-array/src/arrays/chunked/compute/mod.rs b/vortex-array/src/arrays/chunked/compute/mod.rs index bbc9029657a..3438849152e 100644 --- a/vortex-array/src/arrays/chunked/compute/mod.rs +++ b/vortex-array/src/arrays/chunked/compute/mod.rs @@ -7,6 +7,7 @@ mod fill_null; mod filter; pub(crate) mod kernel; mod mask; +mod reverse; pub(crate) mod rules; mod slice; mod take; diff --git a/vortex-array/src/arrays/chunked/compute/reverse.rs b/vortex-array/src/arrays/chunked/compute/reverse.rs new file mode 100644 index 00000000000..0a0ff2bde56 --- /dev/null +++ b/vortex-array/src/arrays/chunked/compute/reverse.rs @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::IntoArray as _; +use crate::array::ArrayView; +use crate::arrays::Chunked; +use crate::arrays::ChunkedArray; +use crate::arrays::chunked::ChunkedArrayExt as _; +use crate::arrays::reversed::ReverseReduce; + +/// Reverses a `ChunkedArray` by reversing the chunk order and lazily reversing each chunk. +/// +/// Transforms `Reversed(Chunked([c0, c1, …, cn]))` into +/// `Chunked([reverse(cn), …, reverse(c1), reverse(c0)])`. +/// +/// This avoids eagerly merging all chunks into a single canonical array before reversing. +/// Each per-chunk `reverse()` call goes through the optimizer, so further reduce rules +/// (e.g. `Dict` codes-only reversal) still fire on individual chunks. +impl ReverseReduce for Chunked { + fn reverse(array: ArrayView<'_, Self>) -> VortexResult> { + let dtype = array.as_ref().dtype().clone(); + let reversed_chunks = array + .chunks() + .into_iter() + .rev() + .map(|chunk| chunk.reverse()) + .collect::>>()?; + // SAFETY: all chunks come from the original ChunkedArray and share its DType; + // reversing order and wrapping in Reversed preserves the invariant. + Ok(Some( + unsafe { ChunkedArray::new_unchecked(reversed_chunks, dtype) }.into_array(), + )) + } +} diff --git a/vortex-array/src/arrays/chunked/compute/rules.rs b/vortex-array/src/arrays/chunked/compute/rules.rs index d8d324a8e86..406ec562df7 100644 --- a/vortex-array/src/arrays/chunked/compute/rules.rs +++ b/vortex-array/src/arrays/chunked/compute/rules.rs @@ -14,6 +14,7 @@ use crate::arrays::ConstantArray; use crate::arrays::ScalarFn; use crate::arrays::ScalarFnArray; use crate::arrays::chunked::ChunkedArrayExt; +use crate::arrays::reversed::ReverseReduceAdaptor; use crate::arrays::scalar_fn::AnyScalarFn; use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::optimizer::ArrayOptimizer; @@ -27,6 +28,7 @@ pub(crate) const PARENT_RULES: ParentRuleSet = ParentRuleSet::new(&[ ParentRuleSet::lift(&ChunkedUnaryScalarFnPushDownRule), ParentRuleSet::lift(&ChunkedConstantScalarFnPushDownRule), ParentRuleSet::lift(&FillNullReduceAdaptor(Chunked)), + ParentRuleSet::lift(&ReverseReduceAdaptor(Chunked)), ]); /// Push down any unary scalar function through chunked arrays. diff --git a/vortex-array/src/arrays/dict/compute/mod.rs b/vortex-array/src/arrays/dict/compute/mod.rs index c56cc8ef367..a8970794267 100644 --- a/vortex-array/src/arrays/dict/compute/mod.rs +++ b/vortex-array/src/arrays/dict/compute/mod.rs @@ -9,6 +9,7 @@ pub(crate) mod is_sorted; mod like; mod mask; pub(crate) mod min_max; +mod reverse; pub(crate) mod rules; mod slice; diff --git a/vortex-array/src/arrays/dict/compute/reverse.rs b/vortex-array/src/arrays/dict/compute/reverse.rs new file mode 100644 index 00000000000..1c9eb256365 --- /dev/null +++ b/vortex-array/src/arrays/dict/compute/reverse.rs @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::IntoArray as _; +use crate::array::ArrayView; +use crate::arrays::Dict; +use crate::arrays::DictArray; +use crate::arrays::dict::DictArraySlotsExt as _; +use crate::arrays::reversed::ReverseReduce; + +/// Reverses a `DictArray` by reversing only the codes array. +/// +/// The values dictionary is reused unchanged. Since codes are typically small +/// integers (`u8` or `u16`), the reversal is O(n_codes) rather than O(n_rows × value_size). +/// +/// # Example +/// +/// For `Dict(codes=[2,2,1,1,0,0], values=[A, B, C])` → decoded `[C,C,B,B,A,A]`: +/// `Dict(codes=[0,0,1,1,2,2], values=[A, B, C])` → decoded `[A,A,B,B,C,C]` ✓ +impl ReverseReduce for Dict { + fn reverse(array: ArrayView<'_, Self>) -> VortexResult> { + let reversed_codes = array.codes().reverse()?; + // SAFETY: reversing codes doesn't change the dict invariants; the values + // dictionary is untouched and all code indices remain valid. + Ok(Some( + unsafe { DictArray::new_unchecked(reversed_codes, array.values().clone()) } + .into_array(), + )) + } +} diff --git a/vortex-array/src/arrays/dict/compute/rules.rs b/vortex-array/src/arrays/dict/compute/rules.rs index f6fe816a6cc..c86114c4988 100644 --- a/vortex-array/src/arrays/dict/compute/rules.rs +++ b/vortex-array/src/arrays/dict/compute/rules.rs @@ -17,6 +17,7 @@ use crate::arrays::ScalarFn; use crate::arrays::ScalarFnArray; use crate::arrays::dict::DictArraySlotsExt; use crate::arrays::filter::FilterReduceAdaptor; +use crate::arrays::reversed::ReverseReduceAdaptor; use crate::arrays::scalar_fn::AnyScalarFn; use crate::arrays::scalar_fn::ScalarFnArrayExt; use crate::arrays::slice::SliceReduceAdaptor; @@ -38,6 +39,7 @@ pub(crate) const PARENT_RULES: ParentRuleSet = ParentRuleSet::new(&[ ParentRuleSet::lift(&LikeReduceAdaptor(Dict)), ParentRuleSet::lift(&DictionaryScalarFnValuesPushDownRule), ParentRuleSet::lift(&DictionaryScalarFnCodesPullUpRule), + ParentRuleSet::lift(&ReverseReduceAdaptor(Dict)), ParentRuleSet::lift(&SliceReduceAdaptor(Dict)), ]); diff --git a/vortex-array/src/arrays/mod.rs b/vortex-array/src/arrays/mod.rs index 212f4bbe619..c79fe8c88f1 100644 --- a/vortex-array/src/arrays/mod.rs +++ b/vortex-array/src/arrays/mod.rs @@ -74,6 +74,10 @@ pub mod primitive; pub use primitive::Primitive; pub use primitive::PrimitiveArray; +pub mod reversed; +pub use reversed::Reversed; +pub use reversed::ReversedArray; + pub mod scalar_fn; pub use scalar_fn::ScalarFn; pub use scalar_fn::ScalarFnArray; diff --git a/vortex-array/src/arrays/reversed/array.rs b/vortex-array/src/arrays/reversed/array.rs new file mode 100644 index 00000000000..c16fa5c514f --- /dev/null +++ b/vortex-array/src/arrays/reversed/array.rs @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexExpect as _; +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::array::{Array, ArrayParts, EmptyArrayData, TypedArrayRef}; +use crate::arrays::Reversed; + +/// Slot index for the inner (to-be-reversed) child array. +pub(super) const CHILD_SLOT: usize = 0; +pub(super) const NUM_SLOTS: usize = 1; +pub(super) const SLOT_NAMES: [&str; NUM_SLOTS] = ["child"]; + +/// Extension trait for accessing [`ReversedArray`](crate::arrays::ReversedArray) properties. +pub trait ReversedArrayExt: TypedArrayRef { + /// Returns the inner array whose elements will be yielded in reverse order. + fn child(&self) -> &ArrayRef { + self.as_ref().slots()[CHILD_SLOT] + .as_ref() + .vortex_expect("validated ReversedArray child slot") + } +} + +impl> ReversedArrayExt for T {} + +impl Array { + /// Wraps `child` in a [`ReversedArray`](crate::arrays::ReversedArray). + pub fn try_new(child: ArrayRef) -> VortexResult { + let dtype = child.dtype().clone(); + let len = child.len(); + Array::try_from_parts( + ArrayParts::new(Reversed, dtype, len, EmptyArrayData).with_slots(vec![Some(child)]), + ) + } + + /// Wraps `child` in a [`ReversedArray`](crate::arrays::ReversedArray) without validation. + /// + /// # Safety + /// + /// Caller must ensure `child` is a valid array. + pub unsafe fn new_unchecked(child: ArrayRef) -> Self { + let dtype = child.dtype().clone(); + let len = child.len(); + unsafe { + Array::from_parts_unchecked( + ArrayParts::new(Reversed, dtype, len, EmptyArrayData).with_slots(vec![Some(child)]), + ) + } + } +} diff --git a/vortex-array/src/arrays/reversed/execute.rs b/vortex-array/src/arrays/reversed/execute.rs new file mode 100644 index 00000000000..375a727532f --- /dev/null +++ b/vortex-array/src/arrays/reversed/execute.rs @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::BitBuffer; +use vortex_buffer::Buffer; +use vortex_error::VortexResult; + +use crate::arrays::BoolArray; +use crate::arrays::PrimitiveArray; +use crate::arrays::StructArray; +use crate::arrays::bool::BoolArrayExt as _; +use crate::arrays::primitive::PrimitiveArrayExt as _; +use crate::arrays::struct_::StructArrayExt as _; +use crate::canonical::Canonical; +use crate::executor::ExecutionCtx; +use crate::match_each_native_ptype; +use crate::validity::Validity; +use crate::{ArrayRef, IntoArray as _}; + +/// Reverses a canonical array, dispatching to type-specific fast paths where possible. +/// +/// Fast paths: +/// - `Bool`: reverses the bit buffer directly via `value_unchecked` — O(n), no extra allocation. +/// - `Primitive`: reverses the element buffer directly — O(n), no extra allocation. +/// - `Struct`: reverses each field lazily via [`ArrayRef::reverse`] — allows per-field +/// optimisations (e.g. the `Dict` reduce rule fires on dict-encoded fields). +/// +/// All other canonical variants fall back to a reversed-index `take`, which is equivalent +/// to the generic path but is deferred to decode time. +pub(super) fn reverse_canonical( + child: &ArrayRef, + ctx: &mut ExecutionCtx, +) -> VortexResult { + let n = child.len(); + if n <= 1 { + return Ok(child.clone()); + } + + let canonical = child.clone().execute::(ctx)?; + Ok(match canonical { + Canonical::Bool(a) => reverse_bool(&a)?.into_array(), + Canonical::Primitive(a) => reverse_primitive(&a)?.into_array(), + Canonical::Struct(a) => reverse_struct(&a)?.into_array(), + // All other canonical types: reverse via take with reversed indices. + _ => { + let indices = PrimitiveArray::from_iter((0u64..n as u64).rev()).into_array(); + child.take(indices)? + } + }) +} + +/// Reverses a `BoolArray` by reading each bit in reverse order. +/// +/// Uses `value_unchecked` for O(n) direct bit access with no intermediate `Vec` allocation, +/// and correctly handles the buffer's bit offset. +fn reverse_bool(array: &BoolArray) -> VortexResult { + let validity = reverse_validity(array.validity()?)?; + let bits = array.to_bit_buffer(); + let n = bits.len(); + let reversed = BitBuffer::collect_bool(n, |i| { + // SAFETY: `n - 1 - i` is in `[0, n)` since `i` is in `[0, n)`. + unsafe { bits.value_unchecked(n - 1 - i) } + }); + Ok(BoolArray::new(reversed, validity)) +} + +/// Reverses a `PrimitiveArray` by iterating the typed buffer backwards. +/// +/// This is O(n × element_width) and sequential in both reads and writes, so it is +/// highly cache-friendly and eligible for auto-vectorisation. +fn reverse_primitive(array: &PrimitiveArray) -> VortexResult { + let validity = reverse_validity(array.validity()?)?; + match_each_native_ptype!(array.ptype(), |T| { + let reversed: Vec = array.as_slice::().iter().rev().copied().collect(); + Ok(PrimitiveArray::new(Buffer::from(reversed), validity)) + }) +} + +/// Reverses a `StructArray` by lazily reversing each child field. +/// +/// Each field is reversed via [`ArrayRef::reverse`], which in turn runs the optimizer. +/// For dict-encoded fields this fires the `ReverseReduce for Dict` rule, so only the +/// (small) codes array is reversed; the values dictionary remains untouched. +fn reverse_struct(array: &StructArray) -> VortexResult { + let validity = reverse_validity(array.struct_validity())?; + let names = array.names().clone(); + let n = array.len(); + let reversed_fields = array + .iter_unmasked_fields() + .map(|field| field.reverse()) + .collect::>>()?; + StructArray::try_new(names, reversed_fields, n, validity) +} + +/// Reverses a [`Validity`] value. +/// +/// `NonNullable`, `AllValid`, and `AllInvalid` are identity under reversal. +/// `Array` variants are reversed lazily: `arr.reverse()` creates a +/// `ReversedArray` wrapper that is further optimised at decode time. +fn reverse_validity(validity: Validity) -> VortexResult { + match validity { + Validity::NonNullable => Ok(Validity::NonNullable), + Validity::AllValid => Ok(Validity::AllValid), + Validity::AllInvalid => Ok(Validity::AllInvalid), + Validity::Array(arr) => Ok(Validity::Array(arr.reverse()?)), + } +} diff --git a/vortex-array/src/arrays/reversed/mod.rs b/vortex-array/src/arrays/reversed/mod.rs new file mode 100644 index 00000000000..a327aecb804 --- /dev/null +++ b/vortex-array/src/arrays/reversed/mod.rs @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +//! Reverse encoding — yields the elements of the inner array in reverse order. +//! +//! [`ReversedArray`] is a lazy wrapper created by [`ArrayRef::reverse`]. The +//! optimizer is applied immediately after construction, collapsing common patterns +//! before any data is read: +//! +//! * **Double-reversal cancellation**: `Reversed(Reversed(x)) → x` — both wrappers +//! are eliminated with zero data movement. +//! * **Dict codes reversal**: `Reversed(Dict(codes, values)) → Dict(Reversed(codes), values)` — +//! only the codes array (typically `u8`/`u16`) is reversed; the values dictionary is +//! reused unchanged. This is the primary optimisation: most real-world columns are +//! dictionary-encoded, so the per-chunk reversal cost is O(n_codes) rather than O(n_rows). +//! +//! For encodings that have no reduce rule the `ReversedArray` wrapper survives to +//! decode time, where [`execute.rs`](self::execute) reverses the canonical form +//! directly: +//! +//! * `Primitive`: iterates the typed buffer backwards — O(n), fully sequential. +//! * `Struct`: calls [`ArrayRef::reverse`] on every child field, enabling per-field +//! optimisations (e.g. the Dict rule fires on dict-encoded struct fields). +//! * Everything else: falls back to a reversed-index `take`. +//! +//! ## Implementing a custom optimisation +//! +//! Encodings that can be reversed more efficiently than `take(reversed_indices)` should +//! implement [`ReverseReduce`] and register [`ReverseReduceAdaptor`] in their +//! `PARENT_RULES`. See `dict/compute/reverse.rs` for a worked example. + +mod array; +pub(crate) mod execute; +mod rules; +#[cfg(test)] +mod tests; +mod vtable; + +pub use array::ReversedArrayExt; +pub use vtable::{Reversed, ReversedArray}; + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::array::{ArrayView, VTable}; +use crate::matcher::Matcher; +use crate::optimizer::rules::ArrayParentReduceRule; + +/// Metadata-only reversal for encodings that can avoid a full `take`. +/// +/// Implement this for your encoding and register [`ReverseReduceAdaptor`] in its +/// `PARENT_RULES` to enable structural reversal optimisation. The most important +/// case is [`Dict`](crate::arrays::Dict): reversing only requires reversing the +/// codes array; the values dictionary is reused unchanged. +/// +/// # Contract +/// +/// The returned array, when decoded, must yield the same elements as `array` in +/// reverse order. Return `None` to fall back to the default execution path. +pub trait ReverseReduce: VTable { + /// Returns an array equivalent to reversing `array`, or `None` to fall back. + fn reverse(array: ArrayView<'_, Self>) -> VortexResult>; +} + +/// Adaptor that wraps a [`ReverseReduce`] implementation as an +/// [`ArrayParentReduceRule`]. +/// +/// Register a `ReverseReduceAdaptor(YourEncoding)` in your encoding's +/// `PARENT_RULES` constant to enable the structural reversal optimisation. +#[derive(Default, Debug)] +pub struct ReverseReduceAdaptor(pub V); + +impl ArrayParentReduceRule for ReverseReduceAdaptor { + type Parent = Reversed; + + fn reduce_parent( + &self, + array: ArrayView<'_, V>, + _parent: ::Match<'_>, + child_idx: usize, + ) -> VortexResult> { + debug_assert_eq!(child_idx, 0, "ReversedArray has exactly one child"); + // A one-element (or empty) array is already its own reverse. + if array.len() <= 1 { + return Ok(Some(array.array().clone())); + } + ::reverse(array) + } +} diff --git a/vortex-array/src/arrays/reversed/rules.rs b/vortex-array/src/arrays/reversed/rules.rs new file mode 100644 index 00000000000..c61ca99965c --- /dev/null +++ b/vortex-array/src/arrays/reversed/rules.rs @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexResult; + +use crate::ArrayRef; +use crate::array::ArrayView; +use crate::arrays::Reversed; +use crate::arrays::reversed::ReversedArrayExt as _; +use crate::optimizer::rules::ArrayParentReduceRule; +use crate::optimizer::rules::ParentRuleSet; + +/// Parent rules for [`ReversedArray`](crate::arrays::ReversedArray). +/// +/// Registers the double-reversal cancellation rule: `Reversed(Reversed(x)) → x`. +/// When an inner `ReversedArray` sees another `ReversedArray` as its parent, +/// it returns its own child, eliminating both wrappers with zero data movement. +pub(super) const PARENT_RULES: ParentRuleSet = + ParentRuleSet::new(&[ParentRuleSet::lift(&DoubleReversalCancelRule)]); + +/// Cancels two nested reversals: `Reversed(Reversed(x)) → x`. +#[derive(Debug)] +struct DoubleReversalCancelRule; + +impl ArrayParentReduceRule for DoubleReversalCancelRule { + type Parent = Reversed; + + fn reduce_parent( + &self, + array: ArrayView<'_, Reversed>, + _parent: ArrayView<'_, Reversed>, + child_idx: usize, + ) -> VortexResult> { + debug_assert_eq!(child_idx, 0, "ReversedArray has exactly one child"); + Ok(Some(array.child().clone())) + } +} diff --git a/vortex-array/src/arrays/reversed/tests.rs b/vortex-array/src/arrays/reversed/tests.rs new file mode 100644 index 00000000000..a4aab4ddf1b --- /dev/null +++ b/vortex-array/src/arrays/reversed/tests.rs @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_buffer::buffer; + +use crate::Canonical; +use crate::IntoArray as _; +use crate::LEGACY_SESSION; +use crate::VortexSessionExecute as _; +use crate::arrays::struct_::StructArrayExt as _; +use crate::arrays::{ + BoolArray, Chunked, ChunkedArray, Dict, PrimitiveArray, Reversed, StructArray, +}; +use crate::assert_arrays_eq; +use crate::builders::dict::dict_encode; +use crate::dtype::DType; +use crate::dtype::FieldNames; +use crate::dtype::Nullability; +use crate::dtype::PType; +use crate::validity::Validity; + +#[test] +fn test_reverse_primitive() { + let arr = buffer![1i32, 2, 3, 4, 5].into_array(); + let reversed = arr.reverse().unwrap(); + let expected = PrimitiveArray::from_iter([5i32, 4, 3, 2, 1]); + assert_arrays_eq!(reversed, expected); +} + +#[test] +fn test_reverse_nullable_primitive() { + let arr = PrimitiveArray::from_option_iter([Some(1i32), None, Some(3)]).into_array(); + let reversed = arr.reverse().unwrap(); + let expected = PrimitiveArray::from_option_iter([Some(3i32), None, Some(1)]); + assert_arrays_eq!(reversed, expected); +} + +#[test] +fn test_reverse_empty_is_identity() { + let arr = PrimitiveArray::from_iter([] as [i32; 0]).into_array(); + let reversed = arr.reverse().unwrap(); + assert_arrays_eq!(reversed, arr); +} + +#[test] +fn test_reverse_single_is_identity() { + let arr = buffer![42i32].into_array(); + let reversed = arr.reverse().unwrap(); + assert_arrays_eq!(reversed, arr); +} + +/// Double reversal must cancel out: `x.reverse().reverse()` returns the original +/// array without any `ReversedArray` wrapper. +#[test] +fn test_double_reversal_cancels() { + let arr = buffer![1i32, 2, 3, 4, 5].into_array(); + let double_reversed = arr.reverse().unwrap().reverse().unwrap(); + assert!( + !double_reversed.is::(), + "double reversal should eliminate both Reversed wrappers" + ); + assert_arrays_eq!(double_reversed, arr); +} + +#[test] +fn test_reverse_bool() { + let arr = BoolArray::from_iter([true, false, true, true, false]).into_array(); + let reversed = arr.reverse().unwrap(); + let expected = BoolArray::from_iter([false, true, true, false, true]); + assert_arrays_eq!(reversed, expected); +} + +#[test] +fn test_reverse_nullable_bool() { + let arr = BoolArray::from_iter([Some(true), None, Some(false)]).into_array(); + let reversed = arr.reverse().unwrap(); + let expected = BoolArray::from_iter([Some(false), None, Some(true)]); + assert_arrays_eq!(reversed, expected); +} + +/// Reversing a dict-encoded array must fire the `ReverseReduceAdaptor(Dict)` rule, +/// producing `Dict(Reversed(codes), values)` rather than `Reversed(Dict(...))`. +/// Only the codes array (small integers) is reversed; the values dictionary is reused. +#[test] +fn test_reverse_dict_produces_dict() { + let arr = dict_encode(&buffer![1i32, 2, 3, 2, 1].into_array()).unwrap(); + let reversed = arr.into_array().reverse().unwrap(); + assert!( + reversed.is::(), + "dict reversal should produce a Dict, not a Reversed(Dict)" + ); + let expected = PrimitiveArray::from_iter([1i32, 2, 3, 2, 1].iter().rev().copied()); + assert_arrays_eq!(reversed, expected); +} + +/// Reversing a nullable dict-encoded array also preserves the Dict encoding. +#[test] +fn test_reverse_nullable_dict_produces_dict() { + let arr = dict_encode( + &PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), None, Some(10)]) + .into_array(), + ) + .unwrap(); + let reversed = arr.into_array().reverse().unwrap(); + assert!(reversed.is::()); + let expected = PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), None, Some(10)]); + assert_arrays_eq!(reversed, expected); +} + +/// Reversing a struct array reverses each field independently. +#[test] +fn test_reverse_struct() { + let arr = StructArray::try_new( + FieldNames::from(["x", "y"]), + vec![ + buffer![10i32, 20, 30].into_array(), + buffer![1u64, 2, 3].into_array(), + ], + 3, + Validity::NonNullable, + ) + .unwrap() + .into_array(); + + let reversed = arr.reverse().unwrap(); + let expected = StructArray::try_new( + FieldNames::from(["x", "y"]), + vec![ + buffer![30i32, 20, 10].into_array(), + buffer![3u64, 2, 1].into_array(), + ], + 3, + Validity::NonNullable, + ) + .unwrap(); + assert_arrays_eq!(reversed, expected); +} + +/// Dict-encoded fields inside a struct remain dict-encoded after reversal. +/// The struct's `reverse_struct` path calls `field.reverse()` on each child, +/// which in turn fires `ReverseReduceAdaptor(Dict)`. +#[test] +fn test_reverse_struct_preserves_dict_encoding() { + let field = dict_encode(&buffer![1i32, 2, 1, 2].into_array()) + .unwrap() + .into_array(); + let arr = StructArray::try_new( + FieldNames::from(["x"]), + vec![field], + 4, + Validity::NonNullable, + ) + .unwrap() + .into_array(); + + let reversed = arr.reverse().unwrap(); + + // Execute to get the canonical struct with its reversed fields. + let canonical = reversed + .execute::(&mut LEGACY_SESSION.create_execution_ctx()) + .unwrap(); + let Canonical::Struct(s) = canonical else { + panic!("expected Struct canonical"); + }; + // The field should still be dict-encoded (codes reversed, values intact). + assert!( + s.unmasked_field(0).is::(), + "dict field should remain Dict-encoded after struct reversal" + ); + let expected = PrimitiveArray::from_iter([2i32, 1, 2, 1]); + assert_arrays_eq!(s.unmasked_field(0), expected); +} + +/// Reversing a `ChunkedArray` must fire the `ReverseReduceAdaptor(Chunked)` rule, +/// producing `Chunked([reverse(cn), …, reverse(c0)])` rather than `Reversed(Chunked(…))`. +/// This avoids eagerly merging all chunks before reversing. +#[test] +fn test_reverse_chunked_produces_chunked() { + let arr = ChunkedArray::try_new( + vec![ + buffer![1i32, 2, 3].into_array(), + buffer![4i32, 5].into_array(), + ], + DType::Primitive(PType::I32, Nullability::NonNullable), + ) + .unwrap() + .into_array(); + + let reversed = arr.reverse().unwrap(); + assert!( + reversed.is::(), + "chunked reversal should produce Chunked, not Reversed(Chunked)" + ); + // Values must be fully reversed across chunk boundaries. + let expected = PrimitiveArray::from_iter([5i32, 4, 3, 2, 1]); + assert_arrays_eq!(reversed, expected); +} + +/// Each individual chunk within the reversed `ChunkedArray` must itself be reversed, +/// not just the chunk order. +#[test] +fn test_reverse_chunked_per_chunk_reversal() { + let arr = ChunkedArray::try_new( + vec![ + buffer![10i32, 20, 30].into_array(), + buffer![40i32, 50].into_array(), + buffer![60i32].into_array(), + ], + DType::Primitive(PType::I32, Nullability::NonNullable), + ) + .unwrap() + .into_array(); + + // Expected: last chunk first ([60]), middle chunk reversed ([50, 40]), + // first chunk reversed ([30, 20, 10]). + let reversed = arr.reverse().unwrap(); + let expected = PrimitiveArray::from_iter([60i32, 50, 40, 30, 20, 10]); + assert_arrays_eq!(reversed, expected); +} diff --git a/vortex-array/src/arrays/reversed/vtable.rs b/vortex-array/src/arrays/reversed/vtable.rs new file mode 100644 index 00000000000..65af6077c98 --- /dev/null +++ b/vortex-array/src/arrays/reversed/vtable.rs @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: Apache-2.0 +// SPDX-FileCopyrightText: Copyright the Vortex contributors + +use vortex_error::VortexExpect as _; +use vortex_error::VortexResult; +use vortex_error::vortex_bail; +use vortex_error::vortex_ensure; +use vortex_error::vortex_panic; +use vortex_session::VortexSession; +use vortex_session::registry::CachedId; + +use crate::AnyCanonical; +use crate::ArrayRef; +use crate::array::Array; +use crate::array::ArrayId; +use crate::array::ArrayView; +use crate::array::EmptyArrayData; +use crate::array::OperationsVTable; +use crate::array::VTable; +use crate::array::ValidityVTable; +use crate::arrays::reversed::ReversedArrayExt as _; +use crate::arrays::reversed::array::{CHILD_SLOT, SLOT_NAMES}; +use crate::arrays::reversed::execute::reverse_canonical; +use crate::arrays::reversed::rules::PARENT_RULES; +use crate::buffer::BufferHandle; +use crate::dtype::DType; +use crate::executor::ExecutionCtx; +use crate::executor::ExecutionResult; +use crate::require_child; +use crate::scalar::Scalar; +use crate::serde::ArrayChildren; +use crate::validity::Validity; + +/// A [`Reversed`]-encoded Vortex array. +/// +/// A lazy wrapper that yields the elements of the inner array in reverse order. +/// The reversal is applied at execution time via [`reverse_canonical`]. +/// +/// Use [`ArrayRef::reverse`] to construct one; the optimizer is applied immediately +/// and may eliminate the wrapper for well-known encodings. +pub type ReversedArray = Array; + +/// Encoding tag for [`ReversedArray`]. +#[derive(Clone, Debug)] +pub struct Reversed; + +impl VTable for Reversed { + type ArrayData = EmptyArrayData; + type OperationsVTable = Self; + type ValidityVTable = Self; + + fn id(&self) -> ArrayId { + static ID: CachedId = CachedId::new("vortex.reversed"); + *ID + } + + fn validate( + &self, + _data: &EmptyArrayData, + dtype: &DType, + len: usize, + slots: &[Option], + ) -> VortexResult<()> { + vortex_ensure!( + slots[CHILD_SLOT].is_some(), + "ReversedArray child slot must be present" + ); + let child = slots[CHILD_SLOT] + .as_ref() + .vortex_expect("validated child slot"); + vortex_ensure!( + child.dtype() == dtype, + "ReversedArray dtype {} does not match child dtype {}", + dtype, + child.dtype(), + ); + vortex_ensure!( + child.len() == len, + "ReversedArray length {} does not match child length {}", + len, + child.len(), + ); + Ok(()) + } + + fn nbuffers(_array: ArrayView<'_, Self>) -> usize { + 0 + } + + fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle { + vortex_panic!("ReversedArray has no buffers (index {idx})") + } + + fn buffer_name(_array: ArrayView<'_, Self>, _idx: usize) -> Option { + None + } + + fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String { + SLOT_NAMES + .get(idx) + .copied() + .unwrap_or_else(|| vortex_panic!("ReversedArray slot index {idx} out of bounds")) + .to_string() + } + + fn serialize( + _array: ArrayView<'_, Self>, + _session: &VortexSession, + ) -> VortexResult>> { + vortex_bail!("ReversedArray is not serializable") + } + + fn deserialize( + &self, + _dtype: &DType, + _len: usize, + _metadata: &[u8], + _buffers: &[BufferHandle], + _children: &dyn ArrayChildren, + _session: &VortexSession, + ) -> VortexResult> { + vortex_bail!("ReversedArray is not serializable") + } + + fn execute(array: Array, ctx: &mut ExecutionCtx) -> VortexResult { + // Ensure the child is in canonical form before reversing. + let array = require_child!(array, array.child(), CHILD_SLOT => AnyCanonical); + debug_assert!(array.child().is_canonical()); + reverse_canonical(array.child(), ctx).map(ExecutionResult::done) + } + + fn reduce_parent( + array: ArrayView<'_, Self>, + parent: &ArrayRef, + child_idx: usize, + ) -> VortexResult> { + PARENT_RULES.evaluate(array, parent, child_idx) + } +} + +impl OperationsVTable for Reversed { + fn scalar_at( + array: ArrayView<'_, Reversed>, + index: usize, + ctx: &mut ExecutionCtx, + ) -> VortexResult { + let reversed_index = array.len() - 1 - index; + array.child().execute_scalar(reversed_index, ctx) + } +} + +impl ValidityVTable for Reversed { + fn validity(array: ArrayView<'_, Reversed>) -> VortexResult { + let inner = array.child().validity()?; + match inner { + Validity::NonNullable => Ok(Validity::NonNullable), + Validity::AllValid => Ok(Validity::AllValid), + Validity::AllInvalid => Ok(Validity::AllInvalid), + Validity::Array(arr) => Ok(Validity::Array(arr.reverse()?)), + } + } +} diff --git a/vortex-layout/src/scan/repeated_scan.rs b/vortex-layout/src/scan/repeated_scan.rs index a0f2101556c..777c951fff9 100644 --- a/vortex-layout/src/scan/repeated_scan.rs +++ b/vortex-layout/src/scan/repeated_scan.rs @@ -40,6 +40,8 @@ pub struct RepeatedScan { projection: Expression, filter: Option, ordered: bool, + /// Whether to iterate chunks in reverse order (last chunk first). + reversed: bool, /// Optionally read a subset of the rows in the file. row_range: Option>, /// The selection mask to apply to the selected row range. @@ -101,6 +103,7 @@ impl RepeatedScan { map_fn: Arc VortexResult + Send + Sync>, limit: Option, dtype: DType, + reversed: bool, ) -> Self { Self { session, @@ -108,6 +111,7 @@ impl RepeatedScan { projection, filter, ordered, + reversed, row_range, selection, splits, @@ -168,6 +172,12 @@ impl RepeatedScan { let mut limit = self.limit; let mut tasks = Vec::new(); + let ranges = if self.reversed { + Either::Left(ranges.collect::>().into_iter().rev()) + } else { + Either::Right(ranges) + }; + for range in ranges { if range.start >= range.end { continue; @@ -195,7 +205,7 @@ impl RepeatedScan { let stream = futures::stream::iter(self.execute(row_range)?).map(move |task| handle.spawn(task)); - let stream = if self.ordered { + let stream = if self.ordered || self.reversed { stream.buffered(concurrency).boxed() } else { stream.buffer_unordered(concurrency).boxed() diff --git a/vortex-layout/src/scan/scan_builder.rs b/vortex-layout/src/scan/scan_builder.rs index bdf5d0bfb11..dc71aa34e32 100644 --- a/vortex-layout/src/scan/scan_builder.rs +++ b/vortex-layout/src/scan/scan_builder.rs @@ -56,6 +56,10 @@ pub struct ScanBuilder { filter: Option, /// Whether the scan needs to return splits in the order they appear in the file. ordered: bool, + /// Whether to yield chunks in reverse file order, with rows within each chunk also reversed. + /// + /// Implies ordered output (chunks are emitted in strict reverse sequence, not interleaved). + reversed: bool, /// Optionally read a subset of the rows in the file. row_range: Option>, /// The selection mask to apply to the selected row range. @@ -96,6 +100,7 @@ impl ScanBuilder { file_stats: None, limit: None, row_offset: 0, + reversed: false, } } @@ -147,6 +152,20 @@ impl ScanBuilder { self } + pub fn reversed(&self) -> bool { + self.reversed + } + + /// Reverse the scan order: chunks are yielded last-to-first, and rows within each chunk are + /// also reversed. This produces a globally reversed row sequence without reading the whole + /// file first. + /// + /// Reversed scans always produce ordered output (equivalent to `with_ordered(true)`). + pub fn with_reversed(mut self, reversed: bool) -> Self { + self.reversed = reversed; + self + } + pub fn with_row_range(mut self, row_range: Range) -> Self { self.row_range = Some(row_range); self @@ -234,6 +253,7 @@ impl ScanBuilder { file_stats: self.file_stats, limit: self.limit, row_offset: self.row_offset, + reversed: self.reversed, map_fn: Arc::new(move |a| old_map_fn(a).and_then(&map_fn)), } } @@ -286,6 +306,14 @@ impl ScanBuilder { )?) }; + let map_fn = if self.reversed { + let original = self.map_fn; + Arc::new(move |array: ArrayRef| original(array.reverse()?)) + as Arc VortexResult + Send + Sync> + } else { + self.map_fn + }; + Ok(RepeatedScan::new( self.session.clone(), layout_reader, @@ -296,9 +324,10 @@ impl ScanBuilder { self.selection, splits, self.concurrency, - self.map_fn, + map_fn, self.limit, dtype, + self.reversed, )) } @@ -367,7 +396,7 @@ impl Stream for LazyScanStream { match &mut self.state { LazyScanState::Builder(builder) => { let builder = builder.take().vortex_expect("polled after completion"); - let ordered = builder.ordered; + let ordered = builder.ordered || builder.reversed; let num_workers = get_available_parallelism().unwrap_or(1); let concurrency = builder.concurrency * num_workers; let handle = builder.session.handle();