Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions encodings/dict/src/builders/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,8 @@ use std::sync::Arc;

use arrow_buffer::NullBufferBuilder;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::arrays::{
BinaryView, PrimitiveArray, VarBinVTable, VarBinViewArray, VarBinViewVTable,
};
use vortex_array::arrays::binary_view::BinaryView;
use vortex_array::arrays::{PrimitiveArray, VarBinVTable, VarBinViewArray, VarBinViewVTable};
use vortex_array::validity::Validity;
use vortex_array::{Array, ArrayRef, IntoArray};
use vortex_buffer::{BufferMut, ByteBufferMut};
Expand Down Expand Up @@ -65,7 +64,7 @@ impl<Code: UnsignedPType> BytesDictBuilder<Code> {
if bin_view.is_inlined() {
bin_view.as_inlined().value()
} else {
&self.values[bin_view.as_view().to_range()]
&self.values[bin_view.as_view().as_range()]
}
})
}
Expand Down
3 changes: 2 additions & 1 deletion encodings/fsst/src/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,8 @@

use std::sync::Arc;

use vortex_array::arrays::{BinaryView, VarBinViewArray};
use vortex_array::arrays::VarBinViewArray;
use vortex_array::arrays::binary_view::BinaryView;
use vortex_array::builders::{ArrayBuilder, VarBinViewBuilder};
use vortex_array::vtable::{CanonicalVTable, ValidityHelper};
use vortex_array::{Canonical, IntoArray, ToCanonical};
Expand Down
7 changes: 4 additions & 3 deletions encodings/sparse/src/canonical.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,10 @@ use std::sync::Arc;

use itertools::Itertools;
use num_traits::NumCast;
use vortex_array::arrays::binary_view::BinaryView;
use vortex_array::arrays::{
BinaryView, BoolArray, BooleanBuffer, ConstantArray, FixedSizeListArray, ListArray, NullArray,
PrimitiveArray, StructArray, VarBinViewArray, smallest_storage_type,
BoolArray, BooleanBuffer, ConstantArray, FixedSizeListArray, ListArray, NullArray,
PrimitiveArray, StructArray, VarBinViewArray, smallest_decimal_value_type,
};
use vortex_array::builders::{ArrayBuilder, DecimalBuilder, ListBuilder, builder_with_capacity};
use vortex_array::patches::Patches;
Expand Down Expand Up @@ -56,7 +57,7 @@ impl CanonicalVTable<SparseVTable> for SparseVTable {
array.len(),
),
DType::Decimal(decimal_dtype, nullability) => {
let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
let canonical_decimal_value_type = smallest_decimal_value_type(decimal_dtype);
let fill_value = array.fill_scalar().as_decimal();
match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
canonicalize_sparse_decimal::<D>(
Expand Down
3 changes: 2 additions & 1 deletion encodings/zstd/src/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@ use std::sync::Arc;

use itertools::Itertools as _;
use vortex_array::accessor::ArrayAccessor;
use vortex_array::arrays::{BinaryView, ConstantArray, PrimitiveArray, VarBinViewArray};
use vortex_array::arrays::binary_view::BinaryView;
use vortex_array::arrays::{ConstantArray, PrimitiveArray, VarBinViewArray};
use vortex_array::compute::filter;
use vortex_array::stats::{ArrayStats, StatsSetRef};
use vortex_array::validity::Validity;
Expand Down
8 changes: 3 additions & 5 deletions vortex-array/src/arrays/arbitrary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,8 @@ use vortex_error::{VortexExpect, VortexUnwrap};
use vortex_scalar::arbitrary::random_scalar;
use vortex_scalar::{Scalar, match_each_decimal_value_type};

use super::{
BoolArray, ChunkedArray, NullArray, PrimitiveArray, StructArray, smallest_storage_type,
};
use crate::arrays::{VarBinArray, VarBinViewArray};
use super::{BoolArray, ChunkedArray, NullArray, PrimitiveArray, StructArray};
use crate::arrays::{VarBinArray, VarBinViewArray, smallest_decimal_value_type};
use crate::builders::{ArrayBuilder, DecimalBuilder, FixedSizeListBuilder};
use crate::validity::Validity;
use crate::{Array, ArrayRef, IntoArray, ToCanonical, builders};
Expand Down Expand Up @@ -99,7 +97,7 @@ fn random_array_chunk(
},
DType::Decimal(decimal, n) => {
let elem_len = chunk_len.unwrap_or(u.int_in_range(0..=20)?);
match_each_decimal_value_type!(smallest_storage_type(decimal), |DVT| {
match_each_decimal_value_type!(smallest_decimal_value_type(decimal), |DVT| {
let mut builder =
DecimalBuilder::new::<DVT>(decimal.precision(), decimal.scale(), *n);
for _i in 0..elem_len {
Expand Down
131 changes: 74 additions & 57 deletions vortex-array/src/arrays/bool/array.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,17 +5,35 @@ use std::ops::BitAnd;

use arrow_array::BooleanArray;
use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder, MutableBuffer};
use itertools::Itertools;
use vortex_buffer::ByteBuffer;
use vortex_dtype::DType;
use vortex_dtype::{DType, match_each_integer_ptype};
use vortex_error::{VortexExpect, VortexResult, vortex_ensure};
use vortex_mask::Mask;

use crate::Canonical;
use crate::arrays::{BoolVTable, bool};
use crate::builders::ArrayBuilder;
use crate::stats::{ArrayStats, StatsSetRef};
use crate::ToCanonical;
use crate::arrays::bool;
use crate::patches::Patches;
use crate::stats::ArrayStats;
use crate::validity::Validity;
use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
use crate::vtable::ValidityHelper;

/// Extension methods for [`BooleanBuffer`].
pub trait BooleanBufferExt {
/// Slice any full bytes from the buffer, leaving the offset < 8.
///
/// Takes the buffer by value and returns the trimmed buffer.
fn shrink_offset(self) -> Self;
}

impl BooleanBufferExt for BooleanBuffer {
    /// Rebuilds the buffer so that whole bytes in front of the first value are sliced away,
    /// leaving only a sub-byte bit offset. The logical length is unchanged.
    fn shrink_offset(self) -> Self {
        let bit_len = self.len();
        let (whole_bytes, leftover_bits) = (self.offset() / 8, self.offset() % 8);

        // Bytes needed to hold the leftover leading bits plus all `bit_len` values.
        let byte_len = (bit_len + leftover_bits).div_ceil(8);
        let trimmed = self.into_inner().slice_with_length(whole_bytes, byte_len);

        BooleanBuffer::new(trimmed, leftover_bits, bit_len)
    }
}

/// A boolean array that stores true/false values in a compact bit-packed format.
///
Expand Down Expand Up @@ -48,10 +66,10 @@ use crate::vtable::{ArrayVTable, CanonicalVTable, ValidityHelper};
/// ```
#[derive(Clone, Debug)]
pub struct BoolArray {
dtype: DType,
buffer: BooleanBuffer,
pub(crate) validity: Validity,
pub(crate) stats_set: ArrayStats,
pub(super) dtype: DType,
pub(super) buffer: BooleanBuffer,
pub(super) validity: Validity,
pub(super) stats_set: ArrayStats,
}

impl BoolArray {
Expand Down Expand Up @@ -96,6 +114,10 @@ impl BoolArray {
len: usize,
validity: Validity,
) -> Self {
#[cfg(debug_assertions)]
Self::validate(&buffer, offset, len, &validity)
.vortex_expect("[Debug Assertion]: Invalid `BoolArray` parameters");

let buffer = BooleanBuffer::new(buffer.into_arrow_buffer(), offset, len);
let buffer = buffer.shrink_offset();
Self {
Expand Down Expand Up @@ -243,6 +265,32 @@ impl BoolArray {
};
Mask::from_buffer(buffer)
}

/// Applies `patches` to this array, returning a new `BoolArray` built from the patched
/// bits and the patched validity.
///
/// Each patch index is rebased by subtracting `patches.offset()` before the matching patch
/// value is written at that position.
///
/// NOTE(review): assumes every patch index is >= `patches.offset()`; otherwise the index
/// subtraction underflows — TODO confirm this is guaranteed by `Patches`.
///
/// # Panics
///
/// `zip_eq` panics if the patch indices and the patch values differ in length.
pub fn patch(self, patches: &Patches) -> Self {
    let array_len = self.len();
    let patch_offset = patches.offset();
    let patch_indices = patches.indices().to_primitive();
    let patch_values = patches.values().to_bool();

    // Validity is patched while `self` can still be borrowed; converting to a builder
    // below consumes it.
    let validity = self.validity().clone().patch(
        array_len,
        patch_offset,
        patch_indices.as_ref(),
        patch_values.validity(),
    );

    // The builder's bits start `bit_offset` positions before the array's first value, so
    // every write is shifted by that amount.
    let (mut bits, bit_offset) = self.into_boolean_builder();
    match_each_integer_ptype!(patch_indices.ptype(), |I| {
        for (position, bit) in patch_indices
            .as_slice::<I>()
            .iter()
            .zip_eq(patch_values.boolean_buffer().iter())
        {
            #[allow(clippy::cast_possible_truncation)]
            bits.set_bit(*position as usize - patch_offset + bit_offset, bit);
        }
    });

    // Cut the leading `bit_offset` bits back off so the result has exactly `array_len` values.
    let patched_bits = bits.finish().slice(bit_offset, array_len);
    Self::from_bool_buffer(patched_bits, validity)
}
}

impl From<BooleanBuffer> for BoolArray {
Expand All @@ -268,53 +316,6 @@ impl FromIterator<Option<bool>> for BoolArray {
}
}

// Exposes the array's stored validity through the `ValidityHelper` trait.
impl ValidityHelper for BoolArray {
// Returns a reference to the `validity` field; nothing is computed or copied.
fn validity(&self) -> &Validity {
&self.validity
}
}

// Basic array metadata for `BoolArray`, each read straight from one of its fields.
impl ArrayVTable<BoolVTable> for BoolVTable {
// The array's length is the length reported by its boolean buffer.
fn len(array: &BoolArray) -> usize {
array.buffer.len()
}

// Returns a reference to the stored `dtype` field.
fn dtype(array: &BoolArray) -> &DType {
&array.dtype
}

// Builds a stats reference from the array's `stats_set`, passing the array itself along.
fn stats(array: &BoolArray) -> StatsSetRef<'_> {
array.stats_set.to_ref(array.as_ref())
}
}

// Canonicalization for `BoolArray`.
impl CanonicalVTable<BoolVTable> for BoolVTable {
// No conversion is performed: the array is cloned and wrapped in `Canonical::Bool`.
fn canonicalize(array: &BoolArray) -> Canonical {
Canonical::Bool(array.clone())
}

// Delegates to the builder, which is handed this array and extends itself from it.
fn append_to_builder(array: &BoolArray, builder: &mut dyn ArrayBuilder) {
builder.extend_from_array(array.as_ref())
}
}

/// Extension methods for [`BooleanBuffer`].
pub trait BooleanBufferExt {
/// Slice any full bytes from the buffer, leaving the offset < 8.
///
/// Takes the buffer by value and returns the trimmed buffer.
fn shrink_offset(self) -> Self;
}

// `shrink_offset` for Arrow's `BooleanBuffer`.
impl BooleanBufferExt for BooleanBuffer {
fn shrink_offset(self) -> Self {
// Split the bit offset into whole bytes (sliced away below) and a remainder in 0..8 (kept).
let byte_offset = self.offset() / 8;
let bit_offset = self.offset() % 8;
let len = self.len();
// Keep just enough bytes to cover the remaining leading bits plus `len` values.
let buffer = self
.into_inner()
.slice_with_length(byte_offset, (len + bit_offset).div_ceil(8));
// Same logical length as before, now with an offset below 8.
BooleanBuffer::new(buffer, bit_offset, len)
}
}

#[cfg(test)]
mod tests {
use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};
Expand Down Expand Up @@ -367,6 +368,13 @@ mod tests {

#[test]
fn patch_sliced_bools() {
let arr = BoolArray::from(BooleanBuffer::new_set(12));
let sliced = arr.slice(4..12);
let (values, offset) = sliced.to_bool().into_boolean_builder();
assert_eq!(offset, 4);
assert_eq!(values.len(), 12);
assert_eq!(values.as_slice(), &[255, 15]);

let arr = {
let mut builder = BooleanBufferBuilder::new(12);
builder.append(false);
Expand Down Expand Up @@ -431,4 +439,13 @@ mod tests {
let (values, _byte_bit_offset) = arr.to_bool().into_boolean_builder();
assert_eq!(values.as_slice(), &[254, 127]);
}

#[test]
fn patch_sliced_bools_offset() {
    // Fifteen set bits fill two bytes: 0b1111_1111 (255) and 0b0111_1111 (127).
    let all_set = BoolArray::from(BooleanBuffer::new_set(15));
    let tail = all_set.slice(4..15);

    // The builder still holds all fifteen bits; the slice start is reported as the offset.
    let (builder, bit_offset) = tail.to_bool().into_boolean_builder();
    assert_eq!(bit_offset, 4);
    assert_eq!(builder.as_slice(), &[255, 127]);
}
}
45 changes: 7 additions & 38 deletions vortex-array/src/arrays/bool/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,45 +2,14 @@
// SPDX-FileCopyrightText: Copyright the Vortex contributors

mod array;
pub mod compute;
mod ops;
mod patch;
mod serde;
#[cfg(feature = "test-harness")]
mod test;

pub use array::*;
// Re-export the BooleanBuffer type on our API surface.
pub use array::{BoolArray, BooleanBufferExt};
// Re-export Arrow's `BooleanBuffer` type on our API surface.
pub use arrow_buffer::{BooleanBuffer, BooleanBufferBuilder};

use crate::vtable::{NotSupported, VTable, ValidityVTableFromValidityHelper};
use crate::{EncodingId, EncodingRef, vtable};

vtable!(Bool);

impl VTable for BoolVTable {
type Array = BoolArray;
type Encoding = BoolEncoding;

type ArrayVTable = Self;
type CanonicalVTable = Self;
type OperationsVTable = Self;
type ValidityVTable = ValidityVTableFromValidityHelper;
type VisitorVTable = Self;
type ComputeVTable = NotSupported;
type EncodeVTable = NotSupported;
type PipelineVTable = NotSupported;
// Enable serde for this encoding
type SerdeVTable = Self;

fn id(_encoding: &Self::Encoding) -> EncodingId {
EncodingId::new_ref("vortex.bool")
}
pub mod compute;

fn encoding(_array: &Self::Array) -> EncodingRef {
EncodingRef::new_ref(BoolEncoding.as_ref())
}
}
mod vtable;
pub use vtable::{BoolEncoding, BoolVTable};

// Field-less encoding type for boolean arrays; `BoolVTable` names it as its `Encoding` type.
#[derive(Clone, Debug)]
pub struct BoolEncoding;
#[cfg(feature = "test-harness")]
mod test_harness;
65 changes: 0 additions & 65 deletions vortex-array/src/arrays/bool/patch.rs

This file was deleted.

Loading
Loading