Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
69 changes: 67 additions & 2 deletions encodings/roaring/src/integer/compute.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use croaring::Bitmap;
use vortex::compute::unary::ScalarAtFn;
use vortex::compute::ArrayCompute;
use vortex::compute::{ArrayCompute, SliceFn};
use vortex::{Array, IntoArray};
use vortex_dtype::PType;
use vortex_error::{vortex_err, VortexResult, VortexUnwrap as _};
use vortex_scalar::Scalar;
Expand All @@ -10,12 +12,16 @@ impl ArrayCompute for RoaringIntArray {
fn scalar_at(&self) -> Option<&dyn ScalarAtFn> {
Some(self)
}

fn slice(&self) -> Option<&dyn SliceFn> {
Some(self)
}
}

impl ScalarAtFn for RoaringIntArray {
fn scalar_at(&self, index: usize) -> VortexResult<Scalar> {
let bitmap_value = self
.bitmap()
.owned_bitmap()
.select(index as u32)
.ok_or_else(|| vortex_err!(OutOfBounds: index, 0, self.len()))?;
let scalar: Scalar = match self.metadata().ptype {
Expand All @@ -32,3 +38,62 @@ impl ScalarAtFn for RoaringIntArray {
<Self as ScalarAtFn>::scalar_at(self, index).vortex_unwrap()
}
}

impl SliceFn for RoaringIntArray {
fn slice(&self, start: usize, stop: usize) -> VortexResult<Array> {
let mut bitmap = self.owned_bitmap();
let start = bitmap
.select(start as u32)
.ok_or_else(|| vortex_err!(OutOfBounds: start, 0, self.len()))?;
let stop_inclusive = if stop == self.len() {
bitmap.maximum().unwrap_or(0)
} else {
bitmap
.select(stop.saturating_sub(1) as u32)
.ok_or_else(|| vortex_err!(OutOfBounds: stop, 0, self.len()))?
};

bitmap.and_inplace(&Bitmap::from_range(start..=stop_inclusive));
Self::try_new(bitmap, self.ptype()).map(IntoArray::into_array)
}
}

#[cfg(test)]
mod tests {
use vortex::array::PrimitiveArray;
use vortex::compute::slice;
use vortex::compute::unary::scalar_at;

use super::*;

#[test]
#[cfg_attr(miri, ignore)]
pub fn test_scalar_at() {
let ints = PrimitiveArray::from(vec![2u32, 12, 22, 32]).into_array();
let array = RoaringIntArray::encode(ints).unwrap();

assert_eq!(scalar_at(&array, 0).unwrap(), 2u32.into());
assert_eq!(scalar_at(&array, 1).unwrap(), 12u32.into());
}

#[test]
#[cfg_attr(miri, ignore)]
fn test_slice() {
let array = RoaringIntArray::try_new(Bitmap::from_range(10..20), PType::U32).unwrap();

let sliced = slice(&array, 0, 5).unwrap();
assert_eq!(sliced.len(), 5);
assert_eq!(scalar_at(&sliced, 0).unwrap(), 10u32.into());
assert_eq!(scalar_at(&sliced, 4).unwrap(), 14u32.into());

let sliced = slice(&array, 5, 10).unwrap();
assert_eq!(sliced.len(), 5);
assert_eq!(scalar_at(&sliced, 0).unwrap(), 15u32.into());
assert_eq!(scalar_at(&sliced, 4).unwrap(), 19u32.into());

let sliced = slice(&sliced, 3, 5).unwrap();
assert_eq!(sliced.len(), 2);
assert_eq!(scalar_at(&sliced, 0).unwrap(), 18u32.into());
assert_eq!(scalar_at(&sliced, 1).unwrap(), 19u32.into());
}
}
76 changes: 48 additions & 28 deletions encodings/roaring/src/integer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,16 @@ pub use compress::*;
use croaring::{Bitmap, Portable};
use serde::{Deserialize, Serialize};
use vortex::array::PrimitiveArray;
use vortex::compute::unary::try_cast;
use vortex::encoding::ids;
use vortex::stats::{ArrayStatisticsCompute, StatsSet};
use vortex::validity::{ArrayValidity, LogicalValidity};
use vortex::stats::{ArrayStatistics, ArrayStatisticsCompute, Stat, StatsSet};
use vortex::validity::{ArrayValidity, LogicalValidity, Validity};
use vortex::variants::{ArrayVariants, PrimitiveArrayTrait};
use vortex::visitor::{AcceptArrayVisitor, ArrayVisitor};
use vortex::{impl_encoding, Array, ArrayTrait, Canonical, IntoArray, IntoCanonical, TypedArray};
use vortex::{
impl_encoding, Array, ArrayDType as _, ArrayTrait, Canonical, IntoArray, IntoCanonical,
TypedArray,
};
use vortex_buffer::Buffer;
use vortex_dtype::Nullability::NonNullable;
use vortex_dtype::{DType, PType};
Expand All @@ -34,9 +38,25 @@ impl Display for RoaringIntMetadata {
impl RoaringIntArray {
pub fn try_new(bitmap: Bitmap, ptype: PType) -> VortexResult<Self> {
if !ptype.is_unsigned_int() {
vortex_bail!("RoaringInt expected unsigned int");
vortex_bail!(MismatchedTypes: "unsigned int", ptype);
}

let length = bitmap.statistics().cardinality as usize;
let max = bitmap.maximum();
if max.map(|mv| mv as u64 > ptype.max_value()).unwrap_or(false) {
vortex_bail!(
"RoaringInt maximum value is greater than the maximum value for the primitive type"
);
}

let mut stats = StatsSet::new();
stats.set(Stat::NullCount, 0.into());
stats.set(Stat::Max, max.into());
stats.set(Stat::Min, bitmap.minimum().into());
stats.set(Stat::IsConstant, (length <= 1).into());
stats.set(Stat::IsSorted, true.into());
stats.set(Stat::IsStrictSorted, true.into());

Ok(Self {
typed: TypedArray::try_from_parts(
DType::Primitive(ptype, NonNullable),
Expand All @@ -49,8 +69,7 @@ impl RoaringIntArray {
})
}

pub fn bitmap(&self) -> Bitmap {
//TODO(@jdcasale): figure out a way to avoid this deserialization per-call
pub fn owned_bitmap(&self) -> Bitmap {
Bitmap::deserialize::<Portable>(
self.as_ref()
.buffer()
Expand Down Expand Up @@ -88,39 +107,40 @@ impl ArrayValidity for RoaringIntArray {
}

fn logical_validity(&self) -> LogicalValidity {
LogicalValidity::AllValid(self.bitmap().iter().count())
LogicalValidity::AllValid(self.len())
}
}

impl IntoCanonical for RoaringIntArray {
fn into_canonical(self) -> VortexResult<Canonical> {
todo!()
try_cast(
PrimitiveArray::from_vec(self.owned_bitmap().to_vec(), Validity::NonNullable),
self.dtype(),
)
.and_then(Array::into_canonical)
}
}

impl AcceptArrayVisitor for RoaringIntArray {
fn accept(&self, _visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
todo!()
fn accept(&self, visitor: &mut dyn ArrayVisitor) -> VortexResult<()> {
visitor.visit_buffer(
self.as_ref()
.buffer()
.vortex_expect("Missing buffer in RoaringIntArray"),
)
}
}

impl ArrayStatisticsCompute for RoaringIntArray {}

#[cfg(test)]
mod test {
use vortex::array::PrimitiveArray;
use vortex::compute::unary::scalar_at;
use vortex::IntoArray;

use crate::RoaringIntArray;

#[test]
#[cfg_attr(miri, ignore)]
pub fn test_scalar_at() {
let ints = PrimitiveArray::from(vec![2u32, 12, 22, 32]).into_array();
let array = RoaringIntArray::encode(ints).unwrap();

assert_eq!(scalar_at(&array, 0).unwrap(), 2u32.into());
assert_eq!(scalar_at(&array, 1).unwrap(), 12u32.into());
impl ArrayStatisticsCompute for RoaringIntArray {
fn compute_statistics(&self, stat: Stat) -> VortexResult<StatsSet> {
// possibly faster to write an accumulator over the iterator, though not necessarily
if stat == Stat::TrailingZeroFreq || stat == Stat::BitWidthFreq || stat == Stat::RunCount {
let primitive =
PrimitiveArray::from_vec(self.owned_bitmap().to_vec(), Validity::NonNullable);
primitive.statistics().compute(stat);
Ok(primitive.statistics().to_set())
} else {
Ok(StatsSet::new())
}
}
}
23 changes: 12 additions & 11 deletions vortex-array/src/array/chunked/compute/take.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,17 +77,18 @@ fn take_strict_sorted(chunked: &ChunkedArray, indices: &Array) -> VortexResult<A
// Adjust the indices so they're relative to the chunk
// Note. Indices might not have a dtype big enough to fit chunk_begin after cast,
// if it does cast the scalar otherwise upcast the indices.
let chunk_indices = if chunk_begin < PType::try_from(chunk_indices.dtype())?.max_value() {
subtract_scalar(
&chunk_indices,
&Scalar::from(chunk_begin).cast(chunk_indices.dtype())?,
)?
} else {
// Note. this try_cast (memory copy) is unnecessary, could instead upcast in the subtract fn.
// and avoid an extra
let u64_chunk_indices = try_cast(&chunk_indices, PType::U64.into())?;
subtract_scalar(&u64_chunk_indices, &chunk_begin.into())?
};
let chunk_indices =
if chunk_begin < PType::try_from(chunk_indices.dtype())?.max_value() as usize {
subtract_scalar(
&chunk_indices,
&Scalar::from(chunk_begin).cast(chunk_indices.dtype())?,
)?
} else {
// Note. this try_cast (memory copy) is unnecessary, could instead upcast in the subtract fn.
// and avoid an extra
let u64_chunk_indices = try_cast(&chunk_indices, PType::U64.into())?;
subtract_scalar(&u64_chunk_indices, &chunk_begin.into())?
};

indices_by_chunk[chunk_idx] = Some(chunk_indices);

Expand Down
4 changes: 2 additions & 2 deletions vortex-dtype/src/ptype.rs
Original file line number Diff line number Diff line change
Expand Up @@ -204,8 +204,8 @@ impl PType {
self.byte_width() * 8
}

pub const fn max_value(&self) -> usize {
match_each_integer_ptype!(self, |$T| $T::MAX as usize)
pub const fn max_value(&self) -> u64 {
match_each_integer_ptype!(self, |$T| $T::MAX as u64)
}

pub fn to_signed(self) -> Self {
Expand Down
33 changes: 28 additions & 5 deletions vortex-sampling-compressor/src/compressors/roaring_int.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,6 @@ impl EncodingCompressor for RoaringIntCompressor {
}

fn can_compress(&self, array: &Array) -> Option<&dyn EncodingCompressor> {
// Only support primitive enc arrays
if array.encoding().id() != RoaringInt::ID {
return None;
}

// Only support non-nullable uint arrays
if !array.dtype().is_unsigned_int() || array.dtype().is_nullable() {
return None;
Expand Down Expand Up @@ -60,3 +55,31 @@ impl EncodingCompressor for RoaringIntCompressor {
HashSet::from([&RoaringIntEncoding as EncodingRef])
}
}

#[cfg(test)]
mod tests {
use vortex::array::PrimitiveArray;
use vortex::validity::Validity;
use vortex::IntoArray;
use vortex_roaring::RoaringIntArray;

use crate::compressors::roaring_int::RoaringIntCompressor;
use crate::compressors::EncodingCompressor as _;
use crate::SamplingCompressor;

#[test]
#[cfg_attr(miri, ignore)]
fn test_roaring_int_compressor() {
let array =
PrimitiveArray::from_vec(vec![1u32, 2, 3, 4, 5], Validity::NonNullable).into_array();
assert!(RoaringIntCompressor.can_compress(&array).is_some());
let compressed = RoaringIntCompressor
.compress(&array, None, SamplingCompressor::default())
.unwrap();
assert_eq!(compressed.array.len(), 5);
assert!(compressed.path.is_some());

let roaring = RoaringIntArray::try_from(compressed.array).unwrap();
assert!(roaring.owned_bitmap().contains_range(1..=5));
}
}