diff --git a/encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs b/encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs index 5b81580d7a6..a4f8224966a 100644 --- a/encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs +++ b/encodings/fastlanes/src/bitpacking/array/bitpack_decompress.rs @@ -113,7 +113,7 @@ where // SAFETY: We initialize all `len` values below via `decode` and the patch loop. unsafe { - uninit_range.append_mask(array.validity()?.execute_mask(len, ctx)?); + uninit_range.append_mask(&array.validity()?.execute_mask(len, ctx)?); } // SAFETY: `decode` writes a value to every slot in this range. diff --git a/encodings/fastlanes/src/for/array/for_decompress.rs b/encodings/fastlanes/src/for/array/for_decompress.rs index 073864411fb..ffe9449f962 100644 --- a/encodings/fastlanes/src/for/array/for_decompress.rs +++ b/encodings/fastlanes/src/for/array/for_decompress.rs @@ -109,7 +109,7 @@ pub(crate) fn fused_decompress< let mut uninit_range = builder.uninit_range(bp.len()); unsafe { // Append a dense null Mask. - uninit_range.append_mask(bp.validity()?.execute_mask(bp.as_ref().len(), ctx)?); + uninit_range.append_mask(&bp.validity()?.execute_mask(bp.as_ref().len(), ctx)?); } // SAFETY: `decode_into` will initialize all values in this range. diff --git a/vortex-array/src/builders/bool.rs b/vortex-array/src/builders/bool.rs index c09d69a5acd..fdae7984844 100644 --- a/vortex-array/src/builders/bool.rs +++ b/vortex-array/src/builders/bool.rs @@ -121,7 +121,7 @@ impl ArrayBuilder for BoolBuilder { self.inner.append_buffer(&bool_array.to_bit_buffer()); self.nulls.append_validity_mask( - bool_array + &bool_array .as_ref() .validity() .vortex_expect("validity_mask") @@ -139,8 +139,7 @@ impl ArrayBuilder for BoolBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyBitBufferBuilder::new(validity.len()); - self.nulls.append_validity_mask(validity); + self.nulls = LazyBitBufferBuilder::from_validity_mask(validity); } fn finish(&mut self) -> ArrayRef { diff --git a/vortex-array/src/builders/decimal.rs b/vortex-array/src/builders/decimal.rs index c80f97d9ea2..f8d438d477b 100644 --- a/vortex-array/src/builders/decimal.rs +++ b/vortex-array/src/builders/decimal.rs @@ -207,7 +207,7 @@ impl ArrayBuilder for DecimalBuilder { }); self.nulls.append_validity_mask( - decimal_array + &decimal_array .as_ref() .validity() .vortex_expect("validity_mask") @@ -225,8 +225,7 @@ impl ArrayBuilder for DecimalBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyBitBufferBuilder::new(validity.len()); - self.nulls.append_validity_mask(validity); + self.nulls = LazyBitBufferBuilder::from_validity_mask(validity); } fn finish(&mut self) -> ArrayRef { diff --git a/vortex-array/src/builders/fixed_size_list.rs b/vortex-array/src/builders/fixed_size_list.rs index 5937c37b041..aece8c21465 100644 --- a/vortex-array/src/builders/fixed_size_list.rs +++ b/vortex-array/src/builders/fixed_size_list.rs @@ -246,7 +246,7 @@ impl ArrayBuilder for FixedSizeListBuilder { self.elements_builder.extend_from_array(fsl.elements()); self.nulls.append_validity_mask( - array + &array .validity() .vortex_expect("validity_mask in extend_from_array_unchecked") .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx()) @@ -261,8 +261,7 @@ impl ArrayBuilder for FixedSizeListBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyBitBufferBuilder::new(validity.len()); - self.nulls.append_validity_mask(validity); + self.nulls = LazyBitBufferBuilder::from_validity_mask(validity); } fn finish(&mut self) -> ArrayRef { diff --git a/vortex-array/src/builders/lazy_null_builder.rs b/vortex-array/src/builders/lazy_null_builder.rs index 9ca4b28b484..24abe9ba17d 100644 --- a/vortex-array/src/builders/lazy_null_builder.rs +++ b/vortex-array/src/builders/lazy_null_builder.rs @@ -31,6 +31,42 @@ impl LazyBitBufferBuilder { } } + /// Creates a builder pre-populated from a validity mask, taking ownership of the mask's buffer + /// instead of copying it where possible. + /// + /// This is the counterpart to [`append_validity_mask`](Self::append_validity_mask) for callers + /// that want to *replace* the builder's contents with the mask rather than extend them: because + /// we own the mask, we can move its buffer in instead of copying it. + pub fn from_validity_mask(validity_mask: Mask) -> Self { + match validity_mask { + // An unmaterialized builder already represents `len` non-null values, so an all-valid + // mask stays lazy. + Mask::AllTrue(len) => Self { + inner: None, + len, + capacity: len, + }, + Mask::AllFalse(len) => Self::from_buffer(BitBufferMut::new_unset(len)), + // Take ownership of the underlying buffer; `into_bit_buffer` and `try_into_mut` only + // copy when the buffer is shared, otherwise this is a move. + values @ Mask::Values(_) => Self::from_buffer( + values + .into_bit_buffer() + .try_into_mut() + .unwrap_or_else(|buffer| BitBufferMut::copy_from(&buffer)), + ), + } + } + + /// Creates a builder backed by an already-materialized buffer. + fn from_buffer(inner: BitBufferMut) -> Self { + Self { + inner: Some(inner), + len: 0, + capacity: 0, + } + } + /// Appends `n` non-null values to the builder. #[inline] pub fn append_n_non_nulls(&mut self, n: usize) { @@ -82,10 +118,13 @@ impl LazyBitBufferBuilder { } /// Appends values from a validity mask. - pub fn append_validity_mask(&mut self, validity_mask: Mask) { + /// + /// Takes the mask by reference: the `Mask::Values` case copies the underlying buffer into the + /// running null buffer, so there is nothing to gain from owning the mask. + pub fn append_validity_mask(&mut self, validity_mask: &Mask) { match validity_mask { - Mask::AllTrue(len) => self.append_n_non_nulls(len), - Mask::AllFalse(len) => self.append_n_nulls(len), + Mask::AllTrue(len) => self.append_n_non_nulls(*len), + Mask::AllFalse(len) => self.append_n_nulls(*len), Mask::Values(is_valid) => self.append_buffer(is_valid.bit_buffer()), } } diff --git a/vortex-array/src/builders/list.rs b/vortex-array/src/builders/list.rs index ae1bb20dfa9..a0c1fdd581b 100644 --- a/vortex-array/src/builders/list.rs +++ b/vortex-array/src/builders/list.rs @@ -226,7 +226,7 @@ impl ArrayBuilder for ListBuilder { // Append validity information. self.nulls.append_validity_mask( - array + &array .validity() .vortex_expect("validity_mask in extend_from_array_unchecked") .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx()) @@ -300,8 +300,7 @@ impl ArrayBuilder for ListBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyBitBufferBuilder::new(validity.len()); - self.nulls.append_validity_mask(validity); + self.nulls = LazyBitBufferBuilder::from_validity_mask(validity); } fn finish(&mut self) -> ArrayRef { diff --git a/vortex-array/src/builders/listview.rs b/vortex-array/src/builders/listview.rs index 3c7bb8e8885..d4f8f908dcb 100644 --- a/vortex-array/src/builders/listview.rs +++ b/vortex-array/src/builders/listview.rs @@ -308,7 +308,7 @@ impl ArrayBuilder for ListViewBuilder { debug_assert!(listview.is_zero_copy_to_list()); self.nulls.append_validity_mask( - array + &array .validity() .vortex_expect("validity_mask in extend_from_array_unchecked") .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx()) @@ -364,8 +364,7 @@ impl ArrayBuilder for ListViewBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyBitBufferBuilder::new(validity.len()); - self.nulls.append_validity_mask(validity); + self.nulls = LazyBitBufferBuilder::from_validity_mask(validity); } fn finish(&mut self) -> ArrayRef { diff --git a/vortex-array/src/builders/primitive.rs b/vortex-array/src/builders/primitive.rs index 59381fc602f..657f4b1a603 100644 --- a/vortex-array/src/builders/primitive.rs +++ b/vortex-array/src/builders/primitive.rs @@ -127,7 +127,7 @@ impl PrimitiveBuilder { } /// Extends the primitive array with an iterator. - pub fn extend_with_iterator(&mut self, iter: impl IntoIterator, mask: Mask) { + pub fn extend_with_iterator(&mut self, iter: impl IntoIterator, mask: &Mask) { self.values.extend(iter); self.nulls.append_validity_mask(mask); } @@ -190,7 +190,7 @@ impl ArrayBuilder for PrimitiveBuilder { self.values.extend_from_slice(array.as_slice::()); self.nulls.append_validity_mask( - array + &array .as_ref() .validity() .vortex_expect("validity_mask") @@ -208,8 +208,7 @@ impl ArrayBuilder for PrimitiveBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyBitBufferBuilder::new(validity.len()); - self.nulls.append_validity_mask(validity); + self.nulls = LazyBitBufferBuilder::from_validity_mask(validity); } fn finish(&mut self) -> ArrayRef { @@ -271,7 +270,7 @@ impl UninitRange<'_, T> { /// - The caller must ensure that they safely initialize `mask.len()` primitive values via /// [`UninitRange::copy_from_slice`]. /// - The caller must also ensure that they only call this method once. - pub unsafe fn append_mask(&mut self, mask: Mask) { + pub unsafe fn append_mask(&mut self, mask: &Mask) { assert_eq!( mask.len(), self.len, @@ -426,7 +425,7 @@ mod tests { // SAFETY: We're about to initialize the values. unsafe { - range.append_mask(mask); + range.append_mask(&mask); } // Initialize the values. @@ -476,7 +475,7 @@ mod tests { // SAFETY: This is expected to panic due to length mismatch. unsafe { - range.append_mask(wrong_mask); + range.append_mask(&wrong_mask); } } @@ -522,7 +521,7 @@ mod tests { let initial_mask = Mask::from_iter([false, false, false]); // SAFETY: We're about to initialize the values. unsafe { - range.append_mask(initial_mask); + range.append_mask(&initial_mask); } // Now we can use set_bit to modify individual bits with relative indexing. @@ -623,7 +622,7 @@ mod tests { let mask = Mask::from_iter([true, true, false]); // SAFETY: We're about to initialize the matching number of values. unsafe { - range.append_mask(mask); + range.append_mask(&mask); } // Initialize all values. diff --git a/vortex-array/src/builders/struct_.rs b/vortex-array/src/builders/struct_.rs index b7c737b3f1c..d396cf186a0 100644 --- a/vortex-array/src/builders/struct_.rs +++ b/vortex-array/src/builders/struct_.rs @@ -180,7 +180,7 @@ impl ArrayBuilder for StructBuilder { } self.nulls.append_validity_mask( - array + &array .validity() .vortex_expect("validity_mask") .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx()) @@ -196,8 +196,7 @@ impl ArrayBuilder for StructBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyBitBufferBuilder::new(validity.len()); - self.nulls.append_validity_mask(validity); + self.nulls = LazyBitBufferBuilder::from_validity_mask(validity); } fn finish(&mut self) -> ArrayRef { diff --git a/vortex-array/src/builders/tests.rs b/vortex-array/src/builders/tests.rs index 53b010597cc..69adfd5b893 100644 --- a/vortex-array/src/builders/tests.rs +++ b/vortex-array/src/builders/tests.rs @@ -5,6 +5,8 @@ use std::sync::Arc; use rstest::rstest; use vortex_error::VortexExpect; +use vortex_error::VortexResult; +use vortex_mask::Mask; use crate::LEGACY_SESSION; use crate::VortexSessionExecute; @@ -820,3 +822,62 @@ fn test_append_scalar_repeated_same_instance() { ); } } + +/// Test that `set_validity` correctly overrides a builder's validity across all mask variants. +/// +/// `set_validity` moves the mask's buffer into the builder rather than copying it, so the +/// `sliced_offset` case is important: slicing a `Mask::Values` at a non-byte-aligned boundary +/// yields a buffer with a non-zero bit offset, which the move path must preserve. +#[rstest] +#[case::all_true(Mask::new_true(8), vec![true; 8])] +#[case::all_false(Mask::new_false(8), vec![false; 8])] +#[case::values( + Mask::from_iter([true, false, true, true, false, false, true, false]), + vec![true, false, true, true, false, false, true, false] +)] +#[case::sliced_offset( + Mask::from_iter([ + false, false, false, // dropped by the slice + true, false, true, true, false, false, true, false, // kept: indices 3..11 + true, true, true, true, true, // dropped by the slice + ]) + .slice(3..11), + vec![true, false, true, true, false, false, true, false] +)] +fn test_set_validity_overrides_validity( + #[case] mask: Mask, + #[case] expected: Vec, +) -> VortexResult<()> { + let dtype = DType::Primitive(PType::I32, Nullability::Nullable); + let mut builder = builder_with_capacity(&dtype, mask.len()); + builder.append_zeros(mask.len()); + + builder.set_validity(mask); + + let validity = builder.finish().validity()?; + for (i, &valid) in expected.iter().enumerate() { + assert_eq!( + validity.is_valid(i)?, + valid, + "validity mismatch at index {i}" + ); + } + Ok(()) +} + +/// Test that `set_validity` is a no-op on a non-nullable builder. +#[test] +fn test_set_validity_noop_when_non_nullable() -> VortexResult<()> { + let dtype = DType::Primitive(PType::I32, Nullability::NonNullable); + let mut builder = builder_with_capacity(&dtype, 4); + builder.append_zeros(4); + + // Providing an all-false mask must not make the non-nullable array invalid. + builder.set_validity(Mask::new_false(4)); + + let validity = builder.finish().validity()?; + for i in 0..4 { + assert!(validity.is_valid(i)?, "index {i} should remain valid"); + } + Ok(()) +} diff --git a/vortex-array/src/builders/varbinview.rs b/vortex-array/src/builders/varbinview.rs index 9150e8e70e1..e06b54dd78c 100644 --- a/vortex-array/src/builders/varbinview.rs +++ b/vortex-array/src/builders/varbinview.rs @@ -206,7 +206,7 @@ impl VarBinViewBuilder { "Some buffers already exist", ); self.views_builder.extend_trusted(views.iter().copied()); - self.push_only_validity_mask(validity_mask); + self.push_only_validity_mask(&validity_mask); debug_assert_eq!(self.nulls.len(), self.views_builder.len()) } @@ -236,7 +236,7 @@ impl VarBinViewBuilder { } // Pushes a validity mask into the builder not affecting the views or buffers - fn push_only_validity_mask(&mut self, validity_mask: Mask) { + fn push_only_validity_mask(&mut self, validity_mask: &Mask) { self.nulls.append_validity_mask(validity_mask); } } @@ -299,17 +299,13 @@ impl ArrayBuilder for VarBinViewBuilder { let array = array.to_varbinview(); self.flush_in_progress(); - self.push_only_validity_mask( - array - .as_ref() - .validity() - .vortex_expect("validity_mask") - .execute_mask( - array.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) - .vortex_expect("Failed to compute validity mask"), - ); + let mask = array + .validity() + .vortex_expect("validity_mask") + .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx()) + .vortex_expect("Failed to compute validity mask"); + + self.push_only_validity_mask(&mask); let view_adjustment = self.completed @@ -325,41 +321,30 @@ impl ArrayBuilder for VarBinViewBuilder { .iter() .map(|view| adjustment.adjust_view(view)), ), - ViewAdjustment::Rewriting(adjustment) => { - match array - .as_ref() - .validity() - .vortex_expect("validity_mask") - .execute_mask( - array.as_ref().len(), - &mut LEGACY_SESSION.create_execution_ctx(), - ) - .vortex_expect("Failed to compute validity mask") - { - Mask::AllTrue(_) => { - for (idx, &view) in array.views().iter().enumerate() { - let new_view = self.push_view(view, &adjustment, &array, idx); - self.views_builder.push(new_view); - } - } - Mask::AllFalse(_) => { - self.views_builder - .push_n(BinaryView::empty_view(), array.len()); + ViewAdjustment::Rewriting(adjustment) => match mask { + Mask::AllTrue(_) => { + for (idx, &view) in array.views().iter().enumerate() { + let new_view = self.push_view(view, &adjustment, &array, idx); + self.views_builder.push(new_view); } - Mask::Values(v) => { - for (idx, (&view, is_valid)) in - array.views().iter().zip(v.bit_buffer().iter()).enumerate() - { - let new_view = if !is_valid { - BinaryView::empty_view() - } else { - self.push_view(view, &adjustment, &array, idx) - }; - self.views_builder.push(new_view); - } + } + Mask::AllFalse(_) => { + self.views_builder + .push_n(BinaryView::empty_view(), array.len()); + } + Mask::Values(v) => { + for (idx, (&view, is_valid)) in + array.views().iter().zip(v.bit_buffer().iter()).enumerate() + { + let new_view = if !is_valid { + BinaryView::empty_view() + } else { + self.push_view(view, &adjustment, &array, idx) + }; + self.views_builder.push(new_view); } } - } + }, } } @@ -369,8 +354,7 @@ impl ArrayBuilder for VarBinViewBuilder { } unsafe fn set_validity_unchecked(&mut self, validity: Mask) { - self.nulls = LazyBitBufferBuilder::new(validity.len()); - self.nulls.append_validity_mask(validity); + self.nulls = LazyBitBufferBuilder::from_validity_mask(validity); } fn finish(&mut self) -> ArrayRef { @@ -626,8 +610,8 @@ impl BuffersWithOffsets { buffers: Arc::from( array .data_buffers() - .to_vec() - .into_iter() + .iter() + .cloned() .map(|b| b.unwrap_host()) .collect_vec(), ),