Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions encodings/alp/src/alp/compress.rs
Original file line number Diff line number Diff line change
Expand Up @@ -546,4 +546,44 @@ mod tests {

assert_arrays_eq!(decoded, array);
}

/// Regression test for patch_chunk index-out-of-bounds when slicing a multi-chunk
/// ALP array mid-chunk with patches in the trailing chunk.
///
/// The bug: chunk_offsets are sliced at chunk granularity (1024-row boundaries)
/// but patches indices/values are sliced at element granularity. When a slice ends
/// mid-chunk, patches_end_idx could exceed patches_indices.len(), causing OOB panic
/// during decompression.
#[test]
fn test_slice_mid_chunk_with_patches_in_trailing_chunk() {
// 3 chunks (3072 elements), patches scattered across all chunks.
let mut values = vec![1.0f64; 3072];
// Chunk 0 patches (indices 0..1024)
values[100] = PI;
values[500] = E;
// Chunk 1 patches (indices 1024..2048)
values[1100] = PI;
values[1500] = E;
values[1900] = PI;
// Chunk 2 patches (indices 2048..3072)
values[2100] = PI;
values[2500] = E;
values[2900] = PI;

let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
let encoded = alp_encode(&original, None).unwrap();
assert!(encoded.patches().is_some());

// Slice ending mid-chunk-2 (element 2500 is inside chunk 2 = 2048..3072).
// This creates a mismatch: chunk_offsets includes the full chunk 2 offset,
// but patches_indices only includes patches up to element 2500.
let sliced_alp = encoded.slice(0..2500).unwrap();
let expected = original.slice(0..2500).unwrap();
assert_arrays_eq!(sliced_alp, expected);

// Also test slicing that starts mid-chunk (both start and end mid-chunk).
let sliced_alp = encoded.slice(500..2500).unwrap();
let expected = original.slice(500..2500).unwrap();
assert_arrays_eq!(sliced_alp, expected);
}
}
37 changes: 36 additions & 1 deletion vortex-array/src/arrays/primitive/array/patch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -111,8 +111,12 @@ pub fn patch_chunk<T, I, C>(
// Use the same logic as patches slice implementation for calculating patch ranges.
let patches_start_idx =
(chunk_offsets_slice[chunk_idx].as_() - base_offset).saturating_sub(offset_within_chunk);
// Clamp: chunk_offsets are sliced at chunk granularity but patches at element
// granularity, so the next chunk offset may exceed the actual patches length.
let patches_end_idx = if chunk_idx + 1 < chunk_offsets_slice.len() {
chunk_offsets_slice[chunk_idx + 1].as_() - base_offset - offset_within_chunk
(chunk_offsets_slice[chunk_idx + 1].as_() - base_offset)
.saturating_sub(offset_within_chunk)
.min(patches_indices.len())
} else {
patches_indices.len()
};
Expand All @@ -135,6 +139,37 @@ mod tests {
use crate::assert_arrays_eq;
use crate::validity::Validity;

/// Regression: patch_chunk must not OOB when chunk_offsets (chunk granularity)
/// reference more patches than patches_indices (element granularity) contains.
#[test]
fn patch_chunk_no_oob_on_mid_chunk_slice() {
let mut decoded_values = vec![0.0f64; PATCH_CHUNK_SIZE];
// 10 patches, but chunk_offsets claim 15 exist past offset adjustment.
let patches_indices: Vec<u64> = (0..10)
.map(|i| (PATCH_CHUNK_SIZE as u64) + i * 10)
.collect();
let patches_values: Vec<f64> = (0..10).map(|i| (i + 1) as f64 * 100.0).collect();
// chunk_offsets [5, 12, 20]: for chunk_idx=1 with offset_within_chunk=3,
// unclamped end = (20-5)-3 = 12, which exceeds patches len of 10.
let chunk_offsets: Vec<u32> = vec![5, 12, 20];

patch_chunk(
&mut decoded_values,
&patches_indices,
&patches_values,
0,
&chunk_offsets,
1,
3,
);

// Spot-check: patch index 4 (first in range) should be applied.
assert_ne!(
decoded_values[usize::try_from(patches_indices[4]).unwrap() - PATCH_CHUNK_SIZE],
0.0
);
}

#[test]
fn patch_sliced() {
let input = PrimitiveArray::new(buffer![2u32; 10], Validity::AllValid);
Expand Down
13 changes: 6 additions & 7 deletions vortex-array/src/patches.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,7 +460,9 @@ impl Patches {
.saturating_sub(offset_within_chunk);

let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
self.chunk_offset_at(chunk_idx + 1)? - base_offset - offset_within_chunk
(self.chunk_offset_at(chunk_idx + 1)? - base_offset)
.saturating_sub(offset_within_chunk)
.min(self.indices.len())
} else {
self.indices.len()
};
Expand Down Expand Up @@ -522,13 +524,10 @@ impl Patches {
.saturating_sub(offset_within_chunk);

let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 {
let base_offset_end = chunk_offsets[chunk_idx + 1];

let offset_within_chunk = O::from(offset_within_chunk)
.ok_or_else(|| vortex_err!("offset_within_chunk failed to convert to O"))?;

usize::try_from(base_offset_end - chunk_offsets[0] - offset_within_chunk)
usize::try_from(chunk_offsets[chunk_idx + 1] - chunk_offsets[0])
.map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))?
.saturating_sub(offset_within_chunk)
.min(indices.len())
} else {
self.indices.len()
};
Expand Down
Loading