From 7b8b311ade28cd8bd54a7f804363d64e0657e6d1 Mon Sep 17 00:00:00 2001 From: Abanoub Doss Date: Wed, 8 Apr 2026 22:30:39 -0500 Subject: [PATCH 1/2] Fix patch_chunk index OOB when slicing ALP arrays mid-chunk Signed-off-by: Abanoub Doss --- encodings/alp/src/alp/compress.rs | 40 +++++++++++++++++++ .../src/arrays/primitive/array/patch.rs | 37 ++++++++++++++++- vortex-array/src/patches.rs | 13 +++--- 3 files changed, 82 insertions(+), 8 deletions(-) diff --git a/encodings/alp/src/alp/compress.rs b/encodings/alp/src/alp/compress.rs index a8215eef6fb..b259089a334 100644 --- a/encodings/alp/src/alp/compress.rs +++ b/encodings/alp/src/alp/compress.rs @@ -546,4 +546,44 @@ mod tests { assert_arrays_eq!(decoded, array); } + + /// Regression test for patch_chunk index-out-of-bounds when slicing a multi-chunk + /// ALP array mid-chunk with patches in the trailing chunk. + /// + /// The bug: chunk_offsets are sliced at chunk granularity (1024-row boundaries) + /// but patches indices/values are sliced at element granularity. When a slice ends + /// mid-chunk, patches_end_idx could exceed patches_indices.len(), causing OOB panic + /// during decompression. + #[test] + fn test_slice_mid_chunk_with_patches_in_trailing_chunk() { + // 3 chunks (3072 elements), patches scattered across all chunks. + let mut values = vec![1.0f64; 3072]; + // Chunk 0 patches (indices 0..1024) + values[100] = PI; + values[500] = E; + // Chunk 1 patches (indices 1024..2048) + values[1100] = PI; + values[1500] = E; + values[1900] = PI; + // Chunk 2 patches (indices 2048..3072) + values[2100] = PI; + values[2500] = E; + values[2900] = PI; + + let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable); + let encoded = alp_encode(&original, None).unwrap(); + assert!(encoded.patches().is_some()); + + // Slice ending mid-chunk-2 (element 2500 is inside chunk 2 = 2048..3072). + // This creates a mismatch: chunk_offsets includes the full chunk 2 offset, + // but patches_indices only includes patches up to element 2500. + let sliced_alp = encoded.slice(0..2500).unwrap(); + let expected = original.slice(0..2500).unwrap(); + assert_arrays_eq!(sliced_alp, expected); + + // Also test slicing that starts mid-chunk (both start and end mid-chunk). + let sliced_alp = encoded.slice(500..2500).unwrap(); + let expected = original.slice(500..2500).unwrap(); + assert_arrays_eq!(sliced_alp, expected); + } } diff --git a/vortex-array/src/arrays/primitive/array/patch.rs b/vortex-array/src/arrays/primitive/array/patch.rs index 8844009d410..115f2fef69f 100644 --- a/vortex-array/src/arrays/primitive/array/patch.rs +++ b/vortex-array/src/arrays/primitive/array/patch.rs @@ -111,8 +111,12 @@ pub fn patch_chunk( // Use the same logic as patches slice implementation for calculating patch ranges. let patches_start_idx = (chunk_offsets_slice[chunk_idx].as_() - base_offset).saturating_sub(offset_within_chunk); + // Clamp: chunk_offsets are sliced at chunk granularity but patches at element + // granularity, so the next chunk offset may exceed the actual patches length. let patches_end_idx = if chunk_idx + 1 < chunk_offsets_slice.len() { - chunk_offsets_slice[chunk_idx + 1].as_() - base_offset - offset_within_chunk + (chunk_offsets_slice[chunk_idx + 1].as_() - base_offset) + .saturating_sub(offset_within_chunk) + .min(patches_indices.len()) } else { patches_indices.len() }; @@ -135,6 +139,37 @@ mod tests { use crate::assert_arrays_eq; use crate::validity::Validity; + /// Regression: patch_chunk must not OOB when chunk_offsets (chunk granularity) + /// reference more patches than patches_indices (element granularity) contains. + #[test] + fn patch_chunk_no_oob_on_mid_chunk_slice() { + let mut decoded_values = vec![0.0f64; PATCH_CHUNK_SIZE]; + // 10 patches, but chunk_offsets claim 15 exist past offset adjustment. + let patches_indices: Vec = (0..10) + .map(|i| (PATCH_CHUNK_SIZE as u64) + i * 10) + .collect(); + let patches_values: Vec = (0..10).map(|i| (i + 1) as f64 * 100.0).collect(); + // chunk_offsets [5, 12, 20]: for chunk_idx=1 with offset_within_chunk=3, + // unclamped end = (20-5)-3 = 12, which exceeds patches len of 10. + let chunk_offsets: Vec = vec![5, 12, 20]; + + patch_chunk( + &mut decoded_values, + &patches_indices, + &patches_values, + 0, + &chunk_offsets, + 1, + 3, + ); + + // Spot-check: patch index 4 (first in range) should be applied. + assert_ne!( + decoded_values[patches_indices[4] as usize - PATCH_CHUNK_SIZE], + 0.0 + ); + } + #[test] fn patch_sliced() { let input = PrimitiveArray::new(buffer![2u32; 10], Validity::AllValid); diff --git a/vortex-array/src/patches.rs b/vortex-array/src/patches.rs index 42581935e19..1a614ceacf8 100644 --- a/vortex-array/src/patches.rs +++ b/vortex-array/src/patches.rs @@ -460,7 +460,9 @@ impl Patches { .saturating_sub(offset_within_chunk); let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 { - self.chunk_offset_at(chunk_idx + 1)? - base_offset - offset_within_chunk + (self.chunk_offset_at(chunk_idx + 1)? - base_offset) + .saturating_sub(offset_within_chunk) + .min(self.indices.len()) } else { self.indices.len() }; @@ -522,13 +524,10 @@ impl Patches { .saturating_sub(offset_within_chunk); let patches_end_idx = if chunk_idx < chunk_offsets.len() - 1 { - let base_offset_end = chunk_offsets[chunk_idx + 1]; - - let offset_within_chunk = O::from(offset_within_chunk) - .ok_or_else(|| vortex_err!("offset_within_chunk failed to convert to O"))?; - - usize::try_from(base_offset_end - chunk_offsets[0] - offset_within_chunk) + usize::try_from(chunk_offsets[chunk_idx + 1] - chunk_offsets[0]) .map_err(|_| vortex_err!("patches_end_idx failed to convert to usize"))? + .saturating_sub(offset_within_chunk) + .min(indices.len()) } else { self.indices.len() }; From f62f1172f8997e5f1e26a259b212a4091f7757b4 Mon Sep 17 00:00:00 2001 From: Abanoub Doss Date: Thu, 9 Apr 2026 08:03:14 -0500 Subject: [PATCH 2/2] fixed linting for test Signed-off-by: Abanoub Doss --- vortex-array/src/arrays/primitive/array/patch.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vortex-array/src/arrays/primitive/array/patch.rs b/vortex-array/src/arrays/primitive/array/patch.rs index 115f2fef69f..51fae5e9cd0 100644 --- a/vortex-array/src/arrays/primitive/array/patch.rs +++ b/vortex-array/src/arrays/primitive/array/patch.rs @@ -165,7 +165,7 @@ mod tests { // Spot-check: patch index 4 (first in range) should be applied. assert_ne!( - decoded_values[patches_indices[4] as usize - PATCH_CHUNK_SIZE], + decoded_values[usize::try_from(patches_indices[4]).unwrap() - PATCH_CHUNK_SIZE], 0.0 ); }