Skip to content

Commit b33b5bd

Browse files
committed
Merge remote-tracking branch 'origin/develop' into aduffy/fuzz2
2 parents 0bcd7f1 + 56c8acb commit b33b5bd

File tree

5 files changed

+81
-47
lines changed

5 files changed

+81
-47
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

vortex-duckdb/Cargo.toml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,9 @@ tokio-stream = { workspace = true }
3838
vortex = { workspace = true, features = ["files"] }
3939
vortex-file = { workspace = true, features = ["tokio"] }
4040

41+
[dev-dependencies]
42+
rstest = { workspace = true }
43+
4144
[lints]
4245
workspace = true
4346

vortex-duckdb/src/convert/dtype.rs

Lines changed: 16 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -319,6 +319,7 @@ impl TryFrom<&DType> for LogicalType {
319319
mod tests {
320320
use std::sync::Arc;
321321

322+
use rstest::rstest;
322323
use vortex::dtype::{DType, FieldName, FieldNames, Nullability, PType, StructFields};
323324

324325
use crate::cpp;
@@ -344,39 +345,21 @@ mod tests {
344345
);
345346
}
346347

347-
#[test]
348-
fn test_integer_types() {
349-
// Test signed and unsigned integers.
350-
let test_cases = [
351-
(PType::I8, cpp::DUCKDB_TYPE::DUCKDB_TYPE_TINYINT),
352-
(PType::I16, cpp::DUCKDB_TYPE::DUCKDB_TYPE_SMALLINT),
353-
(PType::I32, cpp::DUCKDB_TYPE::DUCKDB_TYPE_INTEGER),
354-
(PType::I64, cpp::DUCKDB_TYPE::DUCKDB_TYPE_BIGINT),
355-
(PType::U8, cpp::DUCKDB_TYPE::DUCKDB_TYPE_UTINYINT),
356-
(PType::U16, cpp::DUCKDB_TYPE::DUCKDB_TYPE_USMALLINT),
357-
(PType::U32, cpp::DUCKDB_TYPE::DUCKDB_TYPE_UINTEGER),
358-
(PType::U64, cpp::DUCKDB_TYPE::DUCKDB_TYPE_UBIGINT),
359-
];
360-
361-
for (ptype, expected_duckdb_type) in test_cases {
362-
let dtype = DType::Primitive(ptype, Nullability::NonNullable);
363-
let logical_type = LogicalType::try_from(&dtype).unwrap();
364-
assert_eq!(logical_type.as_type_id(), expected_duckdb_type);
365-
}
366-
}
367-
368-
#[test]
369-
fn test_float_types() {
370-
let float_test_cases = [
371-
(PType::F32, cpp::DUCKDB_TYPE::DUCKDB_TYPE_FLOAT),
372-
(PType::F64, cpp::DUCKDB_TYPE::DUCKDB_TYPE_DOUBLE),
373-
];
374-
375-
for (ptype, expected_duckdb_type) in float_test_cases {
376-
let dtype = DType::Primitive(ptype, Nullability::NonNullable);
377-
let logical_type = LogicalType::try_from(&dtype).unwrap();
378-
assert_eq!(logical_type.as_type_id(), expected_duckdb_type);
379-
}
348+
#[rstest]
349+
#[case(PType::I8, cpp::DUCKDB_TYPE::DUCKDB_TYPE_TINYINT)]
350+
#[case(PType::I16, cpp::DUCKDB_TYPE::DUCKDB_TYPE_SMALLINT)]
351+
#[case(PType::I32, cpp::DUCKDB_TYPE::DUCKDB_TYPE_INTEGER)]
352+
#[case(PType::I64, cpp::DUCKDB_TYPE::DUCKDB_TYPE_BIGINT)]
353+
#[case(PType::U8, cpp::DUCKDB_TYPE::DUCKDB_TYPE_UTINYINT)]
354+
#[case(PType::U16, cpp::DUCKDB_TYPE::DUCKDB_TYPE_USMALLINT)]
355+
#[case(PType::U32, cpp::DUCKDB_TYPE::DUCKDB_TYPE_UINTEGER)]
356+
#[case(PType::U64, cpp::DUCKDB_TYPE::DUCKDB_TYPE_UBIGINT)]
357+
#[case(PType::F32, cpp::DUCKDB_TYPE::DUCKDB_TYPE_FLOAT)]
358+
#[case(PType::F64, cpp::DUCKDB_TYPE::DUCKDB_TYPE_DOUBLE)]
359+
fn test_primitive_types(#[case] ptype: PType, #[case] expected_duckdb_type: cpp::DUCKDB_TYPE) {
360+
let dtype = DType::Primitive(ptype, Nullability::NonNullable);
361+
let logical_type = LogicalType::try_from(&dtype).unwrap();
362+
assert_eq!(logical_type.as_type_id(), expected_duckdb_type);
380363
}
381364

382365
#[test]

vortex-layout/src/layouts/zoned/mod.rs

Lines changed: 49 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -145,12 +145,13 @@ impl ZonedLayout {
145145
usize::try_from(self.zones.row_count()).vortex_expect("Invalid number of zones")
146146
}
147147

148+
/// Returns an array of stats that exist in the layout's data, must be sorted.
148149
pub fn present_stats(&self) -> &Arc<[Stat]> {
149150
&self.present_stats
150151
}
151152
}
152153

153-
#[derive(Debug)]
154+
#[derive(Debug, PartialEq, Eq, Clone)]
154155
pub struct ZonedMetadata {
155156
pub(super) zone_len: u32,
156157
pub(super) present_stats: Arc<[Stat]>,
@@ -179,3 +180,50 @@ impl SerializeMetadata for ZonedMetadata {
179180
metadata
180181
}
181182
}
183+
184+
#[cfg(test)]
185+
mod tests {
186+
187+
use rstest::rstest;
188+
189+
use super::*;
190+
191+
#[rstest]
192+
#[case(ZonedMetadata {
193+
zone_len: u32::MAX,
194+
present_stats: Arc::new([]),
195+
})]
196+
#[case(ZonedMetadata {
197+
zone_len: 0,
198+
present_stats: Arc::new([Stat::IsConstant]),
199+
})]
200+
#[case::all_sorted(ZonedMetadata {
201+
zone_len: 314,
202+
present_stats: Arc::new([Stat::IsConstant, Stat::IsSorted, Stat::IsStrictSorted, Stat::Max, Stat::Min, Stat::Sum, Stat::NullCount, Stat::UncompressedSizeInBytes, Stat::NaNCount]),
203+
})]
204+
#[case::some_sorted(ZonedMetadata {
205+
zone_len: 314,
206+
present_stats: Arc::new([Stat::IsSorted, Stat::IsStrictSorted, Stat::Max, Stat::Min, Stat::Sum, Stat::NullCount, Stat::UncompressedSizeInBytes, Stat::NaNCount]),
207+
})]
208+
fn test_metadata_serialization(#[case] metadata: ZonedMetadata) {
209+
let serialized = metadata.clone().serialize();
210+
let deserialized = ZonedMetadata::deserialize(&serialized).unwrap();
211+
assert_eq!(deserialized, metadata);
212+
}
213+
214+
#[test]
215+
fn test_deserialize_unsorted_stats() {
216+
let metadata = ZonedMetadata {
217+
zone_len: u32::MAX,
218+
present_stats: Arc::new([Stat::IsStrictSorted, Stat::IsSorted]),
219+
};
220+
let serialized = metadata.clone().serialize();
221+
let deserialized = ZonedMetadata::deserialize(&serialized).unwrap();
222+
assert!(deserialized.present_stats.is_sorted());
223+
assert_eq!(
224+
deserialized.present_stats.len(),
225+
metadata.present_stats.len()
226+
);
227+
assert_ne!(deserialized.present_stats, metadata.present_stats);
228+
}
229+
}

vortex-layout/src/layouts/zoned/zone_map.rs

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@ pub struct ZoneMap {
3131
}
3232

3333
impl ZoneMap {
34-
/// Create StatsTable of given column_dtype from given array. Validates that the array matches expected
35-
/// structure for given list of stats
34+
/// Create [`ZoneMap`] of given column_dtype from given array. Validates that the array matches expected
35+
/// structure for given list of stats.
3636
pub fn try_new(
3737
column_dtype: DType,
3838
array: StructArray,
@@ -45,12 +45,12 @@ impl ZoneMap {
4545
Ok(Self::new_unchecked(array, stats))
4646
}
4747

48-
/// Create StatsTable without validating return array against expected stats
48+
/// Creates [`ZoneMap`] without validating return array against expected stats.
4949
pub fn new_unchecked(array: StructArray, stats: Arc<[Stat]>) -> Self {
5050
Self { array, stats }
5151
}
5252

53-
/// Returns the DType of the statistics table given a set of statistics and column [`DType`].
53+
/// Returns the [`DType`] of the statistics table given a set of statistics and column [`DType`].
5454
pub fn dtype_for_stats_table(column_dtype: &DType, present_stats: &[Stat]) -> DType {
5555
assert!(present_stats.is_sorted(), "Stats must be sorted");
5656
DType::Struct(
@@ -77,17 +77,17 @@ impl ZoneMap {
7777
)
7878
}
7979

80-
/// The struct array backing the zone map
80+
/// Returns the underlying [`StructArray`] backing the zone map
8181
pub fn array(&self) -> &StructArray {
8282
&self.array
8383
}
8484

85-
/// The statistics that are included in the table.
85+
/// Returns the list of stats included in the zone map.
8686
pub fn present_stats(&self) -> &Arc<[Stat]> {
8787
&self.stats
8888
}
8989

90-
/// Return an aggregated stats set for the table.
90+
/// Returns an aggregated stats set for the table.
9191
pub fn to_stats_set(&self, stats: &[Stat]) -> VortexResult<StatsSet> {
9292
let mut stats_set = StatsSet::default();
9393
for &stat in stats {
@@ -117,7 +117,7 @@ impl ZoneMap {
117117
Ok(stats_set)
118118
}
119119

120-
/// Return the array for a given stat.
120+
/// Returns the array for a given stat.
121121
pub fn get_stat(&self, stat: Stat) -> VortexResult<Option<ArrayRef>> {
122122
Ok(self.array.field_by_name_opt(stat.name()).cloned())
123123
}
@@ -139,12 +139,11 @@ impl ZoneMap {
139139
}
140140
}
141141

142+
// TODO(ngates): we should make it such that the zone map stores a mirror of the DType
143+
// underneath each stats column. For example, `min: i32` for an `i32` array.
144+
// Or `min: {a: i32, b: i32}` for a struct array of type `{a: i32, b: i32}`.
145+
// See: <https://github.com/vortex-data/vortex/issues/1835>
142146
/// Accumulates statistics for a column.
143-
///
144-
/// TODO(ngates): we should make it such that the zone map stores a mirror of the DType
145-
/// underneath each stats column. For example, `min: i32` for an `i32` array.
146-
/// Or `min: {a: i32, b: i32}` for a struct array of type `{a: i32, b: i32}`.
147-
/// See: <https://github.com/vortex-data/vortex/issues/1835>
148147
pub struct StatsAccumulator {
149148
builders: Vec<Box<dyn StatsArrayBuilder>>,
150149
length: usize,

0 commit comments

Comments
 (0)