From 8ab2ff244b9bb2f2ed6f0c895c932296a58d58c3 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 12:02:21 -0400 Subject: [PATCH 1/8] fix Signed-off-by: Joe Isaacs --- vortex-array/src/arrow/record_batch.rs | 2 +- vortex-array/src/validity.rs | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/vortex-array/src/arrow/record_batch.rs b/vortex-array/src/arrow/record_batch.rs index 02ec62f626a..75000b1b31c 100644 --- a/vortex-array/src/arrow/record_batch.rs +++ b/vortex-array/src/arrow/record_batch.rs @@ -29,7 +29,7 @@ impl TryFrom<&ArrayRef> for RecordBatch { }; vortex_ensure!( - matches!(struct_array.validity()?, Validity::AllValid), + struct_array.validity()?.no_nulls(), "RecordBatch can only be constructed from StructArray with no nulls" ); diff --git a/vortex-array/src/validity.rs b/vortex-array/src/validity.rs index cb261a9b0b2..84586754dcf 100644 --- a/vortex-array/src/validity.rs +++ b/vortex-array/src/validity.rs @@ -112,6 +112,13 @@ impl Validity { } } + /// Returns `true` if this validity guarantees no null values, i.e. it is either + /// [`Validity::NonNullable`] or [`Validity::AllValid`]. + #[inline] + pub fn no_nulls(&self) -> bool { + matches!(self, Self::NonNullable | Self::AllValid) + } + /// The union nullability and validity. #[inline] pub fn union_nullability(self, nullability: Nullability) -> Self { From 1eff1a6bbd8b63f208456f0d4df3939a0b97c745 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 12:05:54 -0400 Subject: [PATCH 2/8] remove TryFrom<&ArrayRef> for RecordBatch, migrate callers to execute_record_batch Callers now derive the schema from the array dtype and use ArrowArrayExecutor::execute_record_batch directly. Signed-off-by: Joe Isaacs --- benchmarks/compress-bench/src/lib.rs | 8 +++++- vortex-array/src/arrow/record_batch.rs | 28 ------------------- .../src/datasets/struct_list_of_ints.rs | 6 ++-- 3 files changed, 11 insertions(+), 31 deletions(-) diff --git a/benchmarks/compress-bench/src/lib.rs b/benchmarks/compress-bench/src/lib.rs index 3705343fe5a..b197d3b2dc1 100644 --- a/benchmarks/compress-bench/src/lib.rs +++ b/benchmarks/compress-bench/src/lib.rs @@ -3,13 +3,17 @@ use std::sync::Arc; +use ::vortex::array::VortexSessionExecute; use ::vortex::array::arrays::ChunkedArray; use ::vortex::array::arrays::chunked::ChunkedArrayExt; use ::vortex::array::arrays::listview::recursive_list_from_list_view; +use ::vortex::array::arrow::ArrowArrayExecutor; use arrow_array::RecordBatch; use arrow_schema::Schema; #[cfg(feature = "lance")] pub use lance_bench::compress::LanceCompressor; +use vortex_bench::SESSION; + pub mod parquet; pub mod vortex; @@ -24,7 +28,9 @@ pub fn chunked_to_vec_record_batch( // TODO(connor)[ListView]: The rust Parquet implementation does not support writing // `ListView` to Parquet files yet. let converted_array = recursive_list_from_list_view(array.clone())?; - Ok(RecordBatch::try_from(&converted_array)?) + let schema = converted_array.dtype().to_arrow_schema()?; + Ok(converted_array + .execute_record_batch(&schema, &mut SESSION.create_execution_ctx())?) }) .collect::>>()?; diff --git a/vortex-array/src/arrow/record_batch.rs b/vortex-array/src/arrow/record_batch.rs index 75000b1b31c..b57c307aed0 100644 --- a/vortex-array/src/arrow/record_batch.rs +++ b/vortex-array/src/arrow/record_batch.rs @@ -5,41 +5,13 @@ use arrow_array::RecordBatch; use arrow_array::cast::AsArray; use arrow_schema::DataType; use arrow_schema::Schema; -use vortex_error::VortexError; use vortex_error::VortexResult; -use vortex_error::vortex_bail; -use vortex_error::vortex_ensure; -use crate::ArrayRef; -use crate::Canonical; use crate::LEGACY_SESSION; use crate::VortexSessionExecute; use crate::array::IntoArray; use crate::arrays::StructArray; use crate::arrow::ArrowArrayExecutor; -use crate::validity::Validity; - -// deprecated(note = "Use ArrowArrayExecutor::execute_record_batch instead") -impl TryFrom<&ArrayRef> for RecordBatch { - type Error = VortexError; - - fn try_from(value: &ArrayRef) -> VortexResult { - let Canonical::Struct(struct_array) = value.to_canonical()? else { - vortex_bail!("RecordBatch can only be constructed from ") - }; - - vortex_ensure!( - struct_array.validity()?.no_nulls(), - "RecordBatch can only be constructed from StructArray with no nulls" - ); - - let data_type = struct_array.dtype().to_arrow_dtype()?; - let array_ref = struct_array - .into_array() - .execute_arrow(Some(&data_type), &mut LEGACY_SESSION.create_execution_ctx())?; - Ok(RecordBatch::from(array_ref.as_struct())) - } -} impl StructArray { pub fn into_record_batch_with_schema( diff --git a/vortex-bench/src/datasets/struct_list_of_ints.rs b/vortex-bench/src/datasets/struct_list_of_ints.rs index 4999c05adac..0f30529bd49 100644 --- a/vortex-bench/src/datasets/struct_list_of_ints.rs +++ b/vortex-bench/src/datasets/struct_list_of_ints.rs @@ -5,7 +5,6 @@ use std::fs::File; use std::path::PathBuf; use anyhow::Result; -use arrow_array::RecordBatch; use async_trait::async_trait; use parquet::arrow::ArrowWriter; use rand::RngExt; @@ -18,8 +17,10 @@ use vortex::array::arrays::ListArray; use vortex::array::arrays::PrimitiveArray; use vortex::array::arrays::StructArray; use vortex::array::arrays::chunked::ChunkedArrayExt; +use vortex::array::arrow::ArrowArrayExecutor; use vortex::array::arrays::listview::recursive_list_from_list_view; use vortex::array::validity::Validity; +use vortex::array::{LEGACY_SESSION, VortexSessionExecute}; use vortex::dtype::FieldNames; use crate::IdempotentPath; @@ -123,7 +124,8 @@ impl Dataset for StructListOfInts { for chunk in chunked.iter_chunks() { let converted = recursive_list_from_list_view(chunk.clone())?; - let batch = RecordBatch::try_from(&converted)?; + let schema = converted.dtype().to_arrow_schema()?; + let batch = converted.execute_record_batch(&schema, &mut LEGACY_SESSION.create_execution_ctx())?; if writer.is_none() { writer = Some(ArrowWriter::try_new( From 8842e2806c0c330e6ea7df3ba025f7021c7e5805 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 12:10:52 -0400 Subject: [PATCH 3/8] fix Signed-off-by: Joe Isaacs --- vortex-bench/src/datasets/struct_list_of_ints.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vortex-bench/src/datasets/struct_list_of_ints.rs b/vortex-bench/src/datasets/struct_list_of_ints.rs index 0f30529bd49..920d5cf7a52 100644 --- a/vortex-bench/src/datasets/struct_list_of_ints.rs +++ b/vortex-bench/src/datasets/struct_list_of_ints.rs @@ -12,15 +12,16 @@ use rand::SeedableRng; use rand::rngs::StdRng; use vortex::array::ArrayRef; use vortex::array::IntoArray; +use vortex::array::LEGACY_SESSION; +use vortex::array::VortexSessionExecute; use vortex::array::arrays::ChunkedArray; use vortex::array::arrays::ListArray; use vortex::array::arrays::PrimitiveArray; use vortex::array::arrays::StructArray; use vortex::array::arrays::chunked::ChunkedArrayExt; -use vortex::array::arrow::ArrowArrayExecutor; use vortex::array::arrays::listview::recursive_list_from_list_view; +use vortex::array::arrow::ArrowArrayExecutor; use vortex::array::validity::Validity; -use vortex::array::{LEGACY_SESSION, VortexSessionExecute}; use vortex::dtype::FieldNames; use crate::IdempotentPath; @@ -125,7 +126,8 @@ impl Dataset for StructListOfInts { for chunk in chunked.iter_chunks() { let converted = recursive_list_from_list_view(chunk.clone())?; let schema = converted.dtype().to_arrow_schema()?; - let batch = converted.execute_record_batch(&schema, &mut LEGACY_SESSION.create_execution_ctx())?; + let batch = converted + .execute_record_batch(&schema, &mut LEGACY_SESSION.create_execution_ctx())?; if writer.is_none() { writer = Some(ArrowWriter::try_new( From 7140d546abd1013564aa283e7021ae0037791d2d Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 13:42:48 -0400 Subject: [PATCH 4/8] fix Signed-off-by: Joe Isaacs --- benchmarks/compress-bench/src/lib.rs | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/benchmarks/compress-bench/src/lib.rs b/benchmarks/compress-bench/src/lib.rs index b197d3b2dc1..d4281418707 100644 --- a/benchmarks/compress-bench/src/lib.rs +++ b/benchmarks/compress-bench/src/lib.rs @@ -16,24 +16,3 @@ use vortex_bench::SESSION; pub mod parquet; pub mod vortex; - -pub fn chunked_to_vec_record_batch( - chunked: ChunkedArray, -) -> anyhow::Result<(Vec, Arc)> { - assert!(chunked.nchunks() > 0, "empty chunks"); - - let batches = chunked - .iter_chunks() - .map(|array| { - // TODO(connor)[ListView]: The rust Parquet implementation does not support writing - // `ListView` to Parquet files yet. - let converted_array = recursive_list_from_list_view(array.clone())?; - let schema = converted_array.dtype().to_arrow_schema()?; - Ok(converted_array - .execute_record_batch(&schema, &mut SESSION.create_execution_ctx())?) - }) - .collect::>>()?; - - let schema = batches[0].schema(); - Ok((batches, schema)) -} From 1c1e756f9b3e84907e320cff45449cd11c27b29a Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 13:43:33 -0400 Subject: [PATCH 5/8] fix Signed-off-by: Joe Isaacs --- benchmarks/compress-bench/src/lib.rs | 7 ------- 1 file changed, 7 deletions(-) diff --git a/benchmarks/compress-bench/src/lib.rs b/benchmarks/compress-bench/src/lib.rs index d4281418707..d20a157cf2e 100644 --- a/benchmarks/compress-bench/src/lib.rs +++ b/benchmarks/compress-bench/src/lib.rs @@ -1,18 +1,11 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::sync::Arc; - use ::vortex::array::VortexSessionExecute; -use ::vortex::array::arrays::ChunkedArray; use ::vortex::array::arrays::chunked::ChunkedArrayExt; -use ::vortex::array::arrays::listview::recursive_list_from_list_view; use ::vortex::array::arrow::ArrowArrayExecutor; -use arrow_array::RecordBatch; -use arrow_schema::Schema; #[cfg(feature = "lance")] pub use lance_bench::compress::LanceCompressor; -use vortex_bench::SESSION; pub mod parquet; pub mod vortex; From 3877aaa0f4ca92e6a6bda3c0344af67e64fc35bd Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 13:49:13 -0400 Subject: [PATCH 6/8] fix Signed-off-by: Joe Isaacs --- vortex-array/public-api.lock | 6 ------ 1 file changed, 6 deletions(-) diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index dced3a9f4a7..532afd22ec9 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -22120,12 +22120,6 @@ impl core::convert::From for vorte pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::datetime::TemporalData) -> Self -impl core::convert::TryFrom<&vortex_array::ArrayRef> for arrow_array::record_batch::RecordBatch - -pub type arrow_array::record_batch::RecordBatch::Error = vortex_error::VortexError - -pub fn arrow_array::record_batch::RecordBatch::try_from(value: &vortex_array::ArrayRef) -> vortex_error::VortexResult - impl core::convert::TryFrom for vortex_array::arrays::datetime::TemporalData pub type vortex_array::arrays::datetime::TemporalData::Error = vortex_error::VortexError From b75014ed382f09c6a0892ce2e93aecc1edfb5f96 Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 13:49:35 -0400 Subject: [PATCH 7/8] fix Signed-off-by: Joe Isaacs --- benchmarks/compress-bench/src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/benchmarks/compress-bench/src/lib.rs b/benchmarks/compress-bench/src/lib.rs index d20a157cf2e..91ba1180faf 100644 --- a/benchmarks/compress-bench/src/lib.rs +++ b/benchmarks/compress-bench/src/lib.rs @@ -6,6 +6,5 @@ use ::vortex::array::arrays::chunked::ChunkedArrayExt; use ::vortex::array::arrow::ArrowArrayExecutor; #[cfg(feature = "lance")] pub use lance_bench::compress::LanceCompressor; - pub mod parquet; pub mod vortex; From bee85801d95f743d5dad684314539b0cd9d1fcaa Mon Sep 17 00:00:00 2001 From: Joe Isaacs Date: Thu, 16 Apr 2026 14:18:18 -0400 Subject: [PATCH 8/8] fix Signed-off-by: Joe Isaacs --- benchmarks/compress-bench/src/lib.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/benchmarks/compress-bench/src/lib.rs b/benchmarks/compress-bench/src/lib.rs index 91ba1180faf..b1692c40975 100644 --- a/benchmarks/compress-bench/src/lib.rs +++ b/benchmarks/compress-bench/src/lib.rs @@ -1,9 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use ::vortex::array::VortexSessionExecute; -use ::vortex::array::arrays::chunked::ChunkedArrayExt; -use ::vortex::array::arrow::ArrowArrayExecutor; #[cfg(feature = "lance")] pub use lance_bench::compress::LanceCompressor; pub mod parquet;