diff --git a/benchmarks/compress-bench/src/lib.rs b/benchmarks/compress-bench/src/lib.rs index 3705343fe5a..b1692c40975 100644 --- a/benchmarks/compress-bench/src/lib.rs +++ b/benchmarks/compress-bench/src/lib.rs @@ -1,33 +1,7 @@ // SPDX-License-Identifier: Apache-2.0 // SPDX-FileCopyrightText: Copyright the Vortex contributors -use std::sync::Arc; - -use ::vortex::array::arrays::ChunkedArray; -use ::vortex::array::arrays::chunked::ChunkedArrayExt; -use ::vortex::array::arrays::listview::recursive_list_from_list_view; -use arrow_array::RecordBatch; -use arrow_schema::Schema; #[cfg(feature = "lance")] pub use lance_bench::compress::LanceCompressor; pub mod parquet; pub mod vortex; - -pub fn chunked_to_vec_record_batch( - chunked: ChunkedArray, -) -> anyhow::Result<(Vec<RecordBatch>, Arc<Schema>)> { - assert!(chunked.nchunks() > 0, "empty chunks"); - - let batches = chunked - .iter_chunks() - .map(|array| { - // TODO(connor)[ListView]: The rust Parquet implementation does not support writing - // `ListView` to Parquet files yet. - let converted_array = recursive_list_from_list_view(array.clone())?; - Ok(RecordBatch::try_from(&converted_array)?)
- }) - .collect::<anyhow::Result<Vec<_>>>()?; - - let schema = batches[0].schema(); - Ok((batches, schema)) -} diff --git a/vortex-array/public-api.lock b/vortex-array/public-api.lock index dced3a9f4a7..532afd22ec9 100644 --- a/vortex-array/public-api.lock +++ b/vortex-array/public-api.lock @@ -22120,12 +22120,6 @@ impl core::convert::From<vortex_array::arrays::datetime::TemporalData> for vortex_array::ArrayRef pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::datetime::TemporalData) -> Self -impl core::convert::TryFrom<&vortex_array::ArrayRef> for arrow_array::record_batch::RecordBatch - -pub type arrow_array::record_batch::RecordBatch::Error = vortex_error::VortexError - -pub fn arrow_array::record_batch::RecordBatch::try_from(value: &vortex_array::ArrayRef) -> vortex_error::VortexResult<arrow_array::record_batch::RecordBatch> - impl core::convert::TryFrom<vortex_array::ArrayRef> for vortex_array::arrays::datetime::TemporalData pub type vortex_array::arrays::datetime::TemporalData::Error = vortex_error::VortexError diff --git a/vortex-array/src/arrow/record_batch.rs b/vortex-array/src/arrow/record_batch.rs index 3163e967ca3..b57c307aed0 100644 --- a/vortex-array/src/arrow/record_batch.rs +++ b/vortex-array/src/arrow/record_batch.rs @@ -5,41 +5,14 @@ use arrow_array::RecordBatch; use arrow_array::cast::AsArray; use arrow_schema::DataType; use arrow_schema::Schema; -use vortex_error::VortexError; use vortex_error::VortexResult; -use vortex_error::vortex_bail; -use vortex_error::vortex_ensure; -use crate::ArrayRef; -use crate::Canonical; use crate::LEGACY_SESSION; use crate::VortexSessionExecute; use crate::array::IntoArray; use crate::arrays::StructArray; use crate::arrow::ArrowArrayExecutor; -// deprecated(note = "Use ArrowArrayExecutor::execute_record_batch instead") -impl TryFrom<&ArrayRef> for RecordBatch { - type Error = VortexError; - - fn try_from(value: &ArrayRef) -> VortexResult<Self> { - let Canonical::Struct(struct_array) = value.to_canonical()?
else { - vortex_bail!("RecordBatch can only be constructed from StructArray") - }; - - vortex_ensure!( - struct_array.validity()?.no_nulls(), - "RecordBatch can only be constructed from StructArray with no nulls" - ); - - let data_type = struct_array.dtype().to_arrow_dtype()?; - let array_ref = struct_array - .into_array() - .execute_arrow(Some(&data_type), &mut LEGACY_SESSION.create_execution_ctx())?; - Ok(RecordBatch::from(array_ref.as_struct())) - } -} - impl StructArray { pub fn into_record_batch_with_schema( self, diff --git a/vortex-bench/src/datasets/struct_list_of_ints.rs b/vortex-bench/src/datasets/struct_list_of_ints.rs index 4999c05adac..920d5cf7a52 100644 --- a/vortex-bench/src/datasets/struct_list_of_ints.rs +++ b/vortex-bench/src/datasets/struct_list_of_ints.rs @@ -5,7 +5,6 @@ use std::fs::File; use std::path::PathBuf; use anyhow::Result; -use arrow_array::RecordBatch; use async_trait::async_trait; use parquet::arrow::ArrowWriter; use rand::RngExt; @@ -13,12 +12,15 @@ use rand::SeedableRng; use rand::rngs::StdRng; use vortex::array::ArrayRef; use vortex::array::IntoArray; +use vortex::array::LEGACY_SESSION; +use vortex::array::VortexSessionExecute; use vortex::array::arrays::ChunkedArray; use vortex::array::arrays::ListArray; use vortex::array::arrays::PrimitiveArray; use vortex::array::arrays::StructArray; use vortex::array::arrays::chunked::ChunkedArrayExt; use vortex::array::arrays::listview::recursive_list_from_list_view; +use vortex::array::arrow::ArrowArrayExecutor; use vortex::array::validity::Validity; use vortex::dtype::FieldNames; @@ -123,7 +125,9 @@ impl Dataset for StructListOfInts { for chunk in chunked.iter_chunks() { let converted = recursive_list_from_list_view(chunk.clone())?; - let batch = RecordBatch::try_from(&converted)?; + let schema = converted.dtype().to_arrow_schema()?; + let batch = converted + .execute_record_batch(&schema, &mut LEGACY_SESSION.create_execution_ctx())?; if writer.is_none() { writer = Some(ArrowWriter::try_new(