Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 0 additions & 26 deletions benchmarks/compress-bench/src/lib.rs
Original file line number Diff line number Diff line change
@@ -1,33 +1,7 @@
// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors

use std::sync::Arc;

use ::vortex::array::arrays::ChunkedArray;
use ::vortex::array::arrays::chunked::ChunkedArrayExt;
use ::vortex::array::arrays::listview::recursive_list_from_list_view;
use arrow_array::RecordBatch;
use arrow_schema::Schema;
#[cfg(feature = "lance")]
pub use lance_bench::compress::LanceCompressor;
pub mod parquet;
pub mod vortex;

/// Converts every chunk of `chunked` into an Arrow [`RecordBatch`].
///
/// Returns the batches in chunk order together with the schema of the first batch.
///
/// # Errors
///
/// Returns an error if `chunked` has no chunks, if a chunk cannot be rewritten from
/// `ListView` to `List`, or if a chunk cannot be converted into a [`RecordBatch`].
pub fn chunked_to_vec_record_batch(
    chunked: ChunkedArray,
) -> anyhow::Result<(Vec<RecordBatch>, Arc<Schema>)> {
    // Report empty input through the `Result` instead of panicking: the schema is taken
    // from the first batch, so at least one chunk is required.
    anyhow::ensure!(chunked.nchunks() > 0, "empty chunks");

    let batches = chunked
        .iter_chunks()
        .map(|array| {
            // TODO(connor)[ListView]: The rust Parquet implementation does not support writing
            // `ListView` to Parquet files yet.
            let converted_array = recursive_list_from_list_view(array.clone())?;
            Ok(RecordBatch::try_from(&converted_array)?)
        })
        .collect::<anyhow::Result<Vec<_>>>()?;

    // Avoid a panicking index; an empty result is surfaced as the same "empty chunks" error.
    let schema = batches
        .first()
        .map(RecordBatch::schema)
        .ok_or_else(|| anyhow::anyhow!("empty chunks"))?;
    Ok((batches, schema))
}
6 changes: 0 additions & 6 deletions vortex-array/public-api.lock
Original file line number Diff line number Diff line change
Expand Up @@ -22120,12 +22120,6 @@ impl core::convert::From<vortex_array::arrays::datetime::TemporalData> for vorte

pub fn vortex_array::ArrayRef::from(value: vortex_array::arrays::datetime::TemporalData) -> Self

impl core::convert::TryFrom<&vortex_array::ArrayRef> for arrow_array::record_batch::RecordBatch

pub type arrow_array::record_batch::RecordBatch::Error = vortex_error::VortexError

pub fn arrow_array::record_batch::RecordBatch::try_from(value: &vortex_array::ArrayRef) -> vortex_error::VortexResult<Self>

impl core::convert::TryFrom<vortex_array::ArrayRef> for vortex_array::arrays::datetime::TemporalData

pub type vortex_array::arrays::datetime::TemporalData::Error = vortex_error::VortexError
Expand Down
27 changes: 0 additions & 27 deletions vortex-array/src/arrow/record_batch.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,41 +5,14 @@ use arrow_array::RecordBatch;
use arrow_array::cast::AsArray;
use arrow_schema::DataType;
use arrow_schema::Schema;
use vortex_error::VortexError;
use vortex_error::VortexResult;
use vortex_error::vortex_bail;
use vortex_error::vortex_ensure;

use crate::ArrayRef;
use crate::Canonical;
use crate::LEGACY_SESSION;
use crate::VortexSessionExecute;
use crate::array::IntoArray;
use crate::arrays::StructArray;
use crate::arrow::ArrowArrayExecutor;

// deprecated(note = "Use ArrowArrayExecutor::execute_record_batch instead")
//
// Converts a Vortex array into an Arrow `RecordBatch` by canonicalizing it and executing
// the resulting struct array to Arrow with the legacy session's execution context.
impl TryFrom<&ArrayRef> for RecordBatch {
    type Error = VortexError;

    /// Builds a [`RecordBatch`] from `value`.
    ///
    /// # Errors
    ///
    /// Returns an error if canonicalization fails, if the canonical form is not a struct,
    /// if the struct's validity reports any nulls, or if the Arrow conversion fails.
    fn try_from(value: &ArrayRef) -> VortexResult<Self> {
        let Canonical::Struct(struct_array) = value.to_canonical()? else {
            // The original message was cut off after "from "; name the required array type.
            vortex_bail!("RecordBatch can only be constructed from StructArray")
        };

        vortex_ensure!(
            struct_array.validity()?.no_nulls(),
            "RecordBatch can only be constructed from StructArray with no nulls"
        );

        // Request the Arrow type derived from the struct's own dtype.
        let data_type = struct_array.dtype().to_arrow_dtype()?;
        let array_ref = struct_array
            .into_array()
            .execute_arrow(Some(&data_type), &mut LEGACY_SESSION.create_execution_ctx())?;
        Ok(RecordBatch::from(array_ref.as_struct()))
    }
}

impl StructArray {
pub fn into_record_batch_with_schema(
self,
Expand Down
8 changes: 6 additions & 2 deletions vortex-bench/src/datasets/struct_list_of_ints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,22 @@ use std::fs::File;
use std::path::PathBuf;

use anyhow::Result;
use arrow_array::RecordBatch;
use async_trait::async_trait;
use parquet::arrow::ArrowWriter;
use rand::RngExt;
use rand::SeedableRng;
use rand::rngs::StdRng;
use vortex::array::ArrayRef;
use vortex::array::IntoArray;
use vortex::array::LEGACY_SESSION;
use vortex::array::VortexSessionExecute;
use vortex::array::arrays::ChunkedArray;
use vortex::array::arrays::ListArray;
use vortex::array::arrays::PrimitiveArray;
use vortex::array::arrays::StructArray;
use vortex::array::arrays::chunked::ChunkedArrayExt;
use vortex::array::arrays::listview::recursive_list_from_list_view;
use vortex::array::arrow::ArrowArrayExecutor;
use vortex::array::validity::Validity;
use vortex::dtype::FieldNames;

Expand Down Expand Up @@ -123,7 +125,9 @@ impl Dataset for StructListOfInts {

for chunk in chunked.iter_chunks() {
let converted = recursive_list_from_list_view(chunk.clone())?;
let batch = RecordBatch::try_from(&converted)?;
let schema = converted.dtype().to_arrow_schema()?;
let batch = converted
.execute_record_batch(&schema, &mut LEGACY_SESSION.create_execution_ctx())?;

if writer.is_none() {
writer = Some(ArrowWriter::try_new(
Expand Down
Loading