Skip to content

Commit

Permalink
Support converting Boolean Array in DuckDB (milvus-io#554)
Browse files Browse the repository at this point in the history
  • Loading branch information
eddyxu committed Feb 11, 2023
1 parent 7093669 commit fafdaa5
Show file tree
Hide file tree
Showing 4 changed files with 72 additions and 7 deletions.
3 changes: 3 additions & 0 deletions integration/duckdb_lance/Cargo.toml
Expand Up @@ -13,6 +13,9 @@ arrow-array = "32.0.0"
futures = "0.3"
num-traits = "0.2"

[dev-dependencies]
libduckdb-sys = { version = "0.6.1", features = ["bundled"] }

[lib]
name = "duckdb_lance"
crate-type = ["staticlib"]
2 changes: 1 addition & 1 deletion integration/duckdb_lance/duckdb-ext/Cargo.toml
Expand Up @@ -8,4 +8,4 @@ edition = "2021"
[build-dependencies]
bindgen = "0.64.0"
build_script = "0.2.0"
cc = "1.0.78"
cc = "1.0.78"
30 changes: 26 additions & 4 deletions integration/duckdb_lance/duckdb-ext/src/data_chunk.rs
Expand Up @@ -13,18 +13,31 @@
// limitations under the License.

use super::vector::{FlatVector, ListVector, StructVector};
use crate::ffi::{
duckdb_data_chunk, duckdb_data_chunk_get_size, duckdb_data_chunk_get_vector,
duckdb_data_chunk_set_size,
use crate::{
ffi::{
duckdb_create_data_chunk, duckdb_data_chunk, duckdb_data_chunk_get_size,
duckdb_data_chunk_get_vector, duckdb_data_chunk_set_size, duckdb_destroy_data_chunk,
},
LogicalType,
};

/// DataChunk in DuckDB.
///
/// Thin wrapper around a `duckdb_data_chunk` handle from the DuckDB C API.
pub struct DataChunk {
/// Pointer to the DataChunk in duckdb C API.
ptr: duckdb_data_chunk,

/// Whether this [DataChunk] owns the [DataChunk::ptr].
///
/// When `true`, the underlying chunk is destroyed when this value is dropped;
/// when `false`, the handle is only borrowed and is left untouched on drop.
owned: bool,
}

impl DataChunk {
pub fn new(logical_types: &[LogicalType]) -> Self {
let num_columns = logical_types.len();
let mut c_types = logical_types.iter().map(|t| t.ptr).collect::<Vec<_>>();
let ptr = unsafe { duckdb_create_data_chunk(c_types.as_mut_ptr(), num_columns as u64) };
DataChunk { ptr, owned: true }
}

/// Get the vector at the specific column index: `idx`.
///
pub fn flat_vector(&self, idx: usize) -> FlatVector {
Expand Down Expand Up @@ -58,6 +71,15 @@ impl DataChunk {

/// Wrap a raw `duckdb_data_chunk` handle without taking ownership of it.
///
/// The resulting [DataChunk] is marked as not owned, so dropping it does not
/// destroy the underlying chunk.
impl From<duckdb_data_chunk> for DataChunk {
fn from(ptr: duckdb_data_chunk) -> Self {
Self { ptr }
Self { ptr, owned: false }
}
}

/// Releases the underlying DuckDB chunk, but only for chunks this wrapper owns.
impl Drop for DataChunk {
    fn drop(&mut self) {
        // Borrowed handles and already-cleared pointers need no cleanup.
        if !self.owned || self.ptr.is_null() {
            return;
        }

        // SAFETY: `ptr` is non-null and owned by this value (both checked above).
        unsafe { duckdb_destroy_data_chunk(&mut self.ptr) };
        // Clear the handle so it can never be destroyed a second time.
        self.ptr = std::ptr::null_mut();
    }
}
44 changes: 42 additions & 2 deletions integration/duckdb_lance/src/arrow.rs
Expand Up @@ -14,7 +14,6 @@

//! Arrow / DuckDB conversion.

use crate::{Error, Result};
use arrow_array::{
cast::{
as_boolean_array, as_large_list_array, as_list_array, as_primitive_array, as_string_array,
Expand All @@ -30,6 +29,8 @@ use duckdb_ext::{LogicalType, LogicalTypeId};
use lance::arrow::as_fixed_size_list_array;
use num_traits::AsPrimitive;

use crate::{Error, Result};

pub fn to_duckdb_type_id(data_type: &DataType) -> Result<LogicalTypeId> {
use LogicalTypeId::*;

Expand Down Expand Up @@ -116,7 +117,7 @@ pub fn record_batch_to_duckdb_data_chunk(batch: &RecordBatch, chunk: &mut DataCh
for i in 0..batch.num_columns() {
let col = batch.column(i);
match col.data_type() {
dt if dt.is_primitive() => {
dt if dt.is_primitive() || matches!(dt, DataType::Boolean) => {
primitive_array_to_vector(col, &mut chunk.flat_vector(i));
}
DataType::Utf8 => {
Expand Down Expand Up @@ -331,3 +332,42 @@ fn struct_array_to_vector(array: &StructArray, out: &mut StructVector) {
}
}
}

#[cfg(test)]
mod tests {
    use super::*;

    use std::sync::Arc;

    use arrow_schema::{Field, Schema};

    // Referenced only so that the test binary links against a DuckDB library.
    #[allow(unused_imports)]
    use libduckdb_sys;

    /// A non-nullable Boolean column is copied into a DuckDB boolean flat vector.
    #[test]
    fn test_record_batch_to_data_chunk() {
        let field = Field::new("b", DataType::Boolean, false);
        let schema = Arc::new(Schema::new(vec![field]));
        let column = BooleanArray::from(vec![true, false, true]);
        let batch = RecordBatch::try_new(schema.clone(), vec![Arc::new(column)]).unwrap();

        // One DuckDB logical type per Arrow field, in schema order.
        let mut logical_types = Vec::with_capacity(schema.fields.len());
        for field in schema.fields.iter() {
            logical_types.push(to_duckdb_logical_type(field.data_type()).unwrap());
        }
        let mut chunk = DataChunk::new(&logical_types);

        record_batch_to_duckdb_data_chunk(&batch, &mut chunk).unwrap();
        assert_eq!(chunk.len(), 3);

        let vector = chunk.flat_vector(0);
        assert_eq!(LogicalTypeId::Boolean, vector.logical_type().id());

        // Only the first three slots were written by the conversion.
        let values = vector.as_slice::<bool>();
        assert_eq!(&values[..3], &[true, false, true]);
    }
}

0 comments on commit fafdaa5

Please sign in to comment.