Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions java/vortex-jni/src/main/java/dev/vortex/api/Expression.java
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,10 @@
import dev.vortex.VortexCleaner;
import dev.vortex.jni.NativeExpression;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.UUID;

/**
* A Vortex expression node backed by a native pointer.
Expand All @@ -18,6 +21,9 @@
* ownership — the resulting expression is an independent copy on the native side.
*/
public final class Expression {
/** Number of bytes in a UUID's big-endian representation. */
private static final int UUID_BYTE_LEN = 16;

private final long pointer;

private Expression(long pointer) {
Expand Down Expand Up @@ -198,6 +204,44 @@ public static Expression nullLiteralTimestamp(TimeUnit unit, String timezone) {
return new Expression(NativeExpression.literalTimestamp(0L, unit.tag(), timezone, true));
}

/**
 * Build a literal expression from a {@link UUID}, enabling predicate pushdown over UUID columns.
 *
 * <p>The UUID is serialized to its 16-byte big-endian (network order) form, the layout used by Vortex's UUID
 * extension type and Arrow's canonical UUID type, and then passed to {@link #literalUuid(byte[])}.
 *
 * @param value the UUID to embed; a null argument fails the precondition check, use
 *     {@link #nullLiteralUuid()} for a null literal instead
 * @return a new expression holding the UUID literal
 */
public static Expression literal(UUID value) {
    Preconditions.checkArgument(value != null, "use nullLiteralUuid() for a null UUID literal");
    byte[] networkOrderBytes = uuidToBigEndianBytes(value);
    return literalUuid(networkOrderBytes);
}

/**
 * Build a UUID literal directly from raw bytes in big-endian (network) order, such as the bytes of Arrow's
 * canonical UUID type or a {@link UUID} written most-significant-bits first.
 *
 * <p>Both checks run before the native call: the array must be non-null and exactly 16 bytes long.
 *
 * @param bigEndianBytes exactly 16 bytes; use {@link #nullLiteralUuid()} for a null literal
 * @return a new expression holding the UUID literal
 */
public static Expression literalUuid(byte[] bigEndianBytes) {
    Preconditions.checkArgument(bigEndianBytes != null, "use nullLiteralUuid() for a null UUID literal");
    // Safe to read the length only after the null guard above has passed.
    int actualLength = bigEndianBytes.length;
    Preconditions.checkArgument(
            actualLength == UUID_BYTE_LEN,
            "UUID literal must be exactly %s bytes, got %s",
            UUID_BYTE_LEN,
            actualLength);
    long nativeHandle = NativeExpression.literalUuid(bigEndianBytes, false);
    return new Expression(nativeHandle);
}

/**
 * Create a null UUID literal.
 *
 * <p>A zero-filled array of {@code UUID_BYTE_LEN} bytes is passed as a placeholder together with the null flag.
 *
 * @return a new expression holding a null UUID literal
 */
public static Expression nullLiteralUuid() {
    byte[] placeholderBytes = new byte[UUID_BYTE_LEN];
    long nativeHandle = NativeExpression.literalUuid(placeholderBytes, true);
    return new Expression(nativeHandle);
}

/**
 * Serialize a {@link UUID} into {@code UUID_BYTE_LEN} bytes: the most-significant 64 bits first, followed by the
 * least-significant 64 bits, each written in big-endian byte order.
 */
private static byte[] uuidToBigEndianBytes(UUID value) {
    ByteBuffer buffer = ByteBuffer.allocate(UUID_BYTE_LEN);
    buffer.order(ByteOrder.BIG_ENDIAN);
    buffer.putLong(value.getMostSignificantBits());
    buffer.putLong(value.getLeastSignificantBits());
    return buffer.array();
}

/** Create a typed null literal of the given primitive {@link DType}. */
public static Expression nullLiteral(DType dtype) {
return new Expression(NativeExpression.literalNull(dtype.tag()));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ public static native long between(

public static native long literalTimestamp(long value, byte timeUnitTag, String timezone, boolean isNull);

/**
 * Native entry point that builds a UUID literal expression.
 *
 * @param bigEndianBytes the UUID's 16-byte big-endian (network order) representation; ignored by the native
 *     side when {@code isNull} is true
 * @param isNull whether to produce a typed null UUID literal instead of a value
 * @return a native pointer to the newly created expression
 */
public static native long literalUuid(byte[] bigEndianBytes, boolean isNull);

public static native long literalNull(byte dtypeTag);

public static native void free(long pointer);
Expand Down
73 changes: 73 additions & 0 deletions vortex-jni/src/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ use vortex::dtype::DecimalDType;
use vortex::dtype::FieldName;
use vortex::dtype::Nullability;
use vortex::dtype::PType;
use vortex::dtype::extension::ExtDType;
use vortex::error::vortex_err;
use vortex::expr::Expression;
use vortex::expr::and_collect;
Expand All @@ -45,6 +46,8 @@ use vortex::expr::select;
use vortex::extension::datetime::Date;
use vortex::extension::datetime::TimeUnit;
use vortex::extension::datetime::Timestamp;
use vortex::extension::uuid::Uuid;
use vortex::extension::uuid::UuidMetadata;
use vortex::scalar::DecimalValue;
use vortex::scalar::Scalar;
use vortex::scalar::ScalarValue;
Expand Down Expand Up @@ -522,6 +525,76 @@ pub extern "system" fn Java_dev_vortex_jni_NativeExpression_literalTimestamp(
})
}

/// Number of bytes in a UUID's big-endian representation.
///
/// Used both as the fixed-size list length of the UUID storage dtype and as the exact input
/// length required of UUID literal bytes.
const UUID_BYTE_LEN: usize = 16;

/// Construct the version-agnostic UUID extension [`DType`] with the requested nullability.
///
/// The storage dtype is a `FixedSizeList` of `UUID_BYTE_LEN` non-nullable `U8` elements,
/// matching Vortex's UUID extension and Arrow's canonical UUID type. The `nullability`
/// argument is applied to the list itself; only the elements are always non-nullable.
/// The metadata is `UuidMetadata::default()`, which records no version constraint, so the
/// dtype is compatible with any UUID column regardless of the UUID versions it contains.
///
/// # Errors
///
/// Fails if `UUID_BYTE_LEN` cannot be represented as a `u32`, or if `ExtDType::try_new`
/// rejects the storage dtype.
fn uuid_dtype(nullability: Nullability) -> Result<DType, JNIError> {
    let byte_count = u32::try_from(UUID_BYTE_LEN)
        .map_err(|_| vortex_err!("UUID byte length {UUID_BYTE_LEN} does not fit in u32"))?;

    let element_dtype = Arc::new(DType::Primitive(PType::U8, Nullability::NonNullable));
    let storage = DType::FixedSizeList(element_dtype, byte_count, nullability);

    let uuid_ext = ExtDType::<Uuid>::try_new(UuidMetadata::default(), storage)?;
    Ok(DType::Extension(uuid_ext.erased()))
}

/// Turn a 16-byte big-endian UUID representation into a non-null UUID [`Scalar`].
///
/// Each input byte becomes a non-nullable `U8` scalar; together they form a non-nullable
/// fixed-size list whose value is then paired with the non-nullable UUID extension dtype.
///
/// # Errors
///
/// Fails if `bytes` is not exactly `UUID_BYTE_LEN` long, if the UUID dtype cannot be built,
/// or if `Scalar::try_new` rejects the dtype/value pair.
fn uuid_scalar(bytes: &[u8]) -> Result<Scalar, JNIError> {
    let actual_len = bytes.len();
    if actual_len != UUID_BYTE_LEN {
        throw_runtime!(
            "UUID literal must be exactly {UUID_BYTE_LEN} bytes, got {}",
            actual_len
        );
    }

    // One non-nullable `U8` scalar per input byte, in input order.
    let byte_scalars = bytes
        .iter()
        .copied()
        .map(|byte| Scalar::primitive(byte, Nullability::NonNullable))
        .collect::<Vec<Scalar>>();
    let element_dtype = DType::Primitive(PType::U8, Nullability::NonNullable);
    let storage = Scalar::fixed_size_list(element_dtype, byte_scalars, Nullability::NonNullable);

    let dtype = uuid_dtype(Nullability::NonNullable)?;
    let scalar = Scalar::try_new(dtype, storage.into_value())?;
    Ok(scalar)
}

/// JNI entry point that creates a UUID literal expression from big-endian bytes.
///
/// If `is_null_flag` is set, `value` is not inspected and a typed null literal of the nullable
/// UUID dtype is returned. Otherwise `value` must be a non-null array of exactly 16 bytes in
/// big-endian (network) order — the same layout as a `java.util.UUID` written
/// most-significant-bits first, and Arrow's canonical UUID extension. The literal is
/// version-agnostic so it compares against any UUID column.
///
/// The resulting expression is handed back to Java as a raw pointer; failures are reported
/// through `try_or_throw`.
#[unsafe(no_mangle)]
pub extern "system" fn Java_dev_vortex_jni_NativeExpression_literalUuid(
    mut env: EnvUnowned,
    _class: JClass,
    value: JByteArray,
    is_null_flag: jboolean,
) -> jlong {
    try_or_throw(&mut env, |env| {
        let scalar = if is_null_flag {
            // Null literal: the byte array is deliberately left untouched.
            Scalar::null(uuid_dtype(Nullability::Nullable)?)
        } else {
            if value.is_null() {
                throw_runtime!("UUID literal bytes must not be null");
            }
            let raw_bytes = env.convert_byte_array(&value)?;
            uuid_scalar(&raw_bytes)?
        };
        Ok(into_raw(lit(scalar)))
    })
}

/// Build a typed null literal whose nullable dtype is selected by `dtype_tag`.
///
/// Tag values intentionally do not overlap with [`parse_time_unit`].
Expand Down
Loading