Skip to content

Commit

Permalink
Add function castBIGINT_timestamp (apache#22)
Browse files Browse the repository at this point in the history
* Add function castBIGINT_timestamp

* fix

* wip
  • Loading branch information
zhztheplayer committed Jun 23, 2021
1 parent 684ac60 commit 640cf3f
Show file tree
Hide file tree
Showing 12 changed files with 72 additions and 10 deletions.
11 changes: 11 additions & 0 deletions cpp/src/arrow/compute/kernels/scalar_cast_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1041,6 +1041,17 @@ TEST(Cast, TimestampToTimestamp) {
options.allow_time_truncate = true;
CheckCast(will_be_truncated, coarse, options);
}

// Timezone-only cast: the source type carries the "UTC+8" zone and the target
// has none, while both use the MILLI unit.
for (auto types : {
TimestampTypePair{timestamp(TimeUnit::MILLI, "UTC+8"), timestamp(TimeUnit::MILLI)}
}) {
// `coarse` is the zoned input; `promoted` is the expected zone-less output.
auto coarse = ArrayFromJSON(types.coarse, "[0, null, 200000000000, 1000000000, 2000000000]");
auto promoted =
ArrayFromJSON(types.fine, "[0, null, 200000000000, 1000000000, 2000000000]");

// Same unit on both sides, so no scaling is involved: the expected values are
// identical to the input values (nulls included).
CheckCast(coarse, promoted);
}
}

TEST(Cast, TimestampZeroCopy) {
Expand Down
5 changes: 4 additions & 1 deletion cpp/src/gandiva/function_registry_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ inline DataTypePtr time32() { return arrow::time32(arrow::TimeUnit::MILLI); }
// Gandiva's canonical time64 type: Arrow time64 with microsecond unit.
inline DataTypePtr time64() {
  const auto unit = arrow::TimeUnit::MICRO;
  return arrow::time64(unit);
}

// Gandiva's canonical timestamp type: Arrow timestamp with millisecond unit
// and no timezone argument.
inline DataTypePtr timestamp() {
  const auto unit = arrow::TimeUnit::MILLI;
  return arrow::timestamp(unit);
}

// Zoned timestamp type used for registry signatures: Arrow timestamp with
// microsecond unit and the "UTC" timezone.
inline DataTypePtr timestampusutc() {
  const auto unit = arrow::TimeUnit::MICRO;
  return arrow::timestamp(unit, "UTC");
}

// Gandiva's canonical decimal type: Arrow decimal with precision 38, scale 0.
inline DataTypePtr decimal128() {
  const int precision = 38;
  const int scale = 0;
  return arrow::decimal(precision, scale);
}

struct KeyHash {
Expand Down Expand Up @@ -243,7 +246,7 @@ typedef std::unordered_map<const FunctionSignature*, const NativeFunction*, KeyH

// Iterate the inner macro over all date types.
// Expands to a comma-separated list with one INNER(NAME, ALIASES, <type>)
// invocation each for date64, timestamp and timestampusutc. Every line except
// the last must end in a continuation backslash so the whole list stays part
// of the macro body.
#define DATE_TYPES(INNER, NAME, ALIASES)                          \
  INNER(NAME, ALIASES, date64), INNER(NAME, ALIASES, timestamp), \
      INNER(NAME, ALIASES, timestampusutc)

// Iterate the inner macro over all time types.
// Expands to a single INNER(NAME, ALIASES, time32) invocation, since time32
// is the only time type listed here.
#define TIME_TYPES(INNER, NAME, ALIASES) INNER(NAME, ALIASES, time32)
Expand Down
12 changes: 11 additions & 1 deletion cpp/src/gandiva/function_registry_datetime.cc
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
NativeFunction("castTIME", {}, DataTypeVector{timestamp()}, time32(),
kResultNullIfNull, "castTIME_timestamp"),

NativeFunction("castBIGINT", {}, DataTypeVector{timestamp()}, int64(),
kResultNullIfNull, "castBIGINT_timestamp"),

NativeFunction("castBIGINT", {}, DataTypeVector{day_time_interval()}, int64(),
kResultNullIfNull, "castBIGINT_daytimeinterval"),

Expand All @@ -97,7 +100,14 @@ std::vector<NativeFunction> GetDateTimeFunctionRegistry() {
kResultNullIfNull, "convertTimestampUnit_us"),

NativeFunction("castDATE", {}, DataTypeVector{date64()}, date32(),
kResultNullIfNull, "castDATE_date64"),
kResultNullIfNull, "castDATE32_date64"),

NativeFunction("castTIMESTAMP", {}, DataTypeVector{date32()}, timestamp(),
kResultNullIfNull, "castTIMESTAMP_date32"),

NativeFunction("castDATE", {}, DataTypeVector{timestamp()}, date32(),
kResultNullIfNull, "castDATE32_timestamp"),

DATE_TYPES(LAST_DAY_SAFE_NULL_IF_NULL, last_day, {})};

return date_time_fn_registry_;
Expand Down
8 changes: 8 additions & 0 deletions cpp/src/gandiva/function_signature.cc
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,14 @@ bool DataTypeEquals(const DataTypePtr& left, const DataTypePtr& right) {
return (dleft != NULL) && (dright != NULL) &&
(dleft->byte_width() == dright->byte_width());
}
case arrow::Type::TIMESTAMP: {
  // Timestamp signatures are treated as equal when the units match and both
  // sides are either zoned or zone-less; the zone id itself is not compared.
  auto tleft = checked_cast<arrow::TimestampType *>(left.get());
  auto tright = checked_cast<arrow::TimestampType *>(right.get());
  // The emptiness check must look at both operands: comparing the left
  // timezone with itself is always true and would make zoned and zone-less
  // timestamps indistinguishable.
  return (tleft != NULL) && (tright != NULL) &&
         (tleft->unit() == tright->unit()) &&
         (tleft->timezone().empty() == tright->timezone().empty());
}
default:
return left->Equals(right);
}
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/gandiva/gdv_function_stubs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,8 @@ int32_t gdv_fn_populate_varlen_vector(int64_t context_ptr, int8_t* data_ptr,
INNER(date64) \
INNER(date32) \
INNER(time32) \
INNER(timestamp)
INNER(timestamp) \
INNER(timestampusutc)

// Expand inner macro for all numeric types.
#define SHA_VAR_LEN_PARAMS(INNER) \
Expand Down
1 change: 1 addition & 0 deletions cpp/src/gandiva/gdv_function_stubs.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ using gdv_date64 = int64_t;
using gdv_date32 = int32_t;
using gdv_time32 = int32_t;
using gdv_timestamp = int64_t;
using gdv_timestampusutc = int64_t;
using gdv_utf8 = char*;
using gdv_binary = char*;
using gdv_day_time_interval = int64_t;
Expand Down
15 changes: 11 additions & 4 deletions cpp/src/gandiva/jni/jni_common.cc
Original file line number Diff line number Diff line change
Expand Up @@ -155,19 +155,26 @@ DataTypePtr ProtoTypeToTime64(const types::ExtGandivaType& ext_type) {
}

// Build an Arrow timestamp type from a protobuf type descriptor.
//
// The time unit comes from ext_type.timeunit() and the zone id from
// ext_type.timezone(). Returns nullptr, after logging to stderr, when the
// time unit is not one of SEC / MILLISEC / MICROSEC / NANOSEC.
DataTypePtr ProtoTypeToTimestamp(const types::ExtGandivaType& ext_type) {
  arrow::TimeUnit::type unit;
  // Only pick the unit here; returning from inside the switch would skip the
  // timezone handling below.
  switch (ext_type.timeunit()) {
    case types::SEC:
      unit = arrow::TimeUnit::SECOND;
      break;
    case types::MILLISEC:
      unit = arrow::TimeUnit::MILLI;
      break;
    case types::MICROSEC:
      unit = arrow::TimeUnit::MICRO;
      break;
    case types::NANOSEC:
      unit = arrow::TimeUnit::NANO;
      break;
    default:
      std::cerr << "Unknown time unit: " << ext_type.timeunit() << " for timestamp\n";
      return nullptr;
  }
  // NOTE(review): an unset timezone field presumably reads back as an empty
  // string, which should yield a zone-less timestamp - confirm.
  return arrow::timestamp(unit, ext_type.timezone());
}

DataTypePtr ProtoTypeToInterval(const types::ExtGandivaType& ext_type) {
Expand Down
2 changes: 2 additions & 0 deletions cpp/src/gandiva/precompiled/arithmetic_ops.cc
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ extern "C" {
INNER(NAME, date64, OP) \
INNER(NAME, date32, OP) \
INNER(NAME, timestamp, OP) \
INNER(NAME, timestampusutc, OP) \
INNER(NAME, time32, OP)

#define NUMERIC_DATE_TYPES(INNER, NAME, OP) \
Expand Down Expand Up @@ -253,6 +254,7 @@ NUMERIC_TYPES(VALIDITY_OP, isnumeric, +)
INNER(date32) \
INNER(date64) \
INNER(timestamp) \
INNER(timestampusutc) \
INNER(time32)

#define NUMERIC_BOOL_DATE_FUNCTION(INNER) \
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/gandiva/precompiled/hash.cc
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,8 @@ FORCE_INLINE gdv_int32 hash64_spark_int64_int32(gdv_int64 val, gdv_boolean is_va
INNER(NAME, date64) \
INNER(NAME, date32) \
INNER(NAME, time32) \
INNER(NAME, timestamp)
INNER(NAME, timestamp) \
INNER(NAME, timestampusutc)

NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash)
NUMERIC_BOOL_DATE_TYPES(HASH32_OP, hash32)
Expand Down
15 changes: 14 additions & 1 deletion cpp/src/gandiva/precompiled/time.cc
Original file line number Diff line number Diff line change
Expand Up @@ -813,10 +813,18 @@ gdv_timestamp convertTimestampUnit_us(gdv_timestamp timestamp_in_micro) {
return timestamp_in_micro / 1000;
}

// Convert a date64 value (milliseconds) to a date32 day count by dividing by
// MILLIS_IN_DAY. Integer division truncates toward zero.
// Defined under the name castDATE32_date64, which is the symbol the function
// registry entry and the header prototype refer to.
gdv_date32 castDATE32_date64(gdv_date64 date_in_millis) {
  return static_cast<gdv_date32>(date_in_millis / (MILLIS_IN_DAY));
}

// Convert a date32 day count to a timestamp by multiplying by MILLIS_IN_DAY.
//
// gdv_date32 is a 32-bit integer while gdv_timestamp is 64-bit, so the day
// count is widened before the multiplication and the product is returned as
// a 64-bit value. Narrowing the result to gdv_date32 would wrap for anything
// beyond roughly 24 days from the epoch (assuming MILLIS_IN_DAY is
// 86,400,000 - confirm against its definition).
gdv_timestamp castTIMESTAMP_date32(gdv_date32 in_day) {
  return static_cast<gdv_timestamp>(in_day) * (MILLIS_IN_DAY);
}

// Convert a timestamp to a date32 day count: divide by MILLIS_IN_DAY, then
// narrow the quotient to gdv_date32. Integer division truncates toward zero.
gdv_date32 castDATE32_timestamp(gdv_timestamp timestamp_in_millis) {
  const auto whole_days = timestamp_in_millis / (MILLIS_IN_DAY);
  return static_cast<gdv_date32>(whole_days);
}

const char* castVARCHAR_timestamp_int64(gdv_int64 context, gdv_timestamp in,
gdv_int64 length, gdv_int32* out_len) {
gdv_int64 year = extractYear_timestamp(in);
Expand Down Expand Up @@ -877,6 +885,11 @@ gdv_int64 extractMillis_daytimeinterval(gdv_day_time_interval in) {
return static_cast<gdv_int64>(millis);
}

// Cast a timestamp to BIGINT: the stored integer value is returned unchanged.
FORCE_INLINE
gdv_int64 castBIGINT_timestamp(gdv_timestamp in) {
  const gdv_int64 raw_value = in;
  return raw_value;
}

FORCE_INLINE
gdv_int64 castBIGINT_daytimeinterval(gdv_day_time_interval in) {
return extractMillis_daytimeinterval(in) +
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/gandiva/precompiled/types.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ using gdv_date64 = int64_t;
using gdv_date32 = int32_t;
using gdv_time32 = int32_t;
using gdv_timestamp = int64_t;
using gdv_timestampusutc = int64_t;
using gdv_utf8 = char*;
using gdv_binary = char*;
using gdv_day_time_interval = int64_t;
Expand Down Expand Up @@ -278,7 +279,7 @@ gdv_date64 castDATE_timestamp(gdv_timestamp);
gdv_time32 castTIME_timestamp(gdv_timestamp timestamp_in_millis);
gdv_timestamp convertTimestampUnit_ms(gdv_timestamp);
gdv_timestamp convertTimestampUnit_us(gdv_timestamp);
gdv_date32 castDATE_date64(gdv_date64 date);
gdv_date32 castDATE32_date64(gdv_date64 date);
const char* castVARCHAR_timestamp_int64(int64_t, gdv_timestamp, gdv_int64, gdv_int32*);
gdv_date64 last_day_from_timestamp(gdv_date64 millis);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,10 @@ private static void initArrowTypeTimestamp(ArrowType.Timestamp timestampType,
// not supported
}
}
String timezone = timestampType.getTimezone();
if (timezone != null) {
builder.setTimeZone(timezone);
}
}

private static void initArrowTypeInterval(ArrowType.Interval interval,
Expand Down

0 comments on commit 640cf3f

Please sign in to comment.