Skip to content

Commit

Permalink
[EXT] Add two datetime functions: from_unixtime_tz and unix_timestamp_tz
Browse files Browse the repository at this point in the history
Add two datetime functions: from_unixtime_tz and unix_timestamp_tz
  • Loading branch information
zzcclp committed Aug 31, 2019
1 parent 32e5be4 commit ebca5e0
Show file tree
Hide file tree
Showing 3 changed files with 188 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,9 @@ object FunctionRegistry {
expression[Year]("year"),
expression[TimeWindow]("window"),

expression[FromUnixTimeTZ]("from_unixtime_tz"),
expression[UnixTimestampTZ]("unix_timestamp_tz"),

// collection functions
expression[CreateArray]("array"),
expression[ArrayContains]("array_contains"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -845,6 +845,183 @@ case class FromUnixTime(sec: Expression, format: Expression, timeZoneId: Option[
}
}

/**
* Converts the number of seconds from unix epoch (1970-01-01 00:00:00 UTC) to a string
* representing the timestamp of that moment in the given time zone in the given
* format.
*/
@ExpressionDescription(
usage = "_FUNC_(unix_time, format_tz) - Returns `unix_time` in the specified `format`.",
extended = """
Examples:
> SELECT _FUNC_(0, 'yyyy-MM-dd HH:mm:ss@GMT+08:00');
1970-01-01 08:00:00
""")
case class FromUnixTimeTZ(sec: Expression, format: Expression)
extends BinaryExpression with ImplicitCastInputTypes {

override def left: Expression = sec
override def right: Expression = format

private val tzSplit = "@"
private val tzDefaul = "GMT+00:00"

override def prettyName: String = "from_unixtime_tz"

override def dataType: DataType = StringType
override def nullable: Boolean = true

override def inputTypes: Seq[AbstractDataType] = Seq(LongType, StringType)

private lazy val constFormatTZ: UTF8String = right.eval().asInstanceOf[UTF8String]
private lazy val tzSplitIdx = constFormatTZ.toString.indexOf(tzSplit)
private lazy val timeZoneStr = if (tzSplitIdx != -1) {
constFormatTZ.toString.substring(tzSplitIdx + 1)
} else {
tzDefaul
}
private lazy val constFormat = if (tzSplitIdx != -1) {
UTF8String.fromString(constFormatTZ.toString.substring(0, tzSplitIdx))
} else {
constFormatTZ
}
@transient lazy val timeZone: TimeZone = DateTimeUtils.getTimeZone(timeZoneStr)
private lazy val formatter: DateFormat =
try {
DateTimeUtils.newDateFormat(constFormat.toString, timeZone)
} catch {
case NonFatal(_) => null
}

override def eval(input: InternalRow): Any = {
val time = left.eval(input)
if (time == null) {
null
} else {
if (constFormat == null || formatter == null) {
null
} else {
try {
UTF8String.fromString(formatter.format(
new java.util.Date(time.asInstanceOf[Long] * 1000L)))
} catch {
case NonFatal(_) => null
}
}
}
}

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val df = classOf[DateFormat].getName
if (formatter == null) {
ExprCode("", "true", "(UTF8String) null")
} else {
val formatterName = ctx.addReferenceObj("formatter", formatter, df)
val t = left.genCode(ctx)
ev.copy(code = s"""
${t.code}
boolean ${ev.isNull} = ${t.isNull};
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
if (!${ev.isNull}) {
try {
${ev.value} = UTF8String.fromString($formatterName.format(
new java.util.Date(${t.value} * 1000L)));
} catch (java.lang.IllegalArgumentException e) {
${ev.isNull} = true;
}
}""")
}
}
}

/**
* Converts time string with given pattern.
* (see [http://docs.oracle.com/javase/tutorial/i18n/format/simpleDateFormat.html])
* to Unix time stamp (in seconds), returns null if fail.
*/
@ExpressionDescription(
usage = "_FUNC_([expr[, pattern]]) - Returns the UNIX timestamp of specified time.",
extended = """
Examples:
> SELECT _FUNC_('1970-01-01 08:00:00', 'yyyy-MM-dd HH:mm:ss@GMT+08:00');
0
""")
case class UnixTimestampTZ(timeExp: Expression, format: Expression)
extends BinaryExpression with ImplicitCastInputTypes {

override def left: Expression = timeExp
override def right: Expression = format

private val tzSplit = "@"
private val tzDefaul = "GMT+00:00"

override def prettyName: String = "unix_timestamp_tz"

override def dataType: DataType = LongType
override def nullable: Boolean = true

override def inputTypes: Seq[AbstractDataType] = Seq(StringType, StringType)

private lazy val constFormatTZ: UTF8String = right.eval().asInstanceOf[UTF8String]
private lazy val tzSplitIdx = constFormatTZ.toString.indexOf(tzSplit)
private lazy val timeZoneStr: String = if (tzSplitIdx != -1) {
constFormatTZ.toString.substring(tzSplitIdx + 1)
} else {
tzDefaul
}
private lazy val constFormat: UTF8String = if (tzSplitIdx != -1) {
UTF8String.fromString(constFormatTZ.toString.substring(0, tzSplitIdx))
} else {
constFormatTZ
}
@transient lazy val timeZone: TimeZone = DateTimeUtils.getTimeZone(timeZoneStr)
private lazy val formatter: DateFormat =
try {
DateTimeUtils.newDateFormat(constFormat.toString, timeZone)
} catch {
case NonFatal(_) => null
}

override def eval(input: InternalRow): Any = {
val time = left.eval(input)
if (time == null) {
null
} else {
if (constFormat == null || formatter == null) {
null
} else {
try {
formatter.parse(
time.asInstanceOf[UTF8String].toString).getTime / 1000L
} catch {
case NonFatal(_) => null
}
}
}
}

override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
val df = classOf[DateFormat].getName
if (formatter == null) {
ExprCode("", "true", ctx.defaultValue(dataType))
} else {
val formatterName = ctx.addReferenceObj("formatter", formatter, df)
val eval1 = left.genCode(ctx)
ev.copy(code = s"""
${eval1.code}
boolean ${ev.isNull} = ${eval1.isNull};
${ctx.javaType(dataType)} ${ev.value} = ${ctx.defaultValue(dataType)};
if (!${ev.isNull}) {
try {
${ev.value} = $formatterName.parse(${eval1.value}.toString()).getTime() / 1000L;
} catch (java.text.ParseException e) {
${ev.isNull} = true;
}
}""")
}
}
}

/**
* Returns the last day of the month which the date belongs to.
*/
Expand Down
8 changes: 8 additions & 0 deletions sql/core/src/main/scala/org/apache/spark/sql/functions.scala
Original file line number Diff line number Diff line change
Expand Up @@ -2769,6 +2769,14 @@ object functions {
FromUnixTime(ut.expr, Literal(f))
}

def from_unixtime_tz(ut: Column, f: String): Column = withExpr {
FromUnixTimeTZ(ut.expr, Literal(f))
}

def unix_timestamp_tz(s: Column, p: String): Column = withExpr {
UnixTimestampTZ(s.expr, Literal(p))
}

/**
* Returns the current Unix timestamp (in seconds).
*
Expand Down

0 comments on commit ebca5e0

Please sign in to comment.