forked from apache/hive
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
HIVE-27199: Read TIMESTAMP WITH LOCAL TIME ZONE columns from text files using custom formats (Stamatis Zampetakis reviewed by Ayush Saxena, John Sherman, Attila Turóczy)
1. Support parsing TimestampTZ using the TimestampParser, which accepts multiple DateTimeFormatters. 2. Pass timestamp.formats in Lazy inspector handling TIMESTAMP WITH LOCAL TIME ZONE and instantiate a TimestampParser. 3. Refactor TimestampTZUtil to allow passing different DateTimeFormatters. 4. Add tests covering timestamps with 3 different formats (built-in, plus 2 more not covered by the default). These changes give more flexibility to users reading timestamps from text files and also align the way TIMESTAMP and TIMESTAMP WITH LOCAL TIME ZONE behave when a custom format is provided. Closes apache#4170
- Loading branch information
Showing
8 changed files
with
245 additions
and
8 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
DEFAULT2016-05-03 12:26:34.123456789 Europe/London | ||
DEFAULT2016-05-03 12:26:34.123456789Europe/London | ||
DEFAULT2016-05-03 12:26:34.123456789 GMT+01:00 | ||
DEFAULT2016-05-03 12:26:34.123456789GMT+01:00 | ||
DEFAULT2016-05-03 12:26:34.123456789 GMT+1:00 | ||
DEFAULT2016-05-03 12:26:34.123456789GMT+1:00 | ||
DEFAULT2016-05-03 12:26:34.123456789 +01:00 | ||
DEFAULT2016-05-03 12:26:34.123456789+01:00 | ||
DEFAULT2016-05-03 12:26:34.123456789 +1:00 | ||
DEFAULT2016-05-03 12:26:34.123456789 | ||
DEFAULT2016-05-03 12:26:34.123450000 | ||
DEFAULT2016-05-03 12:26:34.12345 | ||
DEFAULT2016-05-03 12:26:34.1 | ||
DEFAULT2016-05-03 12:26:34.0 | ||
DEFAULT2016-05-03 12:26:34 | ||
DEFAULT2016-05-03 | ||
FORMAT12016-05-03T12:26:34Europe/London | ||
FORMAT12016-05-03T12:26:34+01:00 | ||
FORMAT2May 3 2016 12:26:34 | ||
FORMAT2May 03 2016 12:26:34 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
-- Test table: a format tag (formatid) plus a TIMESTAMP WITH LOCAL TIME ZONE value (tsval), | ||
-- read from text through LazySimpleSerDe. | ||
CREATE TABLE timestampltz_formats ( | ||
formatid string, | ||
tsval timestamp with local time zone | ||
) | ||
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'; | ||
|
||
-- Load the mixed-format fixture file; "overwrite" replaces any existing table contents. | ||
LOAD DATA LOCAL INPATH '../../data/files/timestamps_mixed_formats.txt' overwrite into table timestampltz_formats; | ||
|
||
-- Baseline read: this script has not set "timestamp.formats" yet. | ||
-- In the recorded output the DEFAULT rows parse and the FORMAT1/FORMAT2 rows come back NULL. | ||
SELECT formatid, tsval FROM timestampltz_formats; | ||
|
||
-- Register a single custom pattern: date, literal 'T', time, then a zone (VV). | ||
ALTER TABLE timestampltz_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ssVV"); | ||
|
||
-- Recorded output: DEFAULT rows still parse, FORMAT1 rows now parse, FORMAT2 rows stay NULL. | ||
SELECT formatid, tsval FROM timestampltz_formats; | ||
|
||
-- Replace the property with a different single pattern: month name, day, year, time (no zone). | ||
ALTER TABLE timestampltz_formats SET SERDEPROPERTIES ("timestamp.formats"="MMM d yyyy HH:mm:ss"); | ||
|
||
-- Recorded output: DEFAULT rows still parse, FORMAT2 rows now parse, FORMAT1 rows are NULL again. | ||
SELECT formatid, tsval FROM timestampltz_formats; | ||
|
||
-- Set both patterns at once as a comma-separated list. | ||
ALTER TABLE timestampltz_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ssVV,MMM d yyyy HH:mm:ss"); | ||
|
||
-- Recorded output: every row (DEFAULT, FORMAT1, FORMAT2) yields a non-NULL tsval. | ||
SELECT formatid, tsval FROM timestampltz_formats; |
160 changes: 160 additions & 0 deletions
160
ql/src/test/results/clientpositive/llap/timestamptz_formats.q.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,160 @@ | ||
PREHOOK: query: CREATE TABLE timestampltz_formats ( | ||
formatid string, | ||
tsval timestamp with local time zone | ||
) | ||
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' | ||
PREHOOK: type: CREATETABLE | ||
PREHOOK: Output: database:default | ||
PREHOOK: Output: default@timestampltz_formats | ||
POSTHOOK: query: CREATE TABLE timestampltz_formats ( | ||
formatid string, | ||
tsval timestamp with local time zone | ||
) | ||
ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe' | ||
POSTHOOK: type: CREATETABLE | ||
POSTHOOK: Output: database:default | ||
POSTHOOK: Output: default@timestampltz_formats | ||
PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/timestamps_mixed_formats.txt' overwrite into table timestampltz_formats | ||
PREHOOK: type: LOAD | ||
#### A masked pattern was here #### | ||
PREHOOK: Output: default@timestampltz_formats | ||
POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/timestamps_mixed_formats.txt' overwrite into table timestampltz_formats | ||
POSTHOOK: type: LOAD | ||
#### A masked pattern was here #### | ||
POSTHOOK: Output: default@timestampltz_formats | ||
PREHOOK: query: SELECT formatid, tsval FROM timestampltz_formats | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@timestampltz_formats | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: SELECT formatid, tsval FROM timestampltz_formats | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@timestampltz_formats | ||
#### A masked pattern was here #### | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.12345 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.12345 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.1 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.0 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.0 US/Pacific | ||
DEFAULT 2016-05-03 00:00:00.0 US/Pacific | ||
FORMAT1 NULL | ||
FORMAT1 NULL | ||
FORMAT2 NULL | ||
FORMAT2 NULL | ||
PREHOOK: query: ALTER TABLE timestampltz_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ssVV") | ||
PREHOOK: type: ALTERTABLE_SERDEPROPERTIES | ||
PREHOOK: Input: default@timestampltz_formats | ||
PREHOOK: Output: default@timestampltz_formats | ||
POSTHOOK: query: ALTER TABLE timestampltz_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ssVV") | ||
POSTHOOK: type: ALTERTABLE_SERDEPROPERTIES | ||
POSTHOOK: Input: default@timestampltz_formats | ||
POSTHOOK: Output: default@timestampltz_formats | ||
PREHOOK: query: SELECT formatid, tsval FROM timestampltz_formats | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@timestampltz_formats | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: SELECT formatid, tsval FROM timestampltz_formats | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@timestampltz_formats | ||
#### A masked pattern was here #### | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.12345 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.12345 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.1 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.0 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.0 US/Pacific | ||
DEFAULT 2016-05-03 00:00:00.0 US/Pacific | ||
FORMAT1 2016-05-03 04:26:34.0 US/Pacific | ||
FORMAT1 2016-05-03 04:26:34.0 US/Pacific | ||
FORMAT2 NULL | ||
FORMAT2 NULL | ||
PREHOOK: query: ALTER TABLE timestampltz_formats SET SERDEPROPERTIES ("timestamp.formats"="MMM d yyyy HH:mm:ss") | ||
PREHOOK: type: ALTERTABLE_SERDEPROPERTIES | ||
PREHOOK: Input: default@timestampltz_formats | ||
PREHOOK: Output: default@timestampltz_formats | ||
POSTHOOK: query: ALTER TABLE timestampltz_formats SET SERDEPROPERTIES ("timestamp.formats"="MMM d yyyy HH:mm:ss") | ||
POSTHOOK: type: ALTERTABLE_SERDEPROPERTIES | ||
POSTHOOK: Input: default@timestampltz_formats | ||
POSTHOOK: Output: default@timestampltz_formats | ||
PREHOOK: query: SELECT formatid, tsval FROM timestampltz_formats | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@timestampltz_formats | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: SELECT formatid, tsval FROM timestampltz_formats | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@timestampltz_formats | ||
#### A masked pattern was here #### | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.12345 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.12345 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.1 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.0 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.0 US/Pacific | ||
DEFAULT 2016-05-03 00:00:00.0 US/Pacific | ||
FORMAT1 NULL | ||
FORMAT1 NULL | ||
FORMAT2 2016-05-03 12:26:34.0 US/Pacific | ||
FORMAT2 2016-05-03 12:26:34.0 US/Pacific | ||
PREHOOK: query: ALTER TABLE timestampltz_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ssVV,MMM d yyyy HH:mm:ss") | ||
PREHOOK: type: ALTERTABLE_SERDEPROPERTIES | ||
PREHOOK: Input: default@timestampltz_formats | ||
PREHOOK: Output: default@timestampltz_formats | ||
POSTHOOK: query: ALTER TABLE timestampltz_formats SET SERDEPROPERTIES ("timestamp.formats"="yyyy-MM-dd'T'HH:mm:ssVV,MMM d yyyy HH:mm:ss") | ||
POSTHOOK: type: ALTERTABLE_SERDEPROPERTIES | ||
POSTHOOK: Input: default@timestampltz_formats | ||
POSTHOOK: Output: default@timestampltz_formats | ||
PREHOOK: query: SELECT formatid, tsval FROM timestampltz_formats | ||
PREHOOK: type: QUERY | ||
PREHOOK: Input: default@timestampltz_formats | ||
#### A masked pattern was here #### | ||
POSTHOOK: query: SELECT formatid, tsval FROM timestampltz_formats | ||
POSTHOOK: type: QUERY | ||
POSTHOOK: Input: default@timestampltz_formats | ||
#### A masked pattern was here #### | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 04:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.123456789 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.12345 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.12345 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.1 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.0 US/Pacific | ||
DEFAULT 2016-05-03 12:26:34.0 US/Pacific | ||
DEFAULT 2016-05-03 00:00:00.0 US/Pacific | ||
FORMAT1 2016-05-03 04:26:34.0 US/Pacific | ||
FORMAT1 2016-05-03 04:26:34.0 US/Pacific | ||
FORMAT2 2016-05-03 12:26:34.0 US/Pacific | ||
FORMAT2 2016-05-03 12:26:34.0 US/Pacific |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters