mirror of
https://github.com/apache/impala.git
synced 2026-01-09 15:00:11 -05:00
Since HIVE-22589, Hive still uses Julian Calendar for writing dates before 1582-10-15, whereas Impala uses proleptic Gregorian Calendar. This affects the results Impala gets when querying tables written by Hive. Currently, the Avro and ORC formats of date_tbl are suffering this issue. This patch enables proleptic Gregorian Calendar for Hive by default. It also reverts the two commits of IMPALA-9555 which modifies the tests to satisfy the inconsistent results. Tests: - Ran CORE tests Change-Id: I6be9c9720dd352d6821cdaa6c64d35ba20473bc0 Reviewed-on: http://gerrit.cloudera.org:8080/18262 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
155 lines
4.4 KiB
Plaintext
155 lines
4.4 KiB
Plaintext
====
|
|
---- HIVE_MAJOR_VERSION
|
|
2
|
|
---- QUERY
|
|
# TODO: Once DATE type is supported across all file formats, move this test to
|
|
# hdfs-scan-node.test.
|
|
# Avro table was created by Hive2. Inserting '0001-01-01' into date_col resulted in
|
|
# '0000-12-30' (because Hive2 uses Julian calendar for writing dates before 1582-10-15)
|
|
# which is outside of the supported date range.
|
|
select id_col, date_part, date_col from date_tbl;
|
|
---- RESULTS
|
|
10,1399-06-27,2017-11-28
|
|
11,1399-06-27,NULL
|
|
12,1399-06-27,2018-12-31
|
|
20,2017-11-27,0001-06-19
|
|
21,2017-11-27,0001-06-20
|
|
22,2017-11-27,0001-06-21
|
|
23,2017-11-27,0001-06-22
|
|
24,2017-11-27,0001-06-23
|
|
25,2017-11-27,0001-06-24
|
|
26,2017-11-27,0001-06-25
|
|
27,2017-11-27,0001-06-26
|
|
28,2017-11-27,0001-06-27
|
|
29,2017-11-27,2017-11-28
|
|
30,9999-12-31,9999-12-01
|
|
31,9999-12-31,9999-12-31
|
|
---- TYPES
|
|
INT,DATE,DATE
|
|
---- ERRORS
|
|
Problem parsing file __HDFS_FILENAME__ at 253
|
|
Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 253. The valid date range is -719162..2932896 (0001-01-01..9999-12-31).
|
|
====
|
|
---- HIVE_MAJOR_VERSION
|
|
3
|
|
---- QUERY
|
|
select id_col, date_part, date_col from date_tbl;
|
|
---- RESULTS
|
|
0,0001-01-01,0001-01-01
|
|
1,0001-01-01,0001-12-31
|
|
2,0001-01-01,0002-01-01
|
|
3,0001-01-01,1399-12-31
|
|
4,0001-01-01,2017-11-28
|
|
5,0001-01-01,9999-12-31
|
|
6,0001-01-01,NULL
|
|
10,1399-06-27,2017-11-28
|
|
11,1399-06-27,NULL
|
|
12,1399-06-27,2018-12-31
|
|
20,2017-11-27,0001-06-21
|
|
21,2017-11-27,0001-06-22
|
|
22,2017-11-27,0001-06-23
|
|
23,2017-11-27,0001-06-24
|
|
24,2017-11-27,0001-06-25
|
|
25,2017-11-27,0001-06-26
|
|
26,2017-11-27,0001-06-27
|
|
27,2017-11-27,0001-06-28
|
|
28,2017-11-27,0001-06-29
|
|
29,2017-11-27,2017-11-28
|
|
30,9999-12-31,9999-12-01
|
|
31,9999-12-31,9999-12-31
|
|
---- TYPES
|
|
INT,DATE,DATE
|
|
====
|
|
---- HIVE_MAJOR_VERSION
|
|
2
|
|
---- QUERY
|
|
# Avro table was created by Hive2. Inserting '0001-01-01' into date_col resulted in
|
|
# '0000-12-30' (because Hive2 uses Julian calendar for writing dates before 1582-10-15)
|
|
# which is outside of the supported date range.
|
|
select date_part, count(date_col) from date_tbl group by date_part;
|
|
---- RESULTS
|
|
2017-11-27,10
|
|
1399-06-27,2
|
|
9999-12-31,2
|
|
---- TYPES
|
|
DATE, BIGINT
|
|
---- ERRORS
|
|
Problem parsing file __HDFS_FILENAME__ at 253
|
|
Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 253. The valid date range is -719162..2932896 (0001-01-01..9999-12-31).
|
|
====
|
|
---- HIVE_MAJOR_VERSION
|
|
3
|
|
---- QUERY
|
|
select date_part, count(date_col) from date_tbl group by date_part;
|
|
---- RESULTS
|
|
2017-11-27,10
|
|
1399-06-27,2
|
|
9999-12-31,2
|
|
0001-01-01,6
|
|
---- TYPES
|
|
DATE, BIGINT
|
|
====
|
|
---- HIVE_MAJOR_VERSION
|
|
2
|
|
---- QUERY
|
|
# Avro table was created by Hive2. Inserting '0001-01-01' into date_col resulted in
|
|
# '0000-12-30' (because Hive2 uses Julian calendar for writing dates before 1582-10-15)
|
|
# which is outside of the supported date range.
|
|
select min(date_part), max(date_part), min(date_col), max(date_col) from date_tbl;
|
|
---- RESULTS
|
|
1399-06-27,9999-12-31,0001-06-19,9999-12-31
|
|
---- TYPES
|
|
DATE, DATE, DATE, DATE
|
|
---- ERRORS
|
|
Problem parsing file __HDFS_FILENAME__ at 253
|
|
Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 253. The valid date range is -719162..2932896 (0001-01-01..9999-12-31).
|
|
====
|
|
---- HIVE_MAJOR_VERSION
|
|
3
|
|
---- QUERY
|
|
select min(date_part), max(date_part), min(date_col), max(date_col) from date_tbl;
|
|
---- RESULTS
|
|
0001-01-01,9999-12-31,0001-01-01,9999-12-31
|
|
---- TYPES
|
|
DATE, DATE, DATE, DATE
|
|
====
|
|
---- HIVE_MAJOR_VERSION
|
|
2
|
|
---- QUERY
|
|
# Avro table was created by Hive2. Inserting '0001-01-01' into date_col resulted in
|
|
# '0000-12-30' (because Hive2 uses Julian calendar for writing dates before 1582-10-15)
|
|
# which is outside of the supported date range.
|
|
select date_part, min(date_col), max(date_col) from date_tbl group by date_part;
|
|
---- RESULTS
|
|
2017-11-27,0001-06-19,2017-11-28
|
|
1399-06-27,2017-11-28,2018-12-31
|
|
9999-12-31,9999-12-01,9999-12-31
|
|
---- TYPES
|
|
DATE, DATE, DATE
|
|
---- ERRORS
|
|
Problem parsing file __HDFS_FILENAME__ at 253
|
|
Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 253. The valid date range is -719162..2932896 (0001-01-01..9999-12-31).
|
|
====
|
|
---- HIVE_MAJOR_VERSION
|
|
3
|
|
---- QUERY
|
|
select date_part, min(date_col), max(date_col) from date_tbl group by date_part;
|
|
---- RESULTS
|
|
2017-11-27,0001-06-21,2017-11-28
|
|
1399-06-27,2017-11-28,2018-12-31
|
|
9999-12-31,9999-12-01,9999-12-31
|
|
0001-01-01,0001-01-01,9999-12-31
|
|
---- TYPES
|
|
DATE, DATE, DATE
|
|
====
|
|
---- QUERY
|
|
select date_part, count(*) from date_tbl group by date_part;
|
|
---- RESULTS
|
|
2017-11-27,10
|
|
1399-06-27,3
|
|
9999-12-31,2
|
|
0001-01-01,7
|
|
---- TYPES
|
|
DATE, BIGINT
|
|
====
|