From 2cd7a2b77acfa04094e91efbc6803d11fabcc0e9 Mon Sep 17 00:00:00 2001 From: Attila Jeges Date: Thu, 26 Mar 2020 18:28:13 +0100 Subject: [PATCH] IMPALA-9555: [Hive3] Fix test failure introduced by HIVE-22589 With HIVE-22589 Hive3 switched back to using Julian Calendar for historical dates by default which caused an Impala test failure around Avro DATE values. Change-Id: I51dd933867ea7877235e7f6e1f2b56711dca107e Reviewed-on: http://gerrit.cloudera.org:8080/15564 Reviewed-by: Impala Public Jenkins Tested-by: Impala Public Jenkins --- .../queries/QueryTest/avro_date.test | 51 +++++++++++-------- tests/query_test/test_date_queries.py | 3 +- 2 files changed, 33 insertions(+), 21 deletions(-) diff --git a/testdata/workloads/functional-query/queries/QueryTest/avro_date.test b/testdata/workloads/functional-query/queries/QueryTest/avro_date.test index 317e58cd1..248872a60 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/avro_date.test +++ b/testdata/workloads/functional-query/queries/QueryTest/avro_date.test @@ -33,32 +33,30 @@ Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at off ---- HIVE_MAJOR_VERSION 3 ---- QUERY +# Avro table was created by Hive3. Since HIVE-22589 Hive3 also uses Julian calendar for +# dates before 1582-10-15 by default, therefore we expect the same results as above. select id_col, date_part, date_col from date_tbl; ---- RESULTS -0,0001-01-01,0001-01-01 -1,0001-01-01,0001-12-31 -2,0001-01-01,0002-01-01 -3,0001-01-01,1399-12-31 -4,0001-01-01,2017-11-28 -5,0001-01-01,9999-12-31 -6,0001-01-01,NULL 10,1399-06-27,2017-11-28 11,1399-06-27,NULL 12,1399-06-27,2018-12-31 -20,2017-11-27,0001-06-21 -21,2017-11-27,0001-06-22 -22,2017-11-27,0001-06-23 -23,2017-11-27,0001-06-24 -24,2017-11-27,0001-06-25 -25,2017-11-27,0001-06-26 -26,2017-11-27,0001-06-27 -27,2017-11-27,0001-06-28 -28,2017-11-27,0001-06-29 +20,2017-11-27,0001-06-19 +21,2017-11-27,0001-06-20 +22,2017-11-27,0001-06-21 +23,2017-11-27,0001-06-22 +24,2017-11-27,0001-06-23 +25,2017-11-27,0001-06-24 +26,2017-11-27,0001-06-25 +27,2017-11-27,0001-06-26 +28,2017-11-27,0001-06-27 29,2017-11-27,2017-11-28 30,9999-12-31,9999-12-01 31,9999-12-31,9999-12-31 ---- TYPES INT,DATE,DATE +---- ERRORS +Problem parsing file __HDFS_FILENAME__ at 309 +Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 309. The valid date range is -719162..2932896 (0001-01-01..9999-12-31). ==== ---- HIVE_MAJOR_VERSION 2 @@ -80,14 +78,18 @@ Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at off ---- HIVE_MAJOR_VERSION 3 ---- QUERY +# Avro table was created by Hive3. Since HIVE-22589 Hive3 also uses Julian calendar for +# dates before 1582-10-15 by default, therefore we expect the same results as above. select date_part, count(date_col) from date_tbl group by date_part; ---- RESULTS 2017-11-27,10 1399-06-27,2 9999-12-31,2 -0001-01-01,6 ---- TYPES DATE, BIGINT +---- ERRORS +Problem parsing file __HDFS_FILENAME__ at 309 +Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 309. The valid date range is -719162..2932896 (0001-01-01..9999-12-31). ==== ---- HIVE_MAJOR_VERSION 2 @@ -107,11 +109,16 @@ Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at off ---- HIVE_MAJOR_VERSION 3 ---- QUERY +# Avro table was created by Hive3. Since HIVE-22589 Hive3 also uses Julian calendar for +# dates before 1582-10-15 by default, therefore we expect the same results as above. select min(date_part), max(date_part), min(date_col), max(date_col) from date_tbl; ---- RESULTS -0001-01-01,9999-12-31,0001-01-01,9999-12-31 +1399-06-27,9999-12-31,0001-06-19,9999-12-31 ---- TYPES DATE, DATE, DATE, DATE +---- ERRORS +Problem parsing file __HDFS_FILENAME__ at 309 +Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 309. The valid date range is -719162..2932896 (0001-01-01..9999-12-31). ==== ---- HIVE_MAJOR_VERSION 2 @@ -133,14 +140,18 @@ Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at off ---- HIVE_MAJOR_VERSION 3 ---- QUERY +# Avro table was created by Hive3. Since HIVE-22589 Hive3 also uses Julian calendar for +# dates before 1582-10-15 by default, therefore we expect the same results as above. select date_part, min(date_col), max(date_col) from date_tbl group by date_part; ---- RESULTS -2017-11-27,0001-06-21,2017-11-28 +2017-11-27,0001-06-19,2017-11-28 1399-06-27,2017-11-28,2018-12-31 9999-12-31,9999-12-01,9999-12-31 -0001-01-01,0001-01-01,9999-12-31 ---- TYPES DATE, DATE, DATE +---- ERRORS +Problem parsing file __HDFS_FILENAME__ at 309 +Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 309. The valid date range is -719162..2932896 (0001-01-01..9999-12-31). ==== ---- QUERY select date_part, count(*) from date_tbl group by date_part; diff --git a/tests/query_test/test_date_queries.py b/tests/query_test/test_date_queries.py index ae4c16922..46ca0fabb 100644 --- a/tests/query_test/test_date_queries.py +++ b/tests/query_test/test_date_queries.py @@ -56,7 +56,8 @@ class TestDateQueries(ImpalaTestSuite): # - Hive2 uses Julian Calendar for writing dates before 1582-10-15, whereas Impala # uses proleptic Gregorian Calendar. This affects the results Impala gets when # querying avro tables written by Hive2. - # - Hive3 on the other hand uses proleptic Gregorian Calendar to write dates. + # - Since HIVE-22589, Hive3 also uses Julian Calendar for dates before 1582-10-15 + # by default. self.run_test_case('QueryTest/avro_date', vector) else: self.run_test_case('QueryTest/date', vector)