From 684a54a89e1752534dc713b05185bae1a6b69ba8 Mon Sep 17 00:00:00 2001 From: Attila Jeges Date: Mon, 30 Sep 2019 19:18:51 +0200 Subject: [PATCH] IMPALA-7368: Change supported year range for DATE values to 1..9999 Before this patch the supported year range for DATE type started with year 0. This contradicts the ANSI SQL standard that defines the valid DATE value range to be 0001-01-01 to 9999-12-31. Change-Id: Iefdf1c036834763f52d44d0c39a25a1f04e41e07 Reviewed-on: http://gerrit.cloudera.org:8080/14349 Reviewed-by: Attila Jeges Tested-by: Impala Public Jenkins --- be/src/exec/hdfs-avro-scanner-test.cc | 8 +- be/src/exprs/expr-test.cc | 192 +++++++++--------- be/src/runtime/date-test.cc | 106 +++++----- be/src/runtime/date-value.cc | 24 ++- be/src/runtime/date-value.h | 2 +- be/src/runtime/raw-value-test.cc | 2 +- be/src/udf/udf.h | 2 +- be/src/util/string-parser-test.cc | 3 +- common/thrift/generate_error_codes.py | 4 +- .../apache/impala/catalog/ColumnStats.java | 6 +- testdata/data/README | 6 +- .../queries/DataErrorsTest/avro-errors.test | 2 +- .../queries/QueryTest/avro_date.test | 47 ++++- .../QueryTest/date-fileformat-support.test | 6 +- .../queries/QueryTest/date.test | 32 ++- .../queries/QueryTest/out-of-range-date.test | 8 +- 16 files changed, 251 insertions(+), 199 deletions(-) diff --git a/be/src/exec/hdfs-avro-scanner-test.cc b/be/src/exec/hdfs-avro-scanner-test.cc index cea8d06f0..621247b55 100644 --- a/be/src/exec/hdfs-avro-scanner-test.cc +++ b/be/src/exec/hdfs-avro-scanner-test.cc @@ -317,11 +317,11 @@ TEST_F(HdfsAvroScannerTest, DateTest) { TestReadAvroDate(data, len + 1, invalid_dv, -1, TErrorCode::AVRO_INVALID_DATE); TestReadAvroDate(data, len - 1, invalid_dv, -1, TErrorCode::SCANNER_INVALID_INT); - // Test lower limit: 0000-01-01. - const int32_t MIN_DATE_DAYS_SINCE_EPOCH = -719528; + // Test lower limit: 0001-01-01. + const int32_t MIN_DATE_DAYS_SINCE_EPOCH = -719162; len = ReadWriteUtil::PutZInt(MIN_DATE_DAYS_SINCE_EPOCH, data); - TestReadAvroDate(data, len, DateValue(0, 1, 1), len); - TestReadAvroDate(data, len + 1, DateValue(0, 1, 1), len); + TestReadAvroDate(data, len, DateValue(1, 1, 1), len); + TestReadAvroDate(data, len + 1, DateValue(1, 1, 1), len); TestReadAvroDate(data, len - 1, invalid_dv, -1, TErrorCode::SCANNER_INVALID_INT); len = ReadWriteUtil::PutZInt(MIN_DATE_DAYS_SINCE_EPOCH - 1, data); diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index 6634fc8ce..e932a04f4 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -1484,7 +1484,7 @@ TEST_P(ExprTest, LiteralConstruction) { TestSingleLiteralConstruction(TYPE_SMALLINT, s_val, "-32768"); TestSingleLiteralConstruction(TYPE_INT, i_val, "-2147483648"); TestSingleLiteralConstruction(TYPE_BIGINT, l_val, "-9223372036854775808"); - TestSingleLiteralConstruction(TYPE_DATE, DateValue(0, 1, 1), "0000-01-01"); + TestSingleLiteralConstruction(TYPE_DATE, DateValue(1, 1, 1), "0001-01-01"); } @@ -3533,8 +3533,8 @@ TEST_P(ExprTest, CastDateExprs) { TestError("cast(' \t\r\n' as date)"); // Test String <-> Date boundary cases. - TestDateValue("cast('0000-01-01' as date)", DateValue(0, 1, 1)); - TestStringValue("cast(date '0000-01-01' as string)", "0000-01-01"); + TestDateValue("cast('0001-01-01' as date)", DateValue(1, 1, 1)); + TestStringValue("cast(date '0001-01-01' as string)", "0001-01-01"); TestDateValue("cast('9999-12-31' as date)", DateValue(9999, 12, 31)); TestStringValue("cast(date '9999-12-31' as string)", "9999-12-31"); TestError("cast('10000-01-01' as date)"); @@ -7634,20 +7634,20 @@ TEST_P(ExprTest, TruncForDateTest) { TestDateValue("trunc(date'9999-12-31', 'DAY')", DateValue(9999, 12, 27)); // Test lower limit - TestDateValue("trunc(date'0000-01-01', 'YYYY')", DateValue(0, 1, 1)); - TestDateValue("trunc(date'0000-01-01', 'Q')", DateValue(0, 1, 1)); - TestDateValue("trunc(date'0000-03-31', 'Q')", DateValue(0, 1, 1)); - TestDateValue("trunc(date'0000-01-01', 'MONTH')", DateValue(0, 1, 1)); - TestDateValue("trunc(date'0000-01-01', 'W')", DateValue(0, 1, 1)); - TestDateValue("trunc(date'0000-01-07', 'W')", DateValue(0, 1, 1)); - TestDateValue("trunc(date'0000-01-08', 'W')", DateValue(0, 1, 8)); - TestDateValue("trunc(date'0000-01-01', 'WW')", DateValue(0, 1, 1)); - TestDateValue("trunc(date'0000-01-07', 'WW')", DateValue(0, 1, 1)); - TestDateValue("trunc(date'0000-01-08', 'WW')", DateValue(0, 1, 8)); - TestDateValue("trunc(date'0000-01-04', 'DAY')", DateValue(0, 1, 3)); - TestDateValue("trunc(date'0000-01-03', 'DAY')", DateValue(0, 1, 3)); - TestIsNull("trunc(date'0000-01-02', 'DAY')", TYPE_DATE); - TestIsNull("trunc(date'0000-01-01', 'DAY')", TYPE_DATE); + TestDateValue("trunc(date'0001-01-01', 'YYYY')", DateValue(1, 1, 1)); + TestDateValue("trunc(date'0001-01-01', 'Q')", DateValue(1, 1, 1)); + TestDateValue("trunc(date'0001-03-31', 'Q')", DateValue(1, 1, 1)); + TestDateValue("trunc(date'0001-01-01', 'MONTH')", DateValue(1, 1, 1)); + // 0001-01-01 is Monday + TestDateValue("trunc(date'0001-01-01', 'W')", DateValue(1, 1, 1)); + TestDateValue("trunc(date'0001-01-07', 'W')", DateValue(1, 1, 1)); + TestDateValue("trunc(date'0001-01-08', 'W')", DateValue(1, 1, 8)); + TestDateValue("trunc(date'0001-01-01', 'WW')", DateValue(1, 1, 1)); + TestDateValue("trunc(date'0001-01-07', 'WW')", DateValue(1, 1, 1)); + TestDateValue("trunc(date'0001-01-08', 'WW')", DateValue(1, 1, 8)); + TestDateValue("trunc(date'0001-01-01', 'DAY')", DateValue(1, 1, 1)); + TestDateValue("trunc(date'0001-01-07', 'DAY')", DateValue(1, 1, 1)); + TestDateValue("trunc(date'0001-01-08', 'DAY')", DateValue(1, 1, 8)); // Truncating date to hour or minute returns an error for (const string& unit: { "HH", "HH12", "HH24", "MI" }) { @@ -7697,28 +7697,26 @@ TEST_P(ExprTest, DateTruncForDateTest) { TestDateValue("date_trunc('MILLENNIUM', date '1001-01-01')", DateValue(1001, 1, 1)); TestDateValue("date_trunc('MILLENNIUM', date '1000-01-01')", DateValue(1, 1, 1)); TestDateValue("date_trunc('MILLENNIUM', date '0001-01-01')", DateValue(1, 1, 1)); - TestIsNull("date_trunc('MILLENNIUM', date '0000-01-01')", TYPE_DATE); // Test lower limit for century TestDateValue("date_trunc('CENTURY', date '0101-01-01')", DateValue(101, 1, 1)); TestDateValue("date_trunc('CENTURY', date '0100-01-01')", DateValue(1, 1, 1)); TestDateValue("date_trunc('CENTURY', date '0001-01-01')", DateValue(1, 1, 1)); - TestIsNull("date_trunc('CENTURY', date '0000-01-01')", TYPE_DATE); // Test lower limit for decade - TestDateValue("date_trunc('DECADE', date '0001-01-01')", DateValue(0, 1, 1)); - TestDateValue("date_trunc('DECADE', date '0000-01-01')", DateValue(0, 1, 1)); + TestDateValue("date_trunc('DECADE', date '0011-01-01')", DateValue(10, 1, 1)); + TestDateValue("date_trunc('DECADE', date '0010-01-01')", DateValue(10, 1, 1)); + TestIsNull("date_trunc('DECADE', date '0001-01-01')", TYPE_DATE); // Test lower limit for year, month, day - TestDateValue("date_trunc('YEAR', date '0000-01-01')", DateValue(0, 1, 1)); - TestDateValue("date_trunc('MONTH', date '0000-01-01')", DateValue(0, 1, 1)); - TestDateValue("date_trunc('DAY', date '0000-01-01')", DateValue(0, 1, 1)); + TestDateValue("date_trunc('YEAR', date '0001-01-01')", DateValue(1, 1, 1)); + TestDateValue("date_trunc('MONTH', date '0001-01-01')", DateValue(1, 1, 1)); + TestDateValue("date_trunc('DAY', date '0001-01-01')", DateValue(1, 1, 1)); // Test lower limit for week - TestDateValue("date_trunc('WEEK', date '0000-01-09')", DateValue(0, 1, 3)); - TestDateValue("date_trunc('WEEK', date '0000-01-03')", DateValue(0, 1, 3)); - TestIsNull("date_trunc('WEEK', date '0000-01-02')", TYPE_DATE); - TestIsNull("date_trunc('WEEK', date '0000-01-01')", TYPE_DATE); + TestDateValue("date_trunc('WEEK', date '0001-01-08')", DateValue(1, 1, 8)); + TestDateValue("date_trunc('WEEK', date '0001-01-07')", DateValue(1, 1, 1)); + TestDateValue("date_trunc('WEEK', date '0001-01-01')", DateValue(1, 1, 1)); // Test invalid input. // Truncating date to hour or minute returns an error @@ -7760,10 +7758,10 @@ TEST_P(ExprTest, ExtractAndDatePartForDateTest) { TestValue("extract(DAY from date '9999-12-31')", TYPE_BIGINT, 31); // Test lower limit - TestValue("extract(date '0000-01-01', 'YEAR')", TYPE_BIGINT, 0); - TestValue("extract(quarter from date '0000-01-01')", TYPE_BIGINT, 1); - TestValue("extract(date '0000-01-01', 'month')", TYPE_BIGINT, 1); - TestValue("extract(DAY from date '0000-01-01')", TYPE_BIGINT, 1); + TestValue("extract(date '0001-01-01', 'YEAR')", TYPE_BIGINT, 1); + TestValue("extract(quarter from date '0001-01-01')", TYPE_BIGINT, 1); + TestValue("extract(date '0001-01-01', 'month')", TYPE_BIGINT, 1); + TestValue("extract(DAY from date '0001-01-01')", TYPE_BIGINT, 1); // Time of day extract fields are not supported for (const string& field: { "MINUTE", "SECOND", "MILLISECOND", "EPOCH" }) { @@ -7795,10 +7793,10 @@ TEST_P(ExprTest, ExtractAndDatePartForDateTest) { TestValue("date_part('DAY', date '9999-12-31')", TYPE_BIGINT, 31); // Test lower limit - TestValue("date_part('year', date '0000-01-01')", TYPE_BIGINT, 0); - TestValue("date_part('quarter', date '0000-01-01')", TYPE_BIGINT, 1); - TestValue("date_part('MONTH', date '0000-01-01')", TYPE_BIGINT, 1); - TestValue("date_part('DAY', date '0000-01-01')", TYPE_BIGINT, 1); + TestValue("date_part('year', date '0001-01-01')", TYPE_BIGINT, 1); + TestValue("date_part('quarter', date '0001-01-01')", TYPE_BIGINT, 1); + TestValue("date_part('MONTH', date '0001-01-01')", TYPE_BIGINT, 1); + TestValue("date_part('DAY', date '0001-01-01')", TYPE_BIGINT, 1); // Time of day extract fields are not supported for (const string& field: { "MINUTE", "SECOND", "MILLISECOND", "EPOCH" }) { @@ -7822,7 +7820,7 @@ TEST_P(ExprTest, DateFunctions) { // year: TestValue("year(date '2019-06-05')", TYPE_INT, 2019); TestValue("year(date '9999-12-31')", TYPE_INT, 9999); - TestValue("year(date '0000-01-01')", TYPE_INT, 0); + TestValue("year(date '0001-01-01')", TYPE_INT, 1); TestIsNull("year(cast(NULL as date))", TYPE_INT); // Test that the name-resolution algorithm picks up the TIMESTAMP-version of year() if @@ -7840,23 +7838,23 @@ TEST_P(ExprTest, DateFunctions) { // month: TestValue("month(date '2019-06-05')", TYPE_INT, 6); TestValue("month(date '9999-12-31')", TYPE_INT, 12); - TestValue("month(date '0000-01-01')", TYPE_INT, 1); + TestValue("month(date '0001-01-01')", TYPE_INT, 1); TestIsNull("month(cast(NULL as date))", TYPE_INT); // monthname: TestStringValue("monthname(date '2019-06-05')", "June"); TestStringValue("monthname(date '9999-12-31')", "December"); - TestStringValue("monthname(date '0000-01-01')", "January"); + TestStringValue("monthname(date '0001-01-01')", "January"); TestIsNull("monthname(cast(NULL as date))", TYPE_STRING); // day, dayofmonth: TestValue("day(date '2019-06-05')", TYPE_INT, 5); TestValue("day(date '9999-12-31')", TYPE_INT, 31); - TestValue("day(date '0000-01-01')", TYPE_INT, 1); + TestValue("day(date '0001-01-01')", TYPE_INT, 1); TestIsNull("day(cast(NULL as date))", TYPE_INT); TestValue("dayofmonth(date '2019-06-07')", TYPE_INT, 7); TestValue("dayofmonth(date '9999-12-31')", TYPE_INT, 31); - TestValue("dayofmonth(date '0000-01-01')", TYPE_INT, 1); + TestValue("dayofmonth(date '0001-01-01')", TYPE_INT, 1); TestIsNull("dayofmonth(cast(NULL as date))", TYPE_INT); // quarter: @@ -7869,15 +7867,15 @@ TEST_P(ExprTest, DateFunctions) { TestValue("quarter(date '2019-10-01')", TYPE_INT, 4); TestValue("quarter(date '2019-12-31')", TYPE_INT, 4); TestValue("quarter(date '9999-12-31')", TYPE_INT, 4); - TestValue("quarter(date '0000-01-01')", TYPE_INT, 1); + TestValue("quarter(date '0001-01-01')", TYPE_INT, 1); TestIsNull("quarter(cast(NULL as date))", TYPE_INT); // dayofweek: TestValue("dayofweek(date '2019-06-05')", TYPE_INT, 4); // 9999-12-31 is Friday. TestValue("dayofweek(date '9999-12-31')", TYPE_INT, 6); - // 0000-01-01 is Saturday. - TestValue("dayofweek(date '0000-01-01')", TYPE_INT, 7); + // 0001-01-01 is Monday. + TestValue("dayofweek(date '0001-01-01')", TYPE_INT, 2); TestIsNull("dayofweek(cast(NULL as date))", TYPE_INT); // dayname: @@ -7889,7 +7887,7 @@ TEST_P(ExprTest, DateFunctions) { TestStringValue("dayname(date '2019-06-08')", "Saturday"); TestStringValue("dayname(date '2019-06-09')", "Sunday"); TestStringValue("dayname(date '9999-12-31')", "Friday"); - TestStringValue("dayname(date '0000-01-01')", "Saturday"); + TestStringValue("dayname(date '0001-01-01')", "Monday"); TestIsNull("dayname(cast(NULL as date))", TYPE_STRING); // dayofyear: @@ -7899,7 +7897,7 @@ TEST_P(ExprTest, DateFunctions) { TestValue("dayofyear(date '2016-12-31')", TYPE_INT, 366); TestValue("dayofyear(date '2016-06-05')", TYPE_INT, 31 + 29 + 31 + 30 + 31 + 5); TestValue("dayofyear(date '9999-12-31')", TYPE_INT, 365); - TestValue("dayofyear(date '0000-01-01')", TYPE_INT, 1); + TestValue("dayofyear(date '0001-01-01')", TYPE_INT, 1); TestIsNull("dayofyear(cast(NULL as date))", TYPE_INT); // week, weekofyear @@ -7923,9 +7921,9 @@ TEST_P(ExprTest, DateFunctions) { TestValue("week(date '2019-12-29')", TYPE_INT, 52); TestValue("week(date '2019-12-30')", TYPE_INT, 1); TestValue("week(date '2015-12-31')", TYPE_INT, 53); - // 0000-01-01 is Saturday. It belongs to the last week of the previous year. - TestValue("weekofyear(date '0000-01-01')", TYPE_INT, 52); - TestValue("week(date '0000-01-01')", TYPE_INT, 52); + // 0001-01-01 is Monday. It belongs to the first week of the year. + TestValue("weekofyear(date '0001-01-01')", TYPE_INT, 1); + TestValue("week(date '0001-01-01')", TYPE_INT, 1); // 9999-12-31 is Friday. It belongs to the last week of the year. TestValue("weekofyear(date '9999-12-31')", TYPE_INT, 52); TestValue("week(date '9999-12-31')", TYPE_INT, 52); @@ -7941,9 +7939,9 @@ TEST_P(ExprTest, DateFunctions) { TestDateValue("next_day(date '2019-06-05', 'fRI')", DateValue(2019, 6, 7)); TestDateValue("next_day(date '2019-06-05', 'saturDAY')", DateValue(2019, 6, 8)); TestDateValue("next_day(date '2019-06-05', 'suN')", DateValue(2019, 6, 9)); - // 0000-01-01 is Saturday - TestDateValue("next_day(date '0000-01-01', 'SAT')", DateValue(0, 1, 8)); - TestDateValue("next_day(date '0000-01-01', 'friday')", DateValue(0, 1, 7)); + // 0001-01-01 is Monday + TestDateValue("next_day(date '0001-01-01', 'MON')", DateValue(1, 1, 8)); + TestDateValue("next_day(date '0001-01-01', 'sunday')", DateValue(1, 1, 7)); // 9999-12-31 is Friday TestDateValue("next_day(date'9999-12-30', 'FRI')", DateValue(9999, 12, 31)); TestIsNull("next_day(date'9999-12-30', 'THU')", TYPE_DATE); @@ -7964,7 +7962,7 @@ TEST_P(ExprTest, DateFunctions) { TestDateValue("last_day(date'2019-05-31')", DateValue(2019, 5, 31)); // 2016 is leap year TestDateValue("last_day(date'2016-02-05')", DateValue(2016, 2, 29)); - TestDateValue("last_day(date'0000-01-01')", DateValue(0, 1, 31)); + TestDateValue("last_day(date'0001-01-01')", DateValue(1, 1, 31)); TestDateValue("last_day(date'9999-12-31')", DateValue(9999, 12, 31)); TestIsNull("last_day(cast(NULL as date))", TYPE_DATE); @@ -7972,20 +7970,20 @@ TEST_P(ExprTest, DateFunctions) { TestDateValue("years_add(date '0125-05-24', 0)", DateValue(125, 5, 24)); TestDateValue("years_sub(date '0125-05-24', 0)", DateValue(125, 5, 24)); TestDateValue("years_add(date '0125-05-24', 125)", DateValue(250, 5, 24)); - TestDateValue("years_add(date '0125-05-24', -125)", DateValue(0, 5, 24)); - TestDateValue("years_sub(date '0125-05-24', 125)", DateValue(0, 5, 24)); + TestDateValue("years_add(date '0125-05-24', -124)", DateValue(1, 5, 24)); + TestDateValue("years_sub(date '0125-05-24', 124)", DateValue(1, 5, 24)); // Test leap years. TestDateValue("years_add(date '2000-02-29', 1)", DateValue(2001, 2, 28)); TestDateValue("years_add(date '2000-02-29', 4)", DateValue(2004, 2, 29)); TestDateValue("years_sub(date '2000-02-29', 1)", DateValue(1999, 2, 28)); TestDateValue("years_sub(date '2000-02-29', 4)", DateValue(1996, 2, 29)); // Test upper and lower limit - TestDateValue("years_add(date'0000-12-31', 9999)", DateValue(9999, 12, 31)); - TestIsNull("years_add(date'0000-12-31', 10000)", TYPE_DATE); - TestDateValue("years_sub(date'9999-01-01', 9999)", DateValue(0, 1, 1)); - TestIsNull("years_sub(date'9999-01-01', 10000)", TYPE_DATE); + TestDateValue("years_add(date'0001-12-31', 9998)", DateValue(9999, 12, 31)); + TestIsNull("years_add(date'0001-12-31', 9999)", TYPE_DATE); + TestDateValue("years_sub(date'9999-01-01', 9998)", DateValue(1, 1, 1)); + TestIsNull("years_sub(date'9999-01-01', 9999)", TYPE_DATE); // Test max int64 - TestIsNull("years_add(date'0000-01-01', 2147483647)", TYPE_DATE); + TestIsNull("years_add(date'0001-01-01', 2147483647)", TYPE_DATE); TestIsNull("years_sub(date'9999-12-31', 2147483647)", TYPE_DATE); // Test NULL values TestIsNull("years_add(cast(NULL as date), 1)", TYPE_DATE); @@ -7995,9 +7993,9 @@ TEST_P(ExprTest, DateFunctions) { // months_add, add_months, months_sub: TestDateValue("months_add(date '0005-01-29', 0)", DateValue(5, 1, 29)); TestDateValue("months_sub(date '0005-01-29', 0)", DateValue(5, 1, 29)); - TestDateValue("add_months(date '0005-01-29', -60)", DateValue(0, 1, 29)); - TestDateValue("months_add(date '0005-01-29', -60)", DateValue(0, 1, 29)); - TestDateValue("months_sub(date '0005-01-29', 60)", DateValue(0, 1, 29)); + TestDateValue("add_months(date '0005-01-29', -48)", DateValue(1, 1, 29)); + TestDateValue("months_add(date '0005-01-29', -48)", DateValue(1, 1, 29)); + TestDateValue("months_sub(date '0005-01-29', 48)", DateValue(1, 1, 29)); TestDateValue("add_months(date '9995-01-29', 59)", DateValue(9999, 12, 29)); TestDateValue("months_add(date '9995-01-29', 59)", DateValue(9999, 12, 29)); TestDateValue("months_sub(date '9995-01-29', -59)", DateValue(9999, 12, 29)); @@ -8008,13 +8006,13 @@ TEST_P(ExprTest, DateFunctions) { TestDateValue("months_sub(date '2000-03-31', 1)", DateValue(2000, 2, 29)); TestDateValue("months_add(date '2000-03-31', -2)", DateValue(2000, 1, 31)); // Test upper and lower limit. - // 12 * 9999 == 119988 - TestDateValue("months_add(date '0000-12-31', 119988)", DateValue(9999, 12, 31)); - TestIsNull("months_add(date'0000-12-31', 119989)", TYPE_DATE); - TestDateValue("months_sub(date '9999-01-01', 119988)", DateValue(0, 1, 1)); - TestIsNull("months_sub(date'9999-01-01', 119989)", TYPE_DATE); + // 12 * 9998 == 119976 + TestDateValue("months_add(date '0001-12-31', 119976)", DateValue(9999, 12, 31)); + TestIsNull("months_add(date'0001-12-31', 119977)", TYPE_DATE); + TestDateValue("months_sub(date '9999-01-01', 119976)", DateValue(1, 1, 1)); + TestIsNull("months_sub(date'9999-01-01', 119977)", TYPE_DATE); // Test max int64 - TestIsNull("months_add(date'0000-01-01', 2147483647)", TYPE_DATE); + TestIsNull("months_add(date'0001-01-01', 2147483647)", TYPE_DATE); TestIsNull("months_sub(date'9999-12-31', 2147483647)", TYPE_DATE); // Test NULL values TestIsNull("months_add(cast(NULL as date), 1)", TYPE_DATE); @@ -8029,14 +8027,14 @@ TEST_P(ExprTest, DateFunctions) { TestDateValue("weeks_sub(date'2019-06-12', 24)", DateValue(2018, 12, 26)); // Test leap year TestDateValue("weeks_add(date '2016-01-04', 8)", DateValue(2016, 2, 29)); - // Test upper and ower limit. There are 3652424 days between 0000-01-01 and 9999-12-31. - // 3652424 days is 521774 weeks + 6 days. - TestDateValue("weeks_add(date'0000-01-01', 521774)", DateValue(9999, 12, 25)); - TestIsNull("weeks_add(date'0000-01-01', 521775)", TYPE_DATE); - TestDateValue("weeks_sub(date'9999-12-31', 521774)", DateValue(0, 1, 7)); - TestIsNull("weeks_sub(date'9999-12-31', 521775)", TYPE_DATE); + // Test upper and ower limit. There are 3652058 days between 0001-01-01 and 9999-12-31. + // 3652058 days is 521722 weeks + 4 days. + TestDateValue("weeks_add(date'0001-01-01', 521722)", DateValue(9999, 12, 27)); + TestIsNull("weeks_add(date'0001-01-01', 521723)", TYPE_DATE); + TestDateValue("weeks_sub(date'9999-12-31', 521722)", DateValue(1, 1, 5)); + TestIsNull("weeks_sub(date'9999-12-31', 521723)", TYPE_DATE); // Test max int64 - TestIsNull("weeks_add(date'0000-01-01', 2147483647)", TYPE_DATE); + TestIsNull("weeks_add(date'0001-01-01', 2147483647)", TYPE_DATE); TestIsNull("weeks_sub(date'9999-12-31', 2147483647)", TYPE_DATE); // Test NULL values TestIsNull("weeks_sub(cast(NULL as date), 1)", TYPE_DATE); @@ -8051,13 +8049,13 @@ TEST_P(ExprTest, DateFunctions) { // Test leap year TestDateValue("date_add(date'2016-01-01', 366)", DateValue(2017, 1, 1)); TestDateValue("subdate(date'2016-12-31', 366)", DateValue(2015, 12, 31)); - // Test uper and lower limit. There are 3652424 days between 0000-01-01 and 9999-12-31. - TestDateValue("days_add(date '0000-01-01', 3652424)", DateValue(9999, 12, 31)); - TestIsNull("date_add(date '0000-01-01', 3652425)", TYPE_DATE); - TestDateValue("days_sub(date '9999-12-31', 3652424)", DateValue(0, 1, 1)); - TestIsNull("date_sub(date '9999-12-31', 3652425)", TYPE_DATE); + // Test upper and lower limit. There are 3652058 days between 0001-01-01 and 9999-12-31. + TestDateValue("days_add(date '0001-01-01', 3652058)", DateValue(9999, 12, 31)); + TestIsNull("date_add(date '0001-01-01', 3652059)", TYPE_DATE); + TestDateValue("days_sub(date '9999-12-31', 3652058)", DateValue(1, 1, 1)); + TestIsNull("date_sub(date '9999-12-31', 3652059)", TYPE_DATE); // Test max int64 - TestIsNull("days_add(date'0000-01-01', 2147483647)", TYPE_DATE); + TestIsNull("days_add(date'0001-01-01', 2147483647)", TYPE_DATE); TestIsNull("days_sub(date'9999-12-31', 2147483647)", TYPE_DATE); // Test NULL values TestIsNull("days_add(cast(NULL as date), 1)", TYPE_DATE); @@ -8072,9 +8070,9 @@ TEST_P(ExprTest, DateFunctions) { TestDateValue("date_sub(date '2000-02-29', interval 4 year)", DateValue(1996, 2, 29)); TestDateValue("date '2000-02-29' + interval 1 year", DateValue(2001, 2, 28)); TestDateValue("date '2000-02-29' + interval 4 years", DateValue(2004, 2, 29)); - TestDateValue("date '0000-12-31' + interval 9999 years", DateValue(9999, 12, 31)); - TestIsNull("date '0000-12-31' + interval 10000 years", TYPE_DATE); - TestIsNull("date '0000-01-01' + interval 2147483647 years", TYPE_DATE); + TestDateValue("date '0001-12-31' + interval 9998 years", DateValue(9999, 12, 31)); + TestIsNull("date '0001-12-31' + interval 9999 years", TYPE_DATE); + TestIsNull("date '0001-01-01' + interval 2147483647 years", TYPE_DATE); // Test month interval expressions. Keep-last-day-of-month behavior is not enforced. TestDateValue("date_add(date '2000-02-29', interval 1 month)", DateValue(2000, 3, 29)); TestDateValue("date_add(date '1999-02-28', interval 12 months)", @@ -8084,8 +8082,8 @@ TEST_P(ExprTest, DateFunctions) { DateValue(2000, 1, 31)); TestDateValue("date '2000-02-29' + interval 1 month", DateValue(2000, 3, 29)); TestDateValue("date '2000-03-31' - interval 2 months", DateValue(2000, 1, 31)); - TestDateValue("date '9999-01-01' - interval 119988 months", DateValue(0, 1, 1)); - TestIsNull("date'9999-01-01' - interval 119989 months", TYPE_DATE); + TestDateValue("date '9999-01-01' - interval 119976 months", DateValue(1, 1, 1)); + TestIsNull("date'9999-01-01' - interval 119977 months", TYPE_DATE); TestIsNull("date'9999-12-31' - interval 2147483647 months", TYPE_DATE); // Test week interval expressions. TestDateValue("date_add(date'2019-06-12', interval -24 weeks)", @@ -8094,15 +8092,15 @@ TEST_P(ExprTest, DateFunctions) { TestDateValue("date_add(date '2016-01-04', interval 8 weeks)", DateValue(2016, 2, 29)); TestDateValue("date '2019-06-12' - interval 24 weeks", DateValue(2018, 12, 26)); TestDateValue("date '2018-12-26' + interval 24 weeks", DateValue(2019, 6, 12)); - TestDateValue("date '9999-12-31' - interval 521774 weeks", DateValue(0, 1, 7)); - TestIsNull("date '9999-12-31' - interval 521775 weeks", TYPE_DATE); + TestDateValue("date '9999-12-31' - interval 521722 weeks", DateValue(1, 1, 5)); + TestIsNull("date '9999-12-31' - interval 521723 weeks", TYPE_DATE); TestIsNull("date'9999-12-31' - interval 2147483647 weeks", TYPE_DATE); // Test day interval expressions. TestDateValue("date_add(date '2019-01-01', interval 365 days)", DateValue(2020, 1, 1)); TestDateValue("date_sub(date '2016-12-31', interval 366 days)", DateValue(2015, 12, 31)); - TestDateValue("date '0000-01-01' + interval 3652424 days", DateValue(9999, 12, 31)); - TestIsNull("date '0000-01-01' + interval 3652425 days", TYPE_DATE); + TestDateValue("date '0001-01-01' + interval 3652058 days", DateValue(9999, 12, 31)); + TestIsNull("date '0001-01-01' + interval 3652059 days", TYPE_DATE); TestIsNull("date '9999-12-31' - interval 2147483647 days", TYPE_DATE); // Test NULL values. TestIsNull("date_add(date '2019-01-01', interval cast(NULL as BIGINT) days)", @@ -8122,10 +8120,10 @@ TEST_P(ExprTest, DateFunctions) { TestValue("datediff(date'2021-01-01', date '2020-01-01')", TYPE_INT, 366); TestValue("datediff('2020-01-01', date '2021-01-01')", TYPE_INT, -366); // Test difference between min and max date - TestValue("datediff(date'9999-12-31', date '0000-01-01')", TYPE_INT, 3652424); - TestValue("datediff(date'0000-01-01', '9999-12-31')", TYPE_INT, -3652424); + TestValue("datediff(date'9999-12-31', date '0001-01-01')", TYPE_INT, 3652058); + TestValue("datediff(date'0001-01-01', '9999-12-31')", TYPE_INT, -3652058); // Test NULL values - TestIsNull("datediff(cast(NULL as DATE), date '0000-01-01')", TYPE_INT); + TestIsNull("datediff(cast(NULL as DATE), date '0001-01-01')", TYPE_INT); TestIsNull("datediff(date'9999-12-31', cast(NULL as date))", TYPE_INT); TestIsNull("datediff(cast(NULL as DATE), cast(NULL as date))", TYPE_INT); @@ -8138,7 +8136,7 @@ TEST_P(ExprTest, DateFunctions) { TestIsNull("date_cmp(cast(NULL as date), date '2019-06-11')", TYPE_INT); TestIsNull("date_cmp(cast(NULL as DATE), cast(NULL as date))", TYPE_INT); // Test upper and lower limit - TestValue("date_cmp(date '9999-12-31', '0000-01-01')", TYPE_INT, 1); + TestValue("date_cmp(date '9999-12-31', '0001-01-01')", TYPE_INT, 1); // int_months_between: TestValue("int_months_between(date '1967-07-19','1966-06-04')", TYPE_INT, 13); @@ -8146,8 +8144,8 @@ TEST_P(ExprTest, DateFunctions) { TestValue("int_months_between(date '1967-07-19','1967-07-19')", TYPE_INT, 0); TestValue("int_months_between('2015-07-19', date '2015-08-18')", TYPE_INT, 0); // Test lower and upper limit - TestValue("int_months_between(date '9999-12-31','0000-01-01')", TYPE_INT, - 9999 * 12 + 11); + TestValue("int_months_between(date '9999-12-31','0001-01-01')", TYPE_INT, + 9998 * 12 + 11); // Test NULL values TestIsNull("int_months_between(date '1999-11-25', cast(NULL as date))", TYPE_INT); TestIsNull("int_months_between(cast(NULL as DATE), date '1999-11-25')", TYPE_INT); diff --git a/be/src/runtime/date-test.cc b/be/src/runtime/date-test.cc index eb37f624e..404c83139 100644 --- a/be/src/runtime/date-test.cc +++ b/be/src/runtime/date-test.cc @@ -300,13 +300,14 @@ TEST(DateTest, ParseFormatEdgeCases) { const TimestampValue now(date(1980, 2, 28), time_duration(16, 14, 24)); vector test_cases{ - // Test year upper/lower bound - DateTC("yyyy-MM-dd", "0000-01-01", true, 0, 1, 1), + // Test year lower/upper bound + DateTC("yyyy-MM-dd", "0001-01-01", true, 1, 1, 1), + DateTC("yyyy-MM-dd", "0000-01-01", false, true), DateTC("yyyy-MM-dd", "-001-01-01", false, true), DateTC("yyyy-MM-dd", "9999-12-31", true, 9999, 12, 31), DateTC("yyyyy-MM-dd", "10000-12-31", false, true), // Test Feb 29 in leap years - DateTC("yyyy-MM-dd", "0000-02-29", true, 0, 2, 29), + DateTC("yyyy-MM-dd", "0004-02-29", true, 4, 2, 29), DateTC("yyyy-MM-dd", "1904-02-29", true, 1904, 2, 29), DateTC("yyyy-MM-dd", "2000-02-29", true, 2000, 2, 29), // Test Feb 29 in non-leap years @@ -538,16 +539,17 @@ TEST(DateTest, FormatComplexFormats) { TEST(DateTest, DateValueEdgeCases) { // Test min supported date. // MIN_DATE_DAYS_SINCE_EPOCH was calculated using the Proleptic Gregorian calendar. This - // is expected to be different then how Hive written Parquet files represent 0000-01-01. - const int32_t MIN_DATE_DAYS_SINCE_EPOCH = -719528; - const DateValue min_date1 = ParseValidateDate("0000-01-01", true, 0, 1, 1); - const DateValue min_date2 = ParseValidateDate("0000-01-01 00:00:00", true, 0, 1, 1); + // is expected to be different then how Hive2 written Parquet files represent + // 0001-01-01. + const int32_t MIN_DATE_DAYS_SINCE_EPOCH = -719162; + const DateValue min_date1 = ParseValidateDate("0001-01-01", true, 1, 1, 1); + const DateValue min_date2 = ParseValidateDate("0001-01-01 00:00:00", true, 1, 1, 1); EXPECT_EQ(min_date1, min_date2); int32_t min_days; EXPECT_TRUE(min_date1.ToDaysSinceEpoch(&min_days)); EXPECT_EQ(MIN_DATE_DAYS_SINCE_EPOCH, min_days); - EXPECT_EQ("0000-01-01", min_date1.ToString()); - EXPECT_EQ("0000-01-01", min_date2.ToString()); + EXPECT_EQ("0001-01-01", min_date1.ToString()); + EXPECT_EQ("0001-01-01", min_date2.ToString()); const DateValue min_date3(MIN_DATE_DAYS_SINCE_EPOCH); EXPECT_TRUE(min_date3.IsValid()); @@ -576,7 +578,7 @@ TEST(DateTest, DateValueEdgeCases) { EXPECT_FALSE(too_late.IsValid()); // Test that Feb 29 is valid in leap years. - for (int leap_year: {0, 1904, 1980, 1996, 2000, 2004, 2104, 9996}) { + for (int leap_year: {4, 1904, 1980, 1996, 2000, 2004, 2104, 9996}) { EXPECT_TRUE(DateValue(leap_year, 2, 29).IsValid()) << "year:" << leap_year; } @@ -612,26 +614,26 @@ TEST(DateTest, AddDays) { EXPECT_FALSE(dv.AddDays(13).IsValid()); // Test lower limit. - dv = DateValue(0, 1, 10); - EXPECT_EQ(DateValue(0, 1, 1), dv.AddDays(-9)); + dv = DateValue(1, 1, 10); + EXPECT_EQ(DateValue(1, 1, 1), dv.AddDays(-9)); EXPECT_FALSE(dv.AddDays(-10).IsValid()); EXPECT_FALSE(dv.AddDays(-11).IsValid()); // Test adding days to cover the entire range. int32_t min_dse, max_dse; - EXPECT_TRUE(DateValue(0, 1, 1).ToDaysSinceEpoch(&min_dse)); + EXPECT_TRUE(DateValue(1, 1, 1).ToDaysSinceEpoch(&min_dse)); EXPECT_GT(0, min_dse); min_dse = -min_dse; EXPECT_TRUE(DateValue(9999, 12, 31).ToDaysSinceEpoch(&max_dse)); EXPECT_LT(0, max_dse); - dv = DateValue(0, 1, 1); + dv = DateValue(1, 1, 1); EXPECT_EQ(DateValue(9999, 12, 31), dv.AddDays(min_dse + max_dse)); EXPECT_FALSE(dv.AddDays(min_dse + max_dse + 1).IsValid()); EXPECT_FALSE(dv.AddDays(std::numeric_limits::max()).IsValid()); dv = DateValue(9999, 12, 31); - EXPECT_EQ(DateValue(0, 1, 1), dv.AddDays(-(min_dse + max_dse))); + EXPECT_EQ(DateValue(1, 1, 1), dv.AddDays(-(min_dse + max_dse))); EXPECT_FALSE(dv.AddDays(-(min_dse + max_dse + 1)).IsValid()); EXPECT_FALSE(dv.AddDays(std::numeric_limits::min()).IsValid()); @@ -692,19 +694,19 @@ TEST(DateTest, AddMonths) { EXPECT_FALSE(dv.AddMonths(14, true).IsValid()); // Test lower limit. - dv = DateValue(0, 11, 30); - EXPECT_EQ(DateValue(0, 1, 31), dv.AddMonths(-10, true)); + dv = DateValue(1, 11, 30); + EXPECT_EQ(DateValue(1, 1, 31), dv.AddMonths(-10, true)); EXPECT_FALSE(dv.AddMonths(-11, true).IsValid()); // Test adding months to cover the entire range. - dv = DateValue(0, 1, 1); - EXPECT_EQ(DateValue(9999, 12, 1), dv.AddMonths(9999 * 12 + 11, false)); - EXPECT_FALSE(dv.AddMonths(9999 * 12 + 12, false).IsValid()); + dv = DateValue(1, 1, 1); + EXPECT_EQ(DateValue(9999, 12, 1), dv.AddMonths(9998 * 12 + 11, false)); + EXPECT_FALSE(dv.AddMonths(9998 * 12 + 12, false).IsValid()); EXPECT_FALSE(dv.AddMonths(std::numeric_limits::max(), false).IsValid()); dv = DateValue(9999, 12, 31); - EXPECT_EQ(DateValue(0, 1, 31), dv.AddMonths(-9999 * 12 - 11, false)); - EXPECT_FALSE(dv.AddMonths(-9999 * 12 - 12, false).IsValid()); + EXPECT_EQ(DateValue(1, 1, 31), dv.AddMonths(-9998 * 12 - 11, false)); + EXPECT_FALSE(dv.AddMonths(-9998 * 12 - 12, false).IsValid()); EXPECT_FALSE(dv.AddMonths(std::numeric_limits::min(), false).IsValid()); } @@ -726,20 +728,20 @@ TEST(DateTest, AddYears) { EXPECT_FALSE(dv.AddYears(11).IsValid()); // Test lower limit. - dv = DateValue(10, 1, 1); - EXPECT_EQ(DateValue(0, 1, 1), dv.AddYears(-10)); + dv = DateValue(11, 1, 1); + EXPECT_EQ(DateValue(1, 1, 1), dv.AddYears(-10)); EXPECT_FALSE(dv.AddYears(-11).IsValid()); EXPECT_FALSE(dv.AddYears(-12).IsValid()); // Test adding years to cover the entire range. - dv = DateValue(0, 1, 1); - EXPECT_EQ(DateValue(9999, 1, 1), dv.AddYears(9999)); - EXPECT_FALSE(dv.AddYears(9999 + 1).IsValid()); + dv = DateValue(1, 1, 1); + EXPECT_EQ(DateValue(9999, 1, 1), dv.AddYears(9998)); + EXPECT_FALSE(dv.AddYears(9998 + 1).IsValid()); EXPECT_FALSE(dv.AddYears(std::numeric_limits::max()).IsValid()); dv = DateValue(9999, 12, 31); - EXPECT_EQ(DateValue(0, 12, 31), dv.AddYears(-9999)); - EXPECT_FALSE(dv.AddYears(-9999 - 1).IsValid()); + EXPECT_EQ(DateValue(1, 12, 31), dv.AddYears(-9998)); + EXPECT_FALSE(dv.AddYears(-9998 - 1).IsValid()); EXPECT_FALSE(dv.AddYears(std::numeric_limits::min()).IsValid()); // Test leap year. @@ -769,8 +771,8 @@ TEST(DateTest, WeekDay) { // Test lower limit. // 0001-01-01 is Monday. EXPECT_EQ(0, DateValue(1, 1, 1).WeekDay()); - // 0000-01-01 is Saturday. - EXPECT_EQ(5, DateValue(0, 1, 1).WeekDay()); + // 0002-01-01 is Tuesday. + EXPECT_EQ(1, DateValue(2, 1, 1).WeekDay()); } TEST(DateTest, ToYearMonthDay) { @@ -784,9 +786,9 @@ TEST(DateTest, ToYearMonthDay) { // Test that ToYearMonthDay() and ToYear() return the same values as // cctz::civil_day::year()/month()/day(). - // The following loop iterates through all valid dates (0000-01-01..9999-12-31): + // The following loop iterates through all valid dates (0001-01-01..9999-12-31): cctz::civil_day epoch(1970, 1, 1); - cctz::civil_day cd(0, 1, 1); + cctz::civil_day cd(1, 1, 1); do { DateValue dv(cd - epoch); EXPECT_TRUE(dv.IsValid()); @@ -808,7 +810,7 @@ TEST(DateTest, DayOfYear) { EXPECT_EQ(-1, invalid_dv.DayOfYear()); // Test lower limit. - EXPECT_EQ(1, DateValue(0, 1, 1).DayOfYear()); + EXPECT_EQ(1, DateValue(1, 1, 1).DayOfYear()); // Test upper limit. EXPECT_EQ(365, DateValue(9999, 12,31).DayOfYear()); @@ -848,17 +850,17 @@ TEST(DateTest, WeekOfYear) { EXPECT_EQ(1, DateValue(2020, 1, 5).WeekOfYear()); EXPECT_EQ(2, DateValue(2020, 1, 6).WeekOfYear()); + // 0002-01-01 is Tuesday. Test days around 0002-01-01. + EXPECT_EQ(51, DateValue(1, 12, 23).WeekOfYear()); + EXPECT_EQ(52, DateValue(1, 12, 30).WeekOfYear()); + EXPECT_EQ(1, DateValue(1, 12, 31).WeekOfYear()); + EXPECT_EQ(1, DateValue(2, 1, 1).WeekOfYear()); + EXPECT_EQ(1, DateValue(2, 1, 6).WeekOfYear()); + EXPECT_EQ(2, DateValue(2, 1, 7).WeekOfYear()); // 0001-01-01 is Monday. Test days around 0001-01-01. - EXPECT_EQ(51, DateValue(0, 12, 24).WeekOfYear()); - EXPECT_EQ(52, DateValue(0, 12, 25).WeekOfYear()); - EXPECT_EQ(52, DateValue(0, 12, 31).WeekOfYear()); EXPECT_EQ(1, DateValue(1, 1, 1).WeekOfYear()); - EXPECT_EQ(1, DateValue(1, 1, 7).WeekOfYear()); + EXPECT_EQ(1, DateValue(1, 1, 2).WeekOfYear()); EXPECT_EQ(2, DateValue(1, 1, 8).WeekOfYear()); - // 0000-01-01 is Saturday. Test days around 0000-01-01. - EXPECT_EQ(52, DateValue(0, 1, 1).WeekOfYear()); - EXPECT_EQ(52, DateValue(0, 1, 2).WeekOfYear()); - EXPECT_EQ(1, DateValue(0, 1, 3).WeekOfYear()); // 9999-12-31 is Friday. Test days around 9999-12-31. EXPECT_EQ(52, DateValue(9999, 12, 31).WeekOfYear()); @@ -892,8 +894,8 @@ TEST(DateTest, LastDay) { EXPECT_EQ(DateValue(9999, 12, 31), DateValue(9999, 12, 31).LastDay()); // Test lower limit. - EXPECT_EQ(DateValue(0, 1, 31), DateValue(0, 1, 1).LastDay()); - EXPECT_EQ(DateValue(0, 1, 31), DateValue(0, 1, 31).LastDay()); + EXPECT_EQ(DateValue(1, 1, 31), DateValue(1, 1, 1).LastDay()); + EXPECT_EQ(DateValue(1, 1, 31), DateValue(1, 1, 31).LastDay()); } // These macros add scoped trace to provide the line number of the caller upon failure. @@ -944,16 +946,16 @@ TEST(DateTest, MonthsBetween) { -29/31.0); // Test entire range w/o fractional part. - TEST_MONTHS_BW(DateValue(0, 1, 1), DateValue(9999, 12, 1), -9999 * 12 - 11); - TEST_MONTHS_BW(DateValue(9999, 12, 31), DateValue(0, 1, 31), 9999 * 12 + 11); + TEST_MONTHS_BW(DateValue(1, 1, 1), DateValue(9999, 12, 1), -9998 * 12 - 11); + TEST_MONTHS_BW(DateValue(9999, 12, 31), DateValue(1, 1, 31), 9998 * 12 + 11); // Test entire range w/ fractional part. - // There are (-9999*12 - 11 - 30/31.0) months between 0000-01-01 and 9999-12-31. - TEST_MONTHS_BW_RANGE(DateValue(0, 1, 1), DateValue(9999, 12, 31), -10000 * 12.0, - -9999 * 12 - 11 - 29/31.0); - // There are (9999*12 + 11 + 30/31.0) months between 9999-12-31 and 0000-01-01. - TEST_MONTHS_BW_RANGE(DateValue(9999, 12, 31), DateValue(0, 1, 1), - 9999 * 12 + 11 + 29/31.0, 10000 * 12.0); + // There are (-9998*12 - 11 - 30/31.0) months between 0001-01-01 and 9999-12-31. + TEST_MONTHS_BW_RANGE(DateValue(1, 1, 1), DateValue(9999, 12, 31), -9999 * 12.0, + -9998 * 12 - 11 - 29/31.0); + // There are (9998*12 + 11 + 30/31.0) months between 9999-12-31 and 0001-01-01. + TEST_MONTHS_BW_RANGE(DateValue(9999, 12, 31), DateValue(1, 1, 1), + 9998 * 12 + 11 + 29/31.0, 9999 * 12.0); } } diff --git a/be/src/runtime/date-value.cc b/be/src/runtime/date-value.cc index 855424269..d3fd4a067 100644 --- a/be/src/runtime/date-value.cc +++ b/be/src/runtime/date-value.cc @@ -31,7 +31,7 @@ using datetime_parse_util::GetMonthAndDayFromDaysSinceJan1; using datetime_parse_util::IsLeapYear; const int EPOCH_YEAR = 1970; -const int MIN_YEAR = 0; +const int MIN_YEAR = 1; const int MAX_YEAR = 9999; const cctz::civil_day EPOCH_DATE(EPOCH_YEAR, 1, 1); @@ -230,11 +230,9 @@ int DateValue::WeekOfYear() const { last_sunday = cctz::prev_weekday(dec31, cctz::weekday::sunday); } - if (UNLIKELY(today.year() == 0 && today < first_monday)) { - // 0000-01-01 is Saturday in the proleptic Gregorian calendar. - // 0000-01-01 and 0000-01-02 belong to the previous year. - return 52; - } else if (today >= first_monday && today <= last_sunday) { + // 0001-01-01 is Monday in the proleptic Gregorian calendar. + // 0001-01-01 belongs to year 0001. + if (today >= first_monday && today <= last_sunday) { return (today - first_monday) / 7 + 1; } else if (today > last_sunday) { return 1; @@ -254,7 +252,11 @@ int DateValue::WeekOfYear() const { } DateValue DateValue::AddDays(int64_t days) const { - if (UNLIKELY(!IsValid())) return DateValue(); + if (UNLIKELY(!IsValid() + || days < MIN_DAYS_SINCE_EPOCH - days_since_epoch_ + || days > MAX_DAYS_SINCE_EPOCH - days_since_epoch_)) { + return DateValue(); + } return DateValue(days_since_epoch_ + days); } @@ -262,6 +264,11 @@ DateValue DateValue::AddMonths(int64_t months, bool keep_last_day) const { if (UNLIKELY(!IsValid())) return DateValue(); const cctz::civil_day today = EPOCH_DATE + days_since_epoch_; + int64_t total_months = today.year()*12 + today.month() - 1; + if (UNLIKELY(months < MIN_YEAR*12 - total_months + || months > MAX_YEAR*12 + 11 - total_months)) { + return DateValue(); + } const cctz::civil_month month = cctz::civil_month(today); const cctz::civil_month result_month = month + months; const cctz::civil_day last_day_of_result_month = @@ -285,6 +292,9 @@ DateValue DateValue::AddYears(int64_t years) const { if (UNLIKELY(!IsValid())) return DateValue(); const cctz::civil_day today = EPOCH_DATE + days_since_epoch_; + if (UNLIKELY(years < MIN_YEAR - today.year() || years > MAX_YEAR - today.year())) { + return DateValue(); + } const int64_t result_year = today.year() + years; // Feb 29 in leap years requires special attention. diff --git a/be/src/runtime/date-value.h b/be/src/runtime/date-value.h index 01692c747..271946652 100644 --- a/be/src/runtime/date-value.h +++ b/be/src/runtime/date-value.h @@ -32,7 +32,7 @@ struct DateTimeFormatContext; } /// Represents a DATE value. -/// - The minimum and maximum dates are 0000-01-01 and 9999-12-31. Valid dates must fall +/// - The minimum and maximum dates are 0001-01-01 and 9999-12-31. Valid dates must fall /// in this range. /// - Internally represents DATE values as number of days since 1970-01-01. /// - This representation was chosen to be the same (bit-by-bit) as Parquet's date type. diff --git a/be/src/runtime/raw-value-test.cc b/be/src/runtime/raw-value-test.cc index ac6789049..7ffb80f47 100644 --- a/be/src/runtime/raw-value-test.cc +++ b/be/src/runtime/raw-value-test.cc @@ -49,7 +49,7 @@ TEST_F(RawValueTest, Compare) { EXPECT_GT(RawValue::Compare(&s1, &s2, TYPE_SMALLINT), 0); EXPECT_LT(RawValue::Compare(&s2, &s1, TYPE_SMALLINT), 0); - DateValue d1(-719528), d2(719528); + DateValue d1(-719162), d2(719162); EXPECT_LT(RawValue::Compare(&d1, &d2, TYPE_DATE), 0); EXPECT_GT(RawValue::Compare(&d2, &d1, TYPE_DATE), 0); } diff --git a/be/src/udf/udf.h b/be/src/udf/udf.h index ef46ba89c..235867221 100644 --- a/be/src/udf/udf.h +++ b/be/src/udf/udf.h @@ -584,7 +584,7 @@ struct TimestampVal : public AnyVal { }; /// Represents a DATE value. -/// - The minimum and maximum dates are 0000-01-01 and 9999-12-31. Valid dates must fall +/// - The minimum and maximum dates are 0001-01-01 and 9999-12-31. Valid dates must fall /// in this range. /// - Internally represents DATE values as number of days since 1970-01-01. /// - This representation was chosen to be the same (bit-by-bit) as Parquet's date type. diff --git a/be/src/util/string-parser-test.cc b/be/src/util/string-parser-test.cc index a357a3c43..aa4de6702 100644 --- a/be/src/util/string-parser-test.cc +++ b/be/src/util/string-parser-test.cc @@ -523,11 +523,12 @@ TEST(StringToDate, Basic) { TestDateValue("2018-11-1", DateValue(2018, 11, 1), StringParser::PARSE_SUCCESS); // Test min/max dates. - TestDateValue("0000-01-01", DateValue(0, 1, 1), StringParser::PARSE_SUCCESS); + TestDateValue("0001-01-01", DateValue(1, 1, 1), StringParser::PARSE_SUCCESS); TestDateValue("9999-12-31", DateValue(9999, 12, 31), StringParser::PARSE_SUCCESS); // Test less than min and greater than max dates. DateValue invalid_date; + TestDateValue("0000-12-31", invalid_date, StringParser::PARSE_FAILURE); TestDateValue("-0001-12-31", invalid_date, StringParser::PARSE_FAILURE); TestDateValue("10000-01-01", invalid_date, StringParser::PARSE_FAILURE); diff --git a/common/thrift/generate_error_codes.py b/common/thrift/generate_error_codes.py index 5a73e9c1c..8f0cbaae6 100755 --- a/common/thrift/generate_error_codes.py +++ b/common/thrift/generate_error_codes.py @@ -410,7 +410,7 @@ error_codes = ( ("PARQUET_DATE_OUT_OF_RANGE", 134, "Parquet file '$0' column '$1' contains an out of range date. " - "The valid date range is 0000-01-01..9999-12-31."), + "The valid date range is 0001-01-01..9999-12-31."), ("DISCONNECTED_SESSION_CLOSED", 135, "Session closed because it has no active connections"), @@ -437,7 +437,7 @@ error_codes = ( "maximum statement length ($1 bytes)"), ("AVRO_INVALID_DATE", 144, "Avro file '$0' is corrupt: out of range date value $1 " - "at offset $2. The valid date range is -719528..2932896 (0000-01-01..9999-12-31)."), + "at offset $2. The valid date range is -719162..2932896 (0001-01-01..9999-12-31)."), ) import sys diff --git a/fe/src/main/java/org/apache/impala/catalog/ColumnStats.java b/fe/src/main/java/org/apache/impala/catalog/ColumnStats.java index d96d5a891..414cd6e93 100644 --- a/fe/src/main/java/org/apache/impala/catalog/ColumnStats.java +++ b/fe/src/main/java/org/apache/impala/catalog/ColumnStats.java @@ -323,9 +323,9 @@ public class ColumnStats { colStatsData.setLongStats(new LongColumnStatsData(numNulls, ndv)); break; case DATE: - // Number of distinct dates in the 0000-01-01..9999-12-31 inclusive range is - // 3652425. - ndv = Math.min(ndv, 3652425); + // Number of distinct dates in the 0001-01-01..9999-12-31 inclusive range is + // 3652059. + ndv = Math.min(ndv, 3652059); colStatsData.setDateStats(new DateColumnStatsData(numNulls, ndv)); break; case BIGINT: diff --git a/testdata/data/README b/testdata/data/README index 8f946d01f..a4f55b096 100644 --- a/testdata/data/README +++ b/testdata/data/README @@ -309,11 +309,11 @@ Small parquet table with one DATE column, created by Parquet MR. out_of_range_date.parquet: Generated with a hacked version of Impala parquet writer. -Contains a single DATE column with 9 values, 2 of which are out of range +Contains a single DATE column with 9 values, 4 of which are out of range and should be read as NULL by Impala: -0001-12-31 (invalid - date too small) - 0000-01-01 - 0000-01-02 + 0000-01-01 (invalid - date too small) + 0000-01-02 (invalid - date too small) 1969-12-31 1970-01-01 1970-01-02 diff --git a/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test b/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test index 8eed33397..595eb922c 100644 --- a/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test +++ b/testdata/workloads/functional-query/queries/DataErrorsTest/avro-errors.test @@ -42,7 +42,7 @@ select * from bad_avro_date_out_of_range; DATE ---- ERRORS Problem parsing file $NAMENODE/test-warehouse/bad_avro_date_out_of_range_avro_snap/out_of_range_date.avro at 204 -Avro file '$NAMENODE/test-warehouse/bad_avro_date_out_of_range_avro_snap/out_of_range_date.avro' is corrupt: out of range date value -719530 at offset 204. The valid date range is -719528..2932896 (0000-01-01..9999-12-31). +Avro file '$NAMENODE/test-warehouse/bad_avro_date_out_of_range_avro_snap/out_of_range_date.avro' is corrupt: out of range date value -719530 at offset 204. The valid date range is -719162..2932896 (0001-01-01..9999-12-31). ==== ---- QUERY # Query an avro table created by Hive 2.1.1 containig the following dates: diff --git a/testdata/workloads/functional-query/queries/QueryTest/avro_date.test b/testdata/workloads/functional-query/queries/QueryTest/avro_date.test index 64cef54c8..317e58cd1 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/avro_date.test +++ b/testdata/workloads/functional-query/queries/QueryTest/avro_date.test @@ -4,15 +4,11 @@ ---- QUERY # TODO: Once DATE type is supported across all fileformats, move this test to # hdfs-scan-node.test. +# Avro table was created by Hive2. Inserting '0001-01-01' to date_col resulted as +# '0000-12-30' (because Hive2 uses Julian calendar for writing dates before 1582-10-15) +# which is outside of the supported date range. select id_col, date_part, date_col from date_tbl; ---- RESULTS -0,0001-01-01,0000-12-30 -1,0001-01-01,0001-12-29 -2,0001-01-01,0001-12-30 -3,0001-01-01,1400-01-08 -4,0001-01-01,2017-11-28 -5,0001-01-01,9999-12-31 -6,0001-01-01,NULL 10,1399-06-27,2017-11-28 11,1399-06-27,NULL 12,1399-06-27,2018-12-31 @@ -30,6 +26,9 @@ select id_col, date_part, date_col from date_tbl; 31,9999-12-31,9999-12-31 ---- TYPES INT,DATE,DATE +---- ERRORS +Problem parsing file __HDFS_FILENAME__ at 253 +Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 253. The valid date range is -719162..2932896 (0001-01-01..9999-12-31). ==== ---- HIVE_MAJOR_VERSION 3 @@ -61,6 +60,25 @@ select id_col, date_part, date_col from date_tbl; ---- TYPES INT,DATE,DATE ==== +---- HIVE_MAJOR_VERSION +2 +---- QUERY +# Avro table was created by Hive2. Inserting '0001-01-01' to date_col resulted as +# '0000-12-30' (because Hive2 uses Julian calendar for writing dates before 1582-10-15) +# which is outside of the supported date range. +select date_part, count(date_col) from date_tbl group by date_part; +---- RESULTS +2017-11-27,10 +1399-06-27,2 +9999-12-31,2 +---- TYPES +DATE, BIGINT +---- ERRORS +Problem parsing file __HDFS_FILENAME__ at 253 +Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 253. The valid date range is -719162..2932896 (0001-01-01..9999-12-31). +==== +---- HIVE_MAJOR_VERSION +3 ---- QUERY select date_part, count(date_col) from date_tbl group by date_part; ---- RESULTS @@ -74,11 +92,17 @@ DATE, BIGINT ---- HIVE_MAJOR_VERSION 2 ---- QUERY +# Avro table was created by Hive2. Inserting '0001-01-01' to date_col resulted as +# '0000-12-30' (because Hive2 uses Julian calendar for writing dates before 1582-10-15) +# which is outside of the supported date range. select min(date_part), max(date_part), min(date_col), max(date_col) from date_tbl; ---- RESULTS -0001-01-01,9999-12-31,0000-12-30,9999-12-31 +1399-06-27,9999-12-31,0001-06-19,9999-12-31 ---- TYPES DATE, DATE, DATE, DATE +---- ERRORS +Problem parsing file __HDFS_FILENAME__ at 253 +Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 253. The valid date range is -719162..2932896 (0001-01-01..9999-12-31). ==== ---- HIVE_MAJOR_VERSION 3 @@ -92,14 +116,19 @@ DATE, DATE, DATE, DATE ---- HIVE_MAJOR_VERSION 2 ---- QUERY +# Avro table was created by Hive2. Inserting '0001-01-01' to date_col resulted as +# '0000-12-30' (because Hive2 uses Julian calendar for writing dates before 1582-10-15) +# which is outside of the supported date range. select date_part, min(date_col), max(date_col) from date_tbl group by date_part; ---- RESULTS 2017-11-27,0001-06-19,2017-11-28 1399-06-27,2017-11-28,2018-12-31 9999-12-31,9999-12-01,9999-12-31 -0001-01-01,0000-12-30,9999-12-31 ---- TYPES DATE, DATE, DATE +---- ERRORS +Problem parsing file __HDFS_FILENAME__ at 253 +Avro file '__HDFS_FILENAME__' is corrupt: out of range date value -719164 at offset 253. The valid date range is -719162..2932896 (0001-01-01..9999-12-31). ==== ---- HIVE_MAJOR_VERSION 3 diff --git a/testdata/workloads/functional-query/queries/QueryTest/date-fileformat-support.test b/testdata/workloads/functional-query/queries/QueryTest/date-fileformat-support.test index a8a677808..d4cd44cbd 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/date-fileformat-support.test +++ b/testdata/workloads/functional-query/queries/QueryTest/date-fileformat-support.test @@ -2,7 +2,7 @@ ---- QUERY # Inserting into parquet table works. insert into $DATABASE.parquet_date_tbl -values ('2011-05-06'), ('9999-12-31'), ('0000-01-01'); +values ('2011-05-06'), ('9999-12-31'), ('0001-01-01'); ---- RESULTS : 3 ==== @@ -15,7 +15,7 @@ select * from $DATABASE.parquet_date_tbl; 1972-09-27 2011-05-06 9999-12-31 -0000-01-01 +0001-01-01 ---- TYPES DATE ==== @@ -96,7 +96,7 @@ where date_part != '2099-12-31'; 1899-12-31,1972-09-27 1899-12-31,2011-05-06 1899-12-31,9999-12-31 -1899-12-31,0000-01-01 +1899-12-31,0001-01-01 1899-12-31,2017-11-28 1899-12-31,NULL 1899-12-31,2018-12-31 diff --git a/testdata/workloads/functional-query/queries/QueryTest/date.test b/testdata/workloads/functional-query/queries/QueryTest/date.test index 7ab47aaa0..e9a0729e9 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/date.test +++ b/testdata/workloads/functional-query/queries/QueryTest/date.test @@ -42,7 +42,7 @@ bigint ---- QUERY select count(*) from date_tbl where date_col in ('2017-11-28', '0001-1-1', DATE '9999-12-31', '0001-06-25', -DATE '0001-6-23', '2018-12-31', '0000-01-01', '2018-12-31') +DATE '0001-6-23', '2018-12-31', '0001-01-01', '2018-12-31') ---- RESULTS 9 ---- TYPES @@ -59,7 +59,7 @@ bigint ---- QUERY select count(*) from date_tbl where date_col not in ('2017-11-28', '0001-1-1', DATE '9999-12-31', '0001-06-25', -DATE '0001-6-23', '2018-12-31', '0000-01-01', '2018-12-31') +DATE '0001-6-23', '2018-12-31', '0001-01-01', '2018-12-31') ---- RESULTS 11 ---- TYPES @@ -91,14 +91,20 @@ not between '2017-11-28' and '2017-11-28' bigint ==== ---- QUERY -# Test 0000-01-01 date handling during string to date conversion. -select CAST("0000-01-01" AS DATE); +# Test 0001-01-01 date handling during string to date conversion. +select CAST("0001-01-01" AS DATE); ---- RESULTS -0000-01-01 +0001-01-01 ---- TYPES DATE ==== ---- QUERY +# Test < 0001 date handling during string to date conversion. +select CAST("0000-01-01" AS DATE); +---- CATCH +UDF ERROR: String to Date parse failed. Invalid string val: "0000-01-01" +==== +---- QUERY # Test <1400 date handling during string to date conversion. select CAST("1399-12-31" AS DATE); ---- RESULTS @@ -125,14 +131,20 @@ select CAST("not a date" AS DATE); UDF ERROR: String to Date parse failed. Invalid string val: "not a date" ==== ---- QUERY -# Test 0000-01-01 and 9999-12-31 date literal -select DATE "0000-01-01", DATE '9999-12-31'; +# Test 0001-01-01 and 9999-12-31 date literal +select DATE "0001-01-01", DATE '9999-12-31'; ---- RESULTS -0000-01-01,9999-12-31 +0001-01-01,9999-12-31 ---- TYPES DATE,DATE ==== ---- QUERY +# Test < 0001 date literal. +select DATE "0000-01-01"; +---- CATCH +AnalysisException: Invalid date literal: '0000-01-01' +==== +---- QUERY # Test >=10000 date literal. select DATE "10000-01-01"; ---- CATCH @@ -854,9 +866,9 @@ PARSE ERROR: Time tokens provided with date type. ==== ---- QUERY # Boundary tests with FORMAT clause -select cast("0000-01-01" as date FORMAT "YYYY-MM-DD"); +select cast("0001-01-01" as date FORMAT "YYYY-MM-DD"); ---- RESULTS -0000-01-01 +0001-01-01 ==== ---- QUERY select cast("9999-12-31" as date FORMAT "YYYY-MM-DD"); diff --git a/testdata/workloads/functional-query/queries/QueryTest/out-of-range-date.test b/testdata/workloads/functional-query/queries/QueryTest/out-of-range-date.test index 9dd23f696..b5f7f3753 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/out-of-range-date.test +++ b/testdata/workloads/functional-query/queries/QueryTest/out-of-range-date.test @@ -7,8 +7,8 @@ SELECT * FROM $DATABASE.out_of_range_date; DATE ---- RESULTS NULL -0000-01-01 -0000-01-02 +NULL +NULL 1969-12-31 1970-01-01 1970-01-02 @@ -16,12 +16,12 @@ NULL 9999-12-31 NULL ---- ERRORS -Parquet file '$NAMENODE/test-warehouse/$DATABASE.db/out_of_range_date/out_of_range_date.parquet' column 'v' contains an out of range date. The valid date range is 0000-01-01..9999-12-31. (1 of 2 similar) +Parquet file '$NAMENODE/test-warehouse/$DATABASE.db/out_of_range_date/out_of_range_date.parquet' column 'v' contains an out of range date. The valid date range is 0001-01-01..9999-12-31. (1 of 4 similar) ==== ---- QUERY # Out of range DATE values in a parquet table. set abort_on_error=1; SELECT * FROM $DATABASE.out_of_range_date; ---- CATCH -Parquet file '$NAMENODE/test-warehouse/$DATABASE.db/out_of_range_date/out_of_range_date.parquet' column 'v' contains an out of range date. The valid date range is 0000-01-01..9999-12-31. +Parquet file '$NAMENODE/test-warehouse/$DATABASE.db/out_of_range_date/out_of_range_date.parquet' column 'v' contains an out of range date. The valid date range is 0001-01-01..9999-12-31. ====