mirror of
https://github.com/apache/impala.git
synced 2025-12-30 03:01:44 -05:00
IMPALA-4498: crash in to_utc_timestamp/from_utc_timestamp
The bugs was that the functions did not check whether the conversion pushed the value out of range. The fix is to use boost's validation immediately to check the validity of the timestamp and catch any exceptions thrown. It would be preferable to avoid the exceptions, but Boost does not provide a straightforward way to disable the exceptions or extract potentially-invalid values from a date object. Testing: Added expression tests that exercise out-of-range cases. Also added additional tests to confirm that date addition and subtraction weren't affected by similar bugs. Change-Id: Idc427b06ac33ec874a05cb98d01c00e970d3dde6 Reviewed-on: http://gerrit.cloudera.org:8080/5251 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Impala Public Jenkins
This commit is contained in:
committed by
Impala Public Jenkins
parent
730ac1bae1
commit
1495b2007d
@@ -3580,6 +3580,36 @@ TEST_F(ExprTest, TimestampFunctions) {
|
||||
"CAST('9995-12-11 00:00:00' AS TIMESTAMP) + INTERVAL 61 MONTH", TYPE_TIMESTAMP);
|
||||
TestIsNull(
|
||||
"CAST('9995-12-11 00:00:00' AS TIMESTAMP) - INTERVAL -61 MONTH", TYPE_TIMESTAMP);
|
||||
TestIsNull(
|
||||
"CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 1000 DAYS", TYPE_TIMESTAMP);
|
||||
TestIsNull(
|
||||
"CAST('1400-01-01 00:12:00' AS TIMESTAMP) - INTERVAL 1 DAYS", TYPE_TIMESTAMP);
|
||||
TestIsNull(
|
||||
"CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 10000 HOURS", TYPE_TIMESTAMP);
|
||||
TestIsNull(
|
||||
"CAST('1400-01-01 00:12:00' AS TIMESTAMP) - INTERVAL 24 HOURS", TYPE_TIMESTAMP);
|
||||
TestIsNull("CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 1000000 MINUTES",
|
||||
TYPE_TIMESTAMP);
|
||||
TestIsNull(
|
||||
"CAST('1400-01-01 00:12:00' AS TIMESTAMP) - INTERVAL 13 MINUTES", TYPE_TIMESTAMP);
|
||||
TestIsNull("CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 100000000 SECONDS",
|
||||
TYPE_TIMESTAMP);
|
||||
TestIsNull(
|
||||
"CAST('1400-01-01 00:00:00' AS TIMESTAMP) - INTERVAL 1 SECONDS", TYPE_TIMESTAMP);
|
||||
TestIsNull(
|
||||
"CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 100000000000 MILLISECONDS",
|
||||
TYPE_TIMESTAMP);
|
||||
TestIsNull("CAST('1400-01-01 00:00:00' AS TIMESTAMP) - INTERVAL 1 MILLISECONDS",
|
||||
TYPE_TIMESTAMP);
|
||||
TestIsNull(
|
||||
"CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 100000000000000 MICROSECONDS",
|
||||
TYPE_TIMESTAMP);
|
||||
TestIsNull("CAST('1400-01-01 00:00:00' AS TIMESTAMP) - INTERVAL 1 MICROSECONDS",
|
||||
TYPE_TIMESTAMP);
|
||||
TestIsNull("CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 100000000000000000 "
|
||||
"NANOSECONDS", TYPE_TIMESTAMP);
|
||||
TestIsNull("CAST('1400-01-01 00:00:00' AS TIMESTAMP) - INTERVAL 1 NANOSECONDS",
|
||||
TYPE_TIMESTAMP);
|
||||
// Add/sub months.
|
||||
TestStringValue("cast(date_add(cast('2012-01-01 09:10:11.123456789' "
|
||||
"as timestamp), interval 13 months) as string)",
|
||||
@@ -4067,11 +4097,23 @@ TEST_F(ExprTest, TimestampFunctions) {
|
||||
}
|
||||
|
||||
// Hive silently ignores bad timezones. We log a problem.
|
||||
TestStringValue(
|
||||
"cast(from_utc_timestamp("
|
||||
"cast('1970-01-01 00:00:00' as timestamp), 'FOOBAR') as string)",
|
||||
TestStringValue("cast(from_utc_timestamp("
|
||||
"cast('1970-01-01 00:00:00' as timestamp), 'FOOBAR') as string)",
|
||||
"1970-01-01 00:00:00");
|
||||
|
||||
// These return NULL because timezone conversion makes the value out
|
||||
// of range.
|
||||
TestIsNull("to_utc_timestamp(CAST(\"1400-01-01 05:00:00\" as TIMESTAMP), \"AEST\")",
|
||||
TYPE_TIMESTAMP);
|
||||
TestIsNull("from_utc_timestamp(CAST(\"1400-01-01 05:00:00\" as TIMESTAMP), \"PST\")",
|
||||
TYPE_TIMESTAMP);
|
||||
// TODO: IMPALA-4549: these should return NULL, but validation doesn't catch year 10000.
|
||||
// Just check that we don't crash.
|
||||
GetValue("from_utc_timestamp(CAST(\"10000-12-31 21:00:00\" as TIMESTAMP), \"JST\")",
|
||||
TYPE_TIMESTAMP);
|
||||
GetValue("to_utc_timestamp(CAST(\"10000-12-31 21:00:00\" as TIMESTAMP), \"PST\")",
|
||||
TYPE_TIMESTAMP);
|
||||
|
||||
// With support of date strings this generates a date and 0 time.
|
||||
TestStringValue(
|
||||
"cast(cast('1999-01-10' as timestamp) as string)", "1999-01-10 00:00:00");
|
||||
|
||||
@@ -23,11 +23,12 @@
|
||||
|
||||
#include "exprs/anyval-util.h"
|
||||
#include "exprs/timezone_db.h"
|
||||
#include "gutil/strings/substitute.h"
|
||||
#include "runtime/string-value.inline.h"
|
||||
#include "runtime/timestamp-parse-util.h"
|
||||
#include "runtime/timestamp-value.h"
|
||||
#include "udf/udf.h"
|
||||
#include "udf/udf-internal.h"
|
||||
#include "udf/udf.h"
|
||||
|
||||
#include "common/names.h"
|
||||
|
||||
@@ -47,6 +48,24 @@ string TimestampFunctions::ToIsoExtendedString(const TimestampValue& ts_value) {
|
||||
return to_iso_extended_string(ts_value.date());
|
||||
}
|
||||
|
||||
namespace {
|
||||
/// Uses Boost's internal checking to throw an exception if 'date' is out of the
|
||||
/// supported range of boost::gregorian.
|
||||
void ThrowIfDateOutOfRange(const boost::gregorian::date& date) {
|
||||
// Boost checks the ranges when instantiating the year/month/day representations.
|
||||
boost::gregorian::greg_year year = date.year();
|
||||
boost::gregorian::greg_month month = date.month();
|
||||
boost::gregorian::greg_day day = date.day();
|
||||
// Ensure Boost's validation is effective.
|
||||
DCHECK_GE(year, boost::gregorian::greg_year::min());
|
||||
DCHECK_LE(year, boost::gregorian::greg_year::max());
|
||||
DCHECK_GE(month, boost::gregorian::greg_month::min());
|
||||
DCHECK_LE(month, boost::gregorian::greg_month::max());
|
||||
DCHECK_GE(day, boost::gregorian::greg_day::min());
|
||||
DCHECK_LE(day, boost::gregorian::greg_day::max());
|
||||
}
|
||||
}
|
||||
|
||||
// This function uses inline asm functions, which we believe to be from the boost library.
|
||||
// Inline asm is not currently supported by JIT, so this function should always be run in
|
||||
// the interpreted mode. This is handled in ScalarFnCall::GetUdf().
|
||||
@@ -67,13 +86,22 @@ TimestampVal TimestampFunctions::FromUtc(FunctionContext* context,
|
||||
return ts_val;
|
||||
}
|
||||
|
||||
ptime temp;
|
||||
ts_value.ToPtime(&temp);
|
||||
local_date_time lt(temp, timezone);
|
||||
TimestampValue return_value = lt.local_time();
|
||||
TimestampVal return_val;
|
||||
return_value.ToTimestampVal(&return_val);
|
||||
return return_val;
|
||||
try {
|
||||
ptime temp;
|
||||
ts_value.ToPtime(&temp);
|
||||
local_date_time lt(temp, timezone);
|
||||
ptime local_time = lt.local_time();
|
||||
ThrowIfDateOutOfRange(local_time.date());
|
||||
TimestampVal return_val;
|
||||
TimestampValue(local_time).ToTimestampVal(&return_val);
|
||||
return return_val;
|
||||
} catch (boost::exception&) {
|
||||
const string& msg = Substitute(
|
||||
"Timestamp '$0' did not convert to a valid local time in timezone '$1'",
|
||||
ts_value.DebugString(), tz_string_value.DebugString());
|
||||
context->AddWarning(msg.c_str());
|
||||
return TimestampVal::null();
|
||||
}
|
||||
}
|
||||
|
||||
// This function uses inline asm functions, which we believe to be from the boost library.
|
||||
@@ -96,16 +124,26 @@ TimestampVal TimestampFunctions::ToUtc(FunctionContext* context,
|
||||
return ts_val;
|
||||
}
|
||||
|
||||
local_date_time lt(ts_value.date(), ts_value.time(),
|
||||
timezone, local_date_time::NOT_DATE_TIME_ON_ERROR);
|
||||
TimestampValue return_value(lt.utc_time());
|
||||
TimestampVal return_val;
|
||||
return_value.ToTimestampVal(&return_val);
|
||||
return return_val;
|
||||
try {
|
||||
local_date_time lt(ts_value.date(), ts_value.time(), timezone,
|
||||
local_date_time::NOT_DATE_TIME_ON_ERROR);
|
||||
ptime utc_time = lt.utc_time();
|
||||
// The utc_time() conversion does not check ranges - need to explicitly check.
|
||||
ThrowIfDateOutOfRange(utc_time.date());
|
||||
TimestampVal return_val;
|
||||
TimestampValue(utc_time).ToTimestampVal(&return_val);
|
||||
return return_val;
|
||||
} catch (boost::exception&) {
|
||||
const string& msg =
|
||||
Substitute("Timestamp '$0' in timezone '$1' could not be converted to UTC",
|
||||
ts_value.DebugString(), tz_string_value.DebugString());
|
||||
context->AddWarning(msg.c_str());
|
||||
return TimestampVal::null();
|
||||
}
|
||||
}
|
||||
|
||||
void TimestampFunctions::UnixAndFromUnixPrepare(FunctionContext* context,
|
||||
FunctionContext::FunctionStateScope scope) {
|
||||
void TimestampFunctions::UnixAndFromUnixPrepare(
|
||||
FunctionContext* context, FunctionContext::FunctionStateScope scope) {
|
||||
if (scope != FunctionContext::THREAD_LOCAL) return;
|
||||
DateTimeFormatContext* dt_ctx = NULL;
|
||||
if (context->IsArgConstant(1)) {
|
||||
|
||||
@@ -2574,3 +2574,51 @@ where (cast(a.string_col as string) > 'a');
|
||||
---- TYPES
|
||||
INT
|
||||
====
|
||||
---- QUERY
|
||||
# Test from_utc_timestamp() returning out-of-range result.
|
||||
select from_utc_timestamp(CAST("1400-01-01 05:00:00" as TIMESTAMP), "PST")
|
||||
from alltypes
|
||||
limit 1
|
||||
---- RESULTS
|
||||
NULL
|
||||
---- TYPES
|
||||
TIMESTAMP
|
||||
---- ERRORS
|
||||
UDF WARNING: Timestamp '1400-01-01 05:00:00' did not convert to a valid local time in timezone 'PST'
|
||||
====
|
||||
---- QUERY
|
||||
# Test from_utc_timestamp() returning out-of-range result.
|
||||
select to_utc_timestamp(CAST("1400-01-01 05:00:00" as TIMESTAMP), "JST")
|
||||
from alltypes
|
||||
limit 1
|
||||
---- RESULTS
|
||||
NULL
|
||||
---- TYPES
|
||||
TIMESTAMP
|
||||
---- ERRORS
|
||||
UDF WARNING: Timestamp '1400-01-01 05:00:00' in timezone 'JST' could not be converted to UTC
|
||||
====
|
||||
---- QUERY
|
||||
# Test out-of-range value handling when adding dates.
|
||||
select CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 367 DAYS
|
||||
from alltypes
|
||||
limit 1
|
||||
---- RESULTS
|
||||
NULL
|
||||
---- TYPES
|
||||
TIMESTAMP
|
||||
---- ERRORS
|
||||
UDF WARNING: Cannot add interval 367: Year is out of valid range: 1400..10000
|
||||
====
|
||||
---- QUERY
|
||||
# Test out-of-range value handling when subtracting dates.
|
||||
select CAST('1400-01-01 21:00:00' AS TIMESTAMP) - INTERVAL 1 DAYS
|
||||
from alltypes
|
||||
limit 1
|
||||
---- RESULTS
|
||||
NULL
|
||||
---- TYPES
|
||||
TIMESTAMP
|
||||
---- ERRORS
|
||||
UDF WARNING: Cannot subtract interval 1: Year is out of valid range: 1400..10000
|
||||
====
|
||||
|
||||
Reference in New Issue
Block a user