IMPALA-4498: crash in to_utc_timestamp/from_utc_timestamp

The bugs was that the functions did not check whether the conversion
pushed the value out of range. The fix is to use boost's validation
immediately to check the validity of the timestamp and catch any
exceptions thrown.

It would be preferable to avoid the exceptions, but Boost does not
provide a straightforward way to disable the exceptions or extract
potentially-invalid values from a date object.

Testing:
Added expression tests that exercise out-of-range cases. Also
added additional tests to confirm that date addition and subtraction
weren't affected by similar bugs.

Change-Id: Idc427b06ac33ec874a05cb98d01c00e970d3dde6
Reviewed-on: http://gerrit.cloudera.org:8080/5251
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Impala Public Jenkins
This commit is contained in:
Tim Armstrong
2016-11-28 15:29:37 -08:00
committed by Impala Public Jenkins
parent 730ac1bae1
commit 1495b2007d
3 changed files with 147 additions and 19 deletions

View File

@@ -3580,6 +3580,36 @@ TEST_F(ExprTest, TimestampFunctions) {
"CAST('9995-12-11 00:00:00' AS TIMESTAMP) + INTERVAL 61 MONTH", TYPE_TIMESTAMP);
TestIsNull(
"CAST('9995-12-11 00:00:00' AS TIMESTAMP) - INTERVAL -61 MONTH", TYPE_TIMESTAMP);
TestIsNull(
"CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 1000 DAYS", TYPE_TIMESTAMP);
TestIsNull(
"CAST('1400-01-01 00:12:00' AS TIMESTAMP) - INTERVAL 1 DAYS", TYPE_TIMESTAMP);
TestIsNull(
"CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 10000 HOURS", TYPE_TIMESTAMP);
TestIsNull(
"CAST('1400-01-01 00:12:00' AS TIMESTAMP) - INTERVAL 24 HOURS", TYPE_TIMESTAMP);
TestIsNull("CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 1000000 MINUTES",
TYPE_TIMESTAMP);
TestIsNull(
"CAST('1400-01-01 00:12:00' AS TIMESTAMP) - INTERVAL 13 MINUTES", TYPE_TIMESTAMP);
TestIsNull("CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 100000000 SECONDS",
TYPE_TIMESTAMP);
TestIsNull(
"CAST('1400-01-01 00:00:00' AS TIMESTAMP) - INTERVAL 1 SECONDS", TYPE_TIMESTAMP);
TestIsNull(
"CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 100000000000 MILLISECONDS",
TYPE_TIMESTAMP);
TestIsNull("CAST('1400-01-01 00:00:00' AS TIMESTAMP) - INTERVAL 1 MILLISECONDS",
TYPE_TIMESTAMP);
TestIsNull(
"CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 100000000000000 MICROSECONDS",
TYPE_TIMESTAMP);
TestIsNull("CAST('1400-01-01 00:00:00' AS TIMESTAMP) - INTERVAL 1 MICROSECONDS",
TYPE_TIMESTAMP);
TestIsNull("CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 100000000000000000 "
"NANOSECONDS", TYPE_TIMESTAMP);
TestIsNull("CAST('1400-01-01 00:00:00' AS TIMESTAMP) - INTERVAL 1 NANOSECONDS",
TYPE_TIMESTAMP);
// Add/sub months.
TestStringValue("cast(date_add(cast('2012-01-01 09:10:11.123456789' "
"as timestamp), interval 13 months) as string)",
@@ -4067,11 +4097,23 @@ TEST_F(ExprTest, TimestampFunctions) {
}
// Hive silently ignores bad timezones. We log a problem.
TestStringValue(
"cast(from_utc_timestamp("
"cast('1970-01-01 00:00:00' as timestamp), 'FOOBAR') as string)",
TestStringValue("cast(from_utc_timestamp("
"cast('1970-01-01 00:00:00' as timestamp), 'FOOBAR') as string)",
"1970-01-01 00:00:00");
// These return NULL because timezone conversion makes the value out
// of range.
TestIsNull("to_utc_timestamp(CAST(\"1400-01-01 05:00:00\" as TIMESTAMP), \"AEST\")",
TYPE_TIMESTAMP);
TestIsNull("from_utc_timestamp(CAST(\"1400-01-01 05:00:00\" as TIMESTAMP), \"PST\")",
TYPE_TIMESTAMP);
// TODO: IMPALA-4549: these should return NULL, but validation doesn't catch year 10000.
// Just check that we don't crash.
GetValue("from_utc_timestamp(CAST(\"10000-12-31 21:00:00\" as TIMESTAMP), \"JST\")",
TYPE_TIMESTAMP);
GetValue("to_utc_timestamp(CAST(\"10000-12-31 21:00:00\" as TIMESTAMP), \"PST\")",
TYPE_TIMESTAMP);
// With support of date strings this generates a date and 0 time.
TestStringValue(
"cast(cast('1999-01-10' as timestamp) as string)", "1999-01-10 00:00:00");

View File

@@ -23,11 +23,12 @@
#include "exprs/anyval-util.h"
#include "exprs/timezone_db.h"
#include "gutil/strings/substitute.h"
#include "runtime/string-value.inline.h"
#include "runtime/timestamp-parse-util.h"
#include "runtime/timestamp-value.h"
#include "udf/udf.h"
#include "udf/udf-internal.h"
#include "udf/udf.h"
#include "common/names.h"
@@ -47,6 +48,24 @@ string TimestampFunctions::ToIsoExtendedString(const TimestampValue& ts_value) {
return to_iso_extended_string(ts_value.date());
}
namespace {
/// Uses Boost's internal checking to throw an exception if 'date' is out of the
/// supported range of boost::gregorian.
void ThrowIfDateOutOfRange(const boost::gregorian::date& date) {
// Boost checks the ranges when instantiating the year/month/day representations.
boost::gregorian::greg_year year = date.year();
boost::gregorian::greg_month month = date.month();
boost::gregorian::greg_day day = date.day();
// Ensure Boost's validation is effective.
DCHECK_GE(year, boost::gregorian::greg_year::min());
DCHECK_LE(year, boost::gregorian::greg_year::max());
DCHECK_GE(month, boost::gregorian::greg_month::min());
DCHECK_LE(month, boost::gregorian::greg_month::max());
DCHECK_GE(day, boost::gregorian::greg_day::min());
DCHECK_LE(day, boost::gregorian::greg_day::max());
}
}
// This function uses inline asm functions, which we believe to be from the boost library.
// Inline asm is not currently supported by JIT, so this function should always be run in
// the interpreted mode. This is handled in ScalarFnCall::GetUdf().
@@ -67,13 +86,22 @@ TimestampVal TimestampFunctions::FromUtc(FunctionContext* context,
return ts_val;
}
ptime temp;
ts_value.ToPtime(&temp);
local_date_time lt(temp, timezone);
TimestampValue return_value = lt.local_time();
TimestampVal return_val;
return_value.ToTimestampVal(&return_val);
return return_val;
try {
ptime temp;
ts_value.ToPtime(&temp);
local_date_time lt(temp, timezone);
ptime local_time = lt.local_time();
ThrowIfDateOutOfRange(local_time.date());
TimestampVal return_val;
TimestampValue(local_time).ToTimestampVal(&return_val);
return return_val;
} catch (boost::exception&) {
const string& msg = Substitute(
"Timestamp '$0' did not convert to a valid local time in timezone '$1'",
ts_value.DebugString(), tz_string_value.DebugString());
context->AddWarning(msg.c_str());
return TimestampVal::null();
}
}
// This function uses inline asm functions, which we believe to be from the boost library.
@@ -96,16 +124,26 @@ TimestampVal TimestampFunctions::ToUtc(FunctionContext* context,
return ts_val;
}
local_date_time lt(ts_value.date(), ts_value.time(),
timezone, local_date_time::NOT_DATE_TIME_ON_ERROR);
TimestampValue return_value(lt.utc_time());
TimestampVal return_val;
return_value.ToTimestampVal(&return_val);
return return_val;
try {
local_date_time lt(ts_value.date(), ts_value.time(), timezone,
local_date_time::NOT_DATE_TIME_ON_ERROR);
ptime utc_time = lt.utc_time();
// The utc_time() conversion does not check ranges - need to explicitly check.
ThrowIfDateOutOfRange(utc_time.date());
TimestampVal return_val;
TimestampValue(utc_time).ToTimestampVal(&return_val);
return return_val;
} catch (boost::exception&) {
const string& msg =
Substitute("Timestamp '$0' in timezone '$1' could not be converted to UTC",
ts_value.DebugString(), tz_string_value.DebugString());
context->AddWarning(msg.c_str());
return TimestampVal::null();
}
}
void TimestampFunctions::UnixAndFromUnixPrepare(FunctionContext* context,
FunctionContext::FunctionStateScope scope) {
void TimestampFunctions::UnixAndFromUnixPrepare(
FunctionContext* context, FunctionContext::FunctionStateScope scope) {
if (scope != FunctionContext::THREAD_LOCAL) return;
DateTimeFormatContext* dt_ctx = NULL;
if (context->IsArgConstant(1)) {

View File

@@ -2574,3 +2574,51 @@ where (cast(a.string_col as string) > 'a');
---- TYPES
INT
====
---- QUERY
# Test from_utc_timestamp() returning out-of-range result.
select from_utc_timestamp(CAST("1400-01-01 05:00:00" as TIMESTAMP), "PST")
from alltypes
limit 1
---- RESULTS
NULL
---- TYPES
TIMESTAMP
---- ERRORS
UDF WARNING: Timestamp '1400-01-01 05:00:00' did not convert to a valid local time in timezone 'PST'
====
---- QUERY
# Test from_utc_timestamp() returning out-of-range result.
select to_utc_timestamp(CAST("1400-01-01 05:00:00" as TIMESTAMP), "JST")
from alltypes
limit 1
---- RESULTS
NULL
---- TYPES
TIMESTAMP
---- ERRORS
UDF WARNING: Timestamp '1400-01-01 05:00:00' in timezone 'JST' could not be converted to UTC
====
---- QUERY
# Test out-of-range value handling when adding dates.
select CAST('9999-12-31 21:00:00' AS TIMESTAMP) + INTERVAL 367 DAYS
from alltypes
limit 1
---- RESULTS
NULL
---- TYPES
TIMESTAMP
---- ERRORS
UDF WARNING: Cannot add interval 367: Year is out of valid range: 1400..10000
====
---- QUERY
# Test out-of-range value handling when subtracting dates.
select CAST('1400-01-01 21:00:00' AS TIMESTAMP) - INTERVAL 1 DAYS
from alltypes
limit 1
---- RESULTS
NULL
---- TYPES
TIMESTAMP
---- ERRORS
UDF WARNING: Cannot subtract interval 1: Year is out of valid range: 1400..10000
====