From d0dd13053aafeabea29f2fdf5b8bfed4fffa9bae Mon Sep 17 00:00:00 2001 From: Michael Ubell Date: Wed, 19 Sep 2012 23:46:53 -0700 Subject: [PATCH] Improve string to timestamp performance. --- be/src/benchmarks/CMakeLists.txt | 2 + .../benchmarks/parse-timestamp-benchmark.cc | 161 +++++++++++++++ be/src/exec/text-converter.inline.h | 3 +- be/src/exprs/expr-test.cc | 9 +- be/src/runtime/runtime-state.cc | 2 +- be/src/runtime/timestamp-test.cc | 87 +++++++- be/src/runtime/timestamp-value.cc | 187 ++++++++++++++---- be/src/runtime/timestamp-value.h | 35 +++- .../hbase-scan-node-errors.test | 21 +- .../DataErrorsTest/hdfs-scan-node-errors.test | 6 +- 10 files changed, 445 insertions(+), 68 deletions(-) create mode 100644 be/src/benchmarks/parse-timestamp-benchmark.cc diff --git a/be/src/benchmarks/CMakeLists.txt b/be/src/benchmarks/CMakeLists.txt index 5ac1edfcd..047ddf42c 100644 --- a/be/src/benchmarks/CMakeLists.txt +++ b/be/src/benchmarks/CMakeLists.txt @@ -6,12 +6,14 @@ set(LIBRARY_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/benchmarks") # where to put generated binaries set(EXECUTABLE_OUTPUT_PATH "${BUILD_OUTPUT_ROOT_DIRECTORY}/benchmarks") +add_executable(parse-timestamp-benchmark parse-timestamp-benchmark.cc) add_executable(string-search-benchmark string-search-benchmark.cc) add_executable(atof-benchmark atof-benchmark.cc) add_executable(atoi-benchmark atoi-benchmark.cc) add_executable(hash-benchmark hash-benchmark.cc) add_executable(thread-create-benchmark thread-create-benchmark.cc) +target_link_libraries(parse-timestamp-benchmark ${IMPALA_LINK_LIBS}) target_link_libraries(string-search-benchmark ${IMPALA_LINK_LIBS}) target_link_libraries(atof-benchmark ${IMPALA_LINK_LIBS}) target_link_libraries(atoi-benchmark ${IMPALA_LINK_LIBS}) diff --git a/be/src/benchmarks/parse-timestamp-benchmark.cc b/be/src/benchmarks/parse-timestamp-benchmark.cc new file mode 100644 index 000000000..ef33c9779 --- /dev/null +++ b/be/src/benchmarks/parse-timestamp-benchmark.cc @@ -0,0 
+1,161 @@ +// Copyright (c) 2012 Cloudera, Inc. All rights reserved. + +#include +#include +#include +#include +#include +#include "runtime/string-value.h" +#include "runtime/timestamp-value.h" +#include "util/benchmark.h" +#include "util/cpu-info.h" + +using namespace impala; +using namespace std; +using namespace boost; +using namespace boost::date_time; +using namespace boost::posix_time; +using namespace boost::gregorian; + +// Benchmark for parsing timestamps. +// Dates: +// Impala Rate (per ms): 32.4125 +// Boost String Rate (per ms): 0.683995 +// Boost Rate (per ms): 0.679348 +// +// Times: +// Impala Rate (per ms): 29.3199 +// Boost Rate (per ms): 0.491159 +// + +#define VALIDATE 0 + +#if VALIDATE +#define VALIDATE_RESULT(actual, expected, str) \ + if (actual != expected) { \ + cout << "Parse Error. " \ + << "String: " << str \ + << ". Parsed: " << actual << endl; \ + exit(-1); \ + } +#else +#define VALIDATE_RESULT(actual, expected, str) +#endif + + +struct TestData { + vector data; + vector memory; + vector result; +}; + +void AddTestData(TestData* data, const string& input) { + data->memory.push_back(input); + const string& str = data->memory.back(); + data->data.push_back(StringValue(const_cast(str.c_str()), str.length())); +} + +void AddTestDataDates(TestData* data, int n, const string& startstr) { + gregorian::date start(from_string(startstr)); + for (int i = 0; i < n; ++i) { + int val = rand(); + val %= 100; + gregorian::date_duration days(val); + start += days; + stringstream ss; + ss << to_iso_extended_string(start); + AddTestData(data, ss.str()); + } +} + +void AddTestDataTimes(TestData* data, int n, const string& startstr) { + posix_time::time_duration start(posix_time::duration_from_string(startstr)); + for (int i = 0; i < n; ++i) { + int val = rand(); + start += nanoseconds(val); + if (start.hours() >= 24) start -= hours(24); + stringstream ss; + ss << to_simple_string(start); + AddTestData(data, ss.str()); + } +} + +void TestImpalaDate(int 
batch_size, void* d) { + TestData* data = reinterpret_cast<TestData*>(d); + for (int i = 0; i < batch_size; ++i) { + int n = data->data.size(); + for (int j = 0; j < n; ++j) { + data->result[j] = TimestampValue(data->data[j].ptr, data->data[j].len); + } + } +} + +void TestBoostStringDate(int batch_size, void* d) { + TestData* data = reinterpret_cast<TestData*>(d); + for (int i = 0; i < batch_size; ++i) { + int n = data->data.size(); + for (int j = 0; j < n; ++j) { + data->result[j].set_date(from_string(data->memory[j])); + } + } +} + +void TestBoostDate(int batch_size, void* d) { + TestData* data = reinterpret_cast<TestData*>(d); + for (int i = 0; i < batch_size; ++i) { + int n = data->data.size(); + for (int j = 0; j < n; ++j) { + string s(data->data[j].ptr, data->data[j].len); + data->result[j].set_date(from_string(s)); + } + } +} + +void TestBoostTime(int batch_size, void* d) { + TestData* data = reinterpret_cast<TestData*>(d); + for (int i = 0; i < batch_size; ++i) { + int n = data->data.size(); + for (int j = 0; j < n; ++j) { + string s(data->data[j].ptr, data->data[j].len); + data->result[j].set_time(duration_from_string(s)); + } + } +} + + +int main(int argc, char **argv) { + CpuInfo::Init(); + + TestData dates, times; + + AddTestDataDates(&dates, 1000, "1953-04-22"); + AddTestDataTimes(&times, 1000, "01:02:03.45678"); + + dates.result.resize(dates.data.size()); + times.result.resize(times.data.size()); + + // Run a warmup to iterate through the data. + TestBoostDate(1000, &dates); + + double impala_rate = Benchmark::Measure(TestImpalaDate, &dates); + double boostString_rate = Benchmark::Measure(TestBoostStringDate, &dates); + double boost_rate = Benchmark::Measure(TestBoostDate, &dates); + + // Run a warmup to iterate through the data.
+ TestBoostTime(1000, &times); + double impala_time_rate = Benchmark::Measure(TestImpalaDate, &times); + double boost_time_rate = Benchmark::Measure(TestBoostTime, &times); + + cout << "Dates:" << endl; + cout << "Impala Rate (per ms): " << impala_rate << endl; + cout << "Boost String Rate (per ms): " << boostString_rate << endl; + cout << "Boost Rate (per ms): " << boost_rate << endl; + cout << endl; + cout << "Times:" << endl; + cout << "Impala Rate (per ms): " << impala_time_rate << endl; + cout << "Boost Rate (per ms): " << boost_time_rate << endl; + + + return 0; +} + diff --git a/be/src/exec/text-converter.inline.h b/be/src/exec/text-converter.inline.h index 6d8c4c369..672207a21 100644 --- a/be/src/exec/text-converter.inline.h +++ b/be/src/exec/text-converter.inline.h @@ -75,9 +75,8 @@ inline bool TextConverter::WriteSlot(const SlotDescriptor* slot_desc, Tuple* tup StringParser::StringToFloat(data, len, &parse_result); break; case TYPE_TIMESTAMP: { - std::string strbuf(data, len); TimestampValue* ts_slot = reinterpret_cast<TimestampValue*>(slot); - *ts_slot = TimestampValue(strbuf); + *ts_slot = TimestampValue(data, len); if (ts_slot->NotADateTime()) { parse_result = StringParser::PARSE_FAILURE; } diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index 210c264e4..8b5ed5003 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -1792,9 +1792,6 @@ TEST_F(ExprTest, TimestampFunctions) { TestIsNull("dayofmonth(cast('09:10:11.000000' as timestamp))", TYPE_INT); TestIsNull("day(cast('09:10:11.000000' as timestamp))", TYPE_INT); TestIsNull("weekofyear(cast('09:10:11.000000' as timestamp))", TYPE_INT); - TestIsNull("hour(cast('2011-12-22' as timestamp))", TYPE_INT); - TestIsNull("minute(cast('2011-12-22' as timestamp))", TYPE_INT); - TestIsNull("second(cast('2011-12-22' as timestamp))", TYPE_INT); TestIsNull("datediff(cast('09:10:11.12345678' as timestamp), " "cast('2012-12-22' as timestamp))", TYPE_INT); @@ -1832,9 +1829,9 @@ TEST_F(ExprTest, TimestampFunctions)
{ "cast('1970-01-01 00:00:00' as timestamp), 'FOOBAR') as string)", "1970-01-01 00:00:00"); - // There is a boost bug converting from string we need to compensate for, test it - // With support of date strings this now generates the proper date. - TestStringValue("cast(cast('1999-01-10' as timestamp) as string)", "1999-01-10"); + // With support of date strings this generates a date and 0 time. + TestStringValue( + "cast(cast('1999-01-10' as timestamp) as string)", "1999-01-10 00:00:00"); // Test functions with unknown expected value. TestValidTimestampValue("now()"); diff --git a/be/src/runtime/runtime-state.cc b/be/src/runtime/runtime-state.cc index d00bc9d2c..2da233044 100644 --- a/be/src/runtime/runtime-state.cc +++ b/be/src/runtime/runtime-state.cc @@ -55,7 +55,7 @@ Status RuntimeState::Init( ExecEnv* exec_env) { fragment_id_ = fragment_id; query_options_ = query_options; - now_.reset(new TimestampValue(now)); + now_.reset(new TimestampValue(now.c_str(), now.size())); exec_env_ = exec_env; if (!query_options.disable_codegen) { RETURN_IF_ERROR(CreateCodegen()); diff --git a/be/src/runtime/timestamp-test.cc b/be/src/runtime/timestamp-test.cc index e0007abe4..d14a09b35 100644 --- a/be/src/runtime/timestamp-test.cc +++ b/be/src/runtime/timestamp-test.cc @@ -13,12 +13,12 @@ using namespace boost; namespace impala { TEST(TimestampTest, Basic) { - string s1("2012-01-20 01:10:01"); - string s2("1990-10-20 10:10:10.123456789"); - string s3("1990-10-20 10:10:10.123456789"); - TimestampValue v1(s1); - TimestampValue v2(s2); - TimestampValue v3(s3); + char s1[] = "2012-01-20 01:10:01"; + char s2[] = "1990-10-20 10:10:10.123456789 "; + char s3[] = " 1990-10-20 10:10:10.123456789"; + TimestampValue v1(s1, strlen(s1)); + TimestampValue v2(s2, strlen(s2)); + TimestampValue v3(s3, strlen(s3)); EXPECT_EQ(v1.date().year(), 2012); EXPECT_EQ(v1.date().month(), 1); @@ -40,6 +40,81 @@ TEST(TimestampTest, Basic) { RawValue::GetHashValue(&v2, TYPE_TIMESTAMP, 0)); 
EXPECT_EQ(RawValue::GetHashValue(&v3, TYPE_TIMESTAMP, 0), RawValue::GetHashValue(&v2, TYPE_TIMESTAMP, 0)); + + // Test Dates and Times as timestamps. + char d1[] = "2012-01-20"; + char d2[] = "1990-10-20"; + TimestampValue dv1(d1, strlen(d1)); + TimestampValue dv2(d2, strlen(d2)); + + EXPECT_NE(dv1, dv2); + EXPECT_LT(dv1, v1); + EXPECT_LE(dv1, v1); + EXPECT_GT(v1, dv1); + EXPECT_GE(v1, dv1); + EXPECT_NE(dv2, s2); + + EXPECT_EQ(dv1.date().year(), 2012); + EXPECT_EQ(dv1.date().month(), 1); + EXPECT_EQ(dv1.date().day(), 20); + + char t1[] = "10:11:12.123456789"; + char t2[] = "00:00:00"; + TimestampValue tv1(t1, strlen(t1)); + TimestampValue tv2(t2, strlen(t2)); + + EXPECT_NE(tv1, tv2); + EXPECT_NE(tv1, v2); + + EXPECT_EQ(tv1.time_of_day().hours(), 10); + EXPECT_EQ(tv1.time_of_day().minutes(), 11); + EXPECT_EQ(tv1.time_of_day().seconds(), 12); + EXPECT_EQ(tv1.time_of_day().fractional_seconds(), 123456789); + EXPECT_EQ(tv2.time_of_day().fractional_seconds(), 0); + + // Bad formats + char b1[] = "1990-10 10:10:10.123456789"; + TimestampValue bv1(b1, strlen(b1)); + boost::gregorian::date not_a_date; + + EXPECT_EQ(bv1.date(), not_a_date); + EXPECT_EQ(bv1.time_of_day(), not_a_date_time); + + char b2[] = "1991-10-10 99:10:10.123456789"; + TimestampValue bv2(b2, strlen(b2)); + + EXPECT_EQ(bv2.time_of_day(), not_a_date_time); + EXPECT_EQ(bv2.date(), not_a_date); + + char b3[] = "1990-10- 10:10:10.123456789"; + TimestampValue bv3(b3, strlen(b3)); + + EXPECT_EQ(bv3.date(), not_a_date); + EXPECT_EQ(bv3.time_of_day(), not_a_date_time); + + char b4[] = "10:1010.123456789"; + TimestampValue bv4(b4, strlen(b4)); + + EXPECT_EQ(bv4.date(), not_a_date); + EXPECT_EQ(bv4.time_of_day(), not_a_date_time); + + char b5[] = "10:11:12.123456 1991-10-10"; + TimestampValue bv5(b5, strlen(b5)); + + EXPECT_EQ(bv5.date(), not_a_date); + EXPECT_EQ(bv5.time_of_day(), not_a_date_time); + + char b6[] = "2012-01-20 01:10:00.123.466"; + TimestampValue bv6(b6, strlen(b6)); + + EXPECT_EQ(bv6.date(), 
not_a_date); + EXPECT_EQ(bv6.time_of_day(), not_a_date_time); + + char b7[] = "2012-01-20 01:10:00.123 477 "; + TimestampValue bv7(b7, strlen(b7)); + + EXPECT_EQ(bv7.date(), not_a_date); + EXPECT_EQ(bv7.time_of_day(), not_a_date_time); } } diff --git a/be/src/runtime/timestamp-value.cc b/be/src/runtime/timestamp-value.cc index 7e7065e46..6e3f8799e 100644 --- a/be/src/runtime/timestamp-value.cc +++ b/be/src/runtime/timestamp-value.cc @@ -2,6 +2,8 @@ #include "runtime/timestamp-value.h" #include "common/status.h" +#include "common/compiler-util.h" +#include "util/string-parser.h" #include #include @@ -20,44 +22,158 @@ time_t to_time_t(ptime t) { return time_t(x); } +static const time_duration one_day(24, 0, 0); -TimestampValue::TimestampValue(const string& strbuf) { - try { - // time_from_string has a bug: a missing time component will pass ok but - // give strange answers. - // Boost tickets #622 #6034. - // We look for things that might be just a date: 2012-07-12 - // The only format accepted here is YYYY-DD-MM. - bool dash = strbuf.find('-') != string::npos; - if (strbuf.size() < 11 && dash) { - boost::gregorian::date d(from_string(strbuf)); - // Mark the time component invalid. - boost::posix_time::time_duration t(not_a_date_time); - this->date_ = d; - this->time_of_day_ = t; - } else if (!dash) { - // mark the date component invalid. - boost::gregorian::date d(not_a_date_time); - this->date_ = d; - // Try to convert to a time only. The format accepted is HH:MM:SS.sssssssss. - boost::posix_time::time_duration t(duration_from_string(strbuf)); - // Time durations an be arbitrarily long, we only want a positive time of day. 
- boost::posix_time::time_duration one_day(24, 0, 0); - if (t >= one_day || t.is_negative()) { - boost::posix_time::time_duration t(not_a_date_time); - this->time_of_day_ = t; - } else { - this->time_of_day_ = t; + +inline bool TimestampValue::ParseTime(const char** strp, int* lenp) { + StringParser::ParseResult status; + int len = *lenp; + const char* str = *strp; + bool time_set = false; + + if (LIKELY(len >= 8 && str[2] == ':' && str[5] == ':' && (len == 8 || str[8] == '.'))) { + // A duration can be any amount of time, but + // it must only be within one 24 hour period to be part of a timestamp. + int hour = StringParser::StringToInt(str, 2, &status); + if (LIKELY(status == StringParser::PARSE_SUCCESS && hour >= 0 && hour < 24)) { + str += 3; + len -= 3; + + int minute = StringParser::StringToInt(str, 2, &status); + if (LIKELY(status == StringParser::PARSE_SUCCESS && minute >= 0 && minute < 60)) { + str += 3; + len -= 3; + + int second = StringParser::StringToInt(str, 2, &status); + if (LIKELY(status == StringParser::PARSE_SUCCESS && second >= 0 && second < 60)) { + str += 2; + len -= 2; + int fraction = 0; + if (LIKELY(len > 0)) { + ++str; + --len; + + if (len > 9) len = 9; + fraction = StringParser::StringToInt(str, len, &status); + + if (LIKELY(status == StringParser::PARSE_SUCCESS && fraction > 0)) { + // Convert the fractional part to a number of nano-seconds.
+ for (int i = len; i < 9; ++i) fraction *= 10; + } + str += len; + len = 0; + } + if (status == StringParser::PARSE_SUCCESS) { + this->time_of_day_ = time_duration(hour, minute, second, fraction); + time_set = true; + } + } } - } else { - *this = TimestampValue(time_from_string(strbuf)); } - } catch (exception& e) { - ptime temp; // created as not_a_date_time - *this = TimestampValue(temp); } + if (time_set) { + *strp = str; + *lenp = len; + } + return time_set; } +inline bool TimestampValue::ParseDate(const char** strp, int* lenp) { + StringParser::ParseResult status; + const char* str = *strp; + int len = *lenp; + bool date_set = false; + // Check for a valid format + if (LIKELY(len >= 10 && + str[4] == '-' && str[7] == '-' && (len == 10 || str[10] == ' '))) { + int year = StringParser::StringToInt(str, 4, &status); + if (LIKELY(status == StringParser::PARSE_SUCCESS && year > 0)) { + str += 5; + len -= 5; + + int month = StringParser::StringToInt(str, 2, &status); + if (LIKELY(status == StringParser::PARSE_SUCCESS && month > 0)) { + str += 3; + len -= 3; + + int day = StringParser::StringToInt(str, 2, &status); + if (LIKELY(status == StringParser::PARSE_SUCCESS && day > 0)) { + str += 3; + len -= 3; + + date_set = true; + // Catch invalid dates. + try { + this->date_ = boost::gregorian::date(year, month, day); + } catch (exception e) { + LOG(WARNING) << "Invalid date: " << year << "-" << month << "-" << day; + date_set = false; + } + + } + } + } + } + + if (date_set) { + *lenp = len; + *strp = str; + } + return date_set; +} + +TimestampValue::TimestampValue(const char* str, int len) { + // One timestamp format is accepted: YYYY-MM-DD HH:MM:SS.sssssssss + // Either just the date or just the time may be specified. This provides + // minimal support to simulate date and time data types. All components + // are required in either the date or time except for the fractional + // seconds following the '.'. 
+ // In the case of just a date, the time will be set to 00:00:00. + // In the case of just a time, the date will be set to invalid. + // Unfortunately there is no snscanf. + + // Remove leading white space. + while (len > 0 && isspace(*str)){ + ++str; + --len; + } + // strip the trailing blanks. + while (len > 0 && isspace(str[len - 1])) --len; + + + bool date_set = ParseDate(&str, &len); + + // If there is any data left, it must be a valid time. If not the whole + // conversion is considered failed. We do not return a valid date. + if (len <= 0) { + if (date_set) { + // If there is only a date component then set the time to the start of the day. + this->time_of_day_ = time_duration(0, 0, 0, 0); + } else { + // set the date to be invalid. + this->date_ = boost::gregorian::date(); + } + return; + } + + bool time_set = ParseTime(&str, &len); + if (time_set) { + while (len > 0){ + if (!isspace(*str)) { + time_set = false; + break; + } + ++str; + --len; + } + } + + // If there was a time component it needs to be valid or the whole timestamp + // is invalid. 
+ if (!date_set || !time_set) this->date_ = boost::gregorian::date(); + if (!time_set) this->time_of_day_ = time_duration(not_a_date_time); +} + ostream& operator<<(ostream& os, const TimestampValue& timestamp_value) { return os << timestamp_value.DebugString(); } @@ -65,11 +181,10 @@ ostream& operator<<(ostream& os, const TimestampValue& timestamp_value) { istream& operator>>(istream& is, TimestampValue& timestamp_value) { char buf[32]; memset(buf, '\0', sizeof(buf)); - is.readsome(buf, 32); - string strbuf(buf, strlen(buf)); - timestamp_value = TimestampValue(strbuf); + int len = is.readsome(buf, 32); + timestamp_value = TimestampValue(buf, len); if (timestamp_value.NotADateTime()) { - LOG(WARNING) << "Invalid timestamp string: '" << strbuf << "'"; + LOG(WARNING) << "Invalid timestamp string: '" << buf << "'"; // This is called by auto generated functions that detect invalid // conversions from text via this exception. throw boost::bad_lexical_cast(); diff --git a/be/src/runtime/timestamp-value.h b/be/src/runtime/timestamp-value.h index e8caebdd3..33820980b 100644 --- a/be/src/runtime/timestamp-value.h +++ b/be/src/runtime/timestamp-value.h @@ -8,6 +8,9 @@ #include #include #include +using namespace std; +using namespace boost::posix_time; +using namespace boost::gregorian; namespace impala { @@ -55,8 +58,8 @@ class TimestampValue { temp += boost::posix_time::nanoseconds((t-i)/FRACTIONAL); *this = temp; } - TimestampValue(const std::string& strbuf); - + TimestampValue(const char* str, int len); + TimestampValue(int64_t t) { *this = TimestampValue(boost::posix_time::from_time_t(t)); } @@ -73,6 +76,9 @@ class TimestampValue { *this = TimestampValue(boost::posix_time::from_time_t(t)); } + void set_date(boost::gregorian::date d) { date_ = d; } + void set_time(boost::posix_time::time_duration t) { time_of_day_ = t; } + std::string DebugString() const { std::stringstream ss; if (!this->date_.is_special()) { @@ -92,8 +98,8 @@ class TimestampValue { return !(*this == 
other); } bool operator<=(const TimestampValue& other) const { - return this->date_ < other.date_ || - (this->date_ == other.date_ && this->time_of_day_ <= other.time_of_day_); + return this->date_ < other.date_ || (this->date_ == other.date_ && + (this->time_of_day_ <= other.time_of_day_)); } bool operator>=(const TimestampValue& other) const { return this->date_ > other.date_ || @@ -110,7 +116,6 @@ class TimestampValue { // If the date or time of day are valid then this is valid. bool NotADateTime() const { - boost::posix_time::ptime temp; return this->date_.is_special() && this->time_of_day_.is_special(); } @@ -161,8 +166,28 @@ class TimestampValue { private: friend class UnusedClass; + // Precision of fractional part of the time: nanoseconds. static const double FRACTIONAL = 0.000000001; + // Parse a date string into the object. + // strp -- pointer to string to parse, points to character after parsing stopped. + // lenp -- pointer to the length of the string. The length will + // be updated to the count of characters left past the + // parsed string or where the parsing stopped. + // The accepted format is: YYYY-MM-DD. All components must be present. + // Returns true if the date was successfully parsed. + inline bool ParseDate(const char** strp, int* lenp); + + // Parse a time string into the object. + // strp -- pointer to string to parse, points to character after parsing stopped. + // lenp -- pointer to the length of the string. The length will + // be updated to the count of characters left past the + // parsed string or where the parsing stopped. + // The accepted format is: HH:MM:SS[.sssssssss] + // Returns true if the time was successfully parsed. + inline bool ParseTime(const char** strp, int* lenp); + + // Boost ptime leaves a gap in the structure, so we swap the order to make it // 12 contiguous bytes. We then must convert to and from the boost ptime data type.
boost::posix_time::time_duration time_of_day_; diff --git a/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-scan-node-errors.test b/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-scan-node-errors.test index 4646d6264..e7f7e1992 100644 --- a/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-scan-node-errors.test +++ b/testdata/workloads/functional-query/queries/DataErrorsTest/hbase-scan-node-errors.test @@ -1,5 +1,8 @@ select * from hbasealltypeserror ---- ERRORS +Error converting column strings:timestamp_col: '0' TO TIMESTAMP +hbase table: hbasealltypeserror +row key: 0 Error converting column bools:bool_col: 'errfalse' TO BOOL hbase table: hbasealltypeserror row key: 1 @@ -58,24 +61,24 @@ file: hbasealltypeserror,15 ---- TYPES int, boolean, double, float, bigint, int, smallint, tinyint, string, string, timestamp ---- RESULTS -0,NULL,0,0,0,0,0,NULL,'01/01/09','0',00:00:00 -1,NULL,10.1,1,10,1,1,NULL,'01/01/09','1',1999-10-10 +0,NULL,0,0,0,0,0,NULL,'01/01/09','0',NULL +1,NULL,10.1,1,10,1,1,NULL,'01/01/09','1',1999-10-10 00:00:00 10,NULL,0,0,0,0,NULL,NULL,'02/01/09','0',2009-01-01 00:00:00 11,false,10.1,1,10,NULL,NULL,NULL,'02/01/09','1',2009-01-01 00:00:00 12,true,20.2,2,NULL,NULL,NULL,2,'02/01/09','2',2009-01-01 00:00:00 13,false,NULL,NULL,NULL,NULL,3,3,'02/01/09','3',2009-01-01 00:00:00 14,true,NULL,NULL,40,4,4,4,'02/01/09','4',2009-01-01 00:00:00 15,false,50.5,5,50,5,5,NULL,'02/01/09','5',NULL -16,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'02/01/09','6',00:00:00 +16,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'02/01/09','6',NULL 17,false,NULL,7,70,7,7,7,'02/01/09','7',2009-01-01 00:00:00 18,true,80.8,8,80,8,8,8,'02/01/09','8',2009-01-01 00:00:00 19,false,90.90000000000001,9,90,9,9,9,'02/01/09','9',2009-01-01 00:00:00 -2,true,20.2,2,20,2,NULL,NULL,'01/01/09','2',1999-10-13 18:10:10 +2,true,20.2,2,20,2,NULL,NULL,'01/01/09','2',NULL 20,true,0,0,0,0,0,0,'03/01/09','0',2020-10-10 10:10:10.123000000 
21,false,10.1,1,10,1,1,1,'03/01/09','1',NULL 22,true,20.2,2,20,2,2,2,'03/01/09','2',NULL -23,false,30.3,3,30,3,NULL,3,'03/01/09','3',2020-10-12 12:10:10.123000000 -24,true,40.4,4,40,4,4,4,'03/01/09','4',2020-10-10 11:10:10.123000000 +23,false,30.3,3,30,3,NULL,3,'03/01/09','3',NULL +24,true,40.4,4,40,4,4,4,'03/01/09','4',NULL 25,false,50.5,5,50,NULL,5,5,'03/01/09','5',2020-10-10 10:10:10.123000000 26,true,60.6,6,60,6,6,6,'03/01/09','6',2020-10-10 10:10:10.123000000 27,false,70.7,7,70,7,7,NULL,'03/01/09','7',2020-10-10 10:10:10.123000000 @@ -168,11 +171,11 @@ int, boolean, double, float, bigint, int, smallint, tinyint, string, string, tim 22,true,20.2,2,20,2,2,2,'03/01/09','2',2012-03-22 11:20:01.123000000 23,false,30.3,3,30,3,NULL,3,'03/01/09','3',2012-03-22 11:20:01.123000000 24,true,40.4,4,40,4,4,4,'03/01/09','4',2012-03-22 11:20:01.123000000 -25,false,50.5,5,50,NULL,5,5,'03/01/09','5',2012-03-22 11:20:01.123000000 +25,false,50.5,5,50,NULL,5,5,'03/01/09','5',NULL 26,true,60.6,6,60,6,6,6,'03/01/09','6',2012-03-22 11:20:01.123000000 27,false,70.7,7,70,7,7,NULL,'03/01/09','7',2012-03-22 11:20:01.123000000 28,true,80.8,8,80,8,8,8,'03/01/09','8',NULL -29,false,90.90000000000001,9,90,NULL,9,9,'03/01/09','9',2012-03-22 +29,false,90.90000000000001,9,90,NULL,9,9,'03/01/09','9',2012-03-22 00:00:00 3,false,30.3,3,30,3,NULL,3,'01/01/09','3',2012-03-22 11:20:01.123000000 4,true,40.4,4,40,NULL,4,4,'01/01/09','4',2012-03-22 11:20:01.123000000 5,false,50.5,5,NULL,5,5,5,'01/01/09','5',2012-03-22 11:20:01.123000000 @@ -180,4 +183,4 @@ int, boolean, double, float, bigint, int, smallint, tinyint, string, string, tim 7,false,NULL,7,70,7,7,7,'01/01/09','7',2012-03-22 11:20:01.123000000 8,false,80.8,8,80,8,8,8,'01/01/09','8',2012-03-22 11:20:01.123000000 9,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'01/01/09','9',2012-03-22 11:20:01.123000000 -==== \ No newline at end of file +==== diff --git a/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-scan-node-errors.test 
b/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-scan-node-errors.test index ecaf7d1e8..500779e7a 100644 --- a/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-scan-node-errors.test +++ b/testdata/workloads/functional-query/queries/DataErrorsTest/hdfs-scan-node-errors.test @@ -97,9 +97,9 @@ int, int, int, boolean, tinyint, smallint, int, bigint, float, double, string, s 2009,3,22,true,2,2,2,20,2,20.2,'03/01/09','2',2012-03-22 11:20:01.123000000 2009,3,23,false,3,NULL,3,30,3,30.3,'03/01/09','3',2012-03-22 11:20:01.123000000 2009,3,24,true,4,4,4,40,4,40.4,'03/01/09','4',2012-03-22 11:20:01.123000000 -2009,3,25,false,5,5,NULL,50,5,50.5,'03/01/09','5',2012-03-22 11:20:01.123000000 +2009,3,25,false,5,5,NULL,50,5,50.5,'03/01/09','5',NULL 2009,3,26,true,6,6,6,60,6,60.6,'03/01/09','6',2012-03-22 11:20:01.123000000 2009,3,27,false,NULL,7,7,70,7,70.7,'03/01/09','7',2012-03-22 11:20:01.123000000 2009,3,28,true,8,8,8,80,8,80.8,'03/01/09','8',NULL -2009,3,29,false,9,9,NULL,90,9,90.90000000000001,'03/01/09','9',2012-03-22 -==== \ No newline at end of file +2009,3,29,false,9,9,NULL,90,9,90.90000000000001,'03/01/09','9',2012-03-22 00:00:00 +====