mirror of
https://github.com/apache/impala.git
synced 2025-12-25 02:03:09 -05:00
IMPALA-724: Support infinite / nan values in text files
This patch allows the text scanner to read 'inf' or 'Infinity' (and 'nan') from a row and correctly translate it into floating-point infinity (or NaN). It also adds the is_inf() and is_nan() builtins. Finally, we change the text table writer to write Infinity and NaN for compatibility with Hive. In the future, we might consider adding nan / inf literals to our grammar (postgres has this, see: http://www.postgresql.org/docs/9.3/static/datatype-numeric.html).

Change-Id: I796f2852b3c6c3b72e9aae9dd5ad228d188a6ea3
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2393
Reviewed-by: Henry Robinson <henry@cloudera.com>
Tested-by: jenkins
(cherry picked from commit 58091355142cadd2b74874d9aa7c8ab6bf3efe2f)
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2483
This commit is contained in:
@@ -288,6 +288,24 @@ class ExprTest : public testing::Test {
|
||||
TestComparison(lexical_cast<string>(numeric_limits<T>::min()),
|
||||
lexical_cast<string>(numeric_limits<T>::max() + 1), true);
|
||||
}
|
||||
|
||||
// Compare nan: not equal to, larger than or smaller than anything, including itself
|
||||
TestValue(lexical_cast<string>(t_min) + " < 0/0", TYPE_BOOLEAN, false);
|
||||
TestValue(lexical_cast<string>(t_min) + " > 0/0", TYPE_BOOLEAN, false);
|
||||
TestValue(lexical_cast<string>(t_min) + " = 0/0", TYPE_BOOLEAN, false);
|
||||
TestValue(lexical_cast<string>(t_max) + " < 0/0", TYPE_BOOLEAN, false);
|
||||
TestValue(lexical_cast<string>(t_max) + " > 0/0", TYPE_BOOLEAN, false);
|
||||
TestValue(lexical_cast<string>(t_max) + " = 0/0", TYPE_BOOLEAN, false);
|
||||
TestValue("0/0 < 0/0", TYPE_BOOLEAN, false);
|
||||
TestValue("0/0 > 0/0", TYPE_BOOLEAN, false);
|
||||
TestValue("0/0 = 0/0", TYPE_BOOLEAN, false);
|
||||
|
||||
// Compare inf: larger than everything except nan (or smaller, for -inf)
|
||||
TestValue(lexical_cast<string>(t_max) + " < 1/0", TYPE_BOOLEAN, true);
|
||||
TestValue(lexical_cast<string>(t_min) + " > -1/0", TYPE_BOOLEAN, true);
|
||||
TestValue("1/0 = 1/0", TYPE_BOOLEAN, true);
|
||||
TestValue("1/0 < 0/0", TYPE_BOOLEAN, false);
|
||||
TestValue("0/0 < 1/0", TYPE_BOOLEAN, false);
|
||||
}
|
||||
|
||||
void TestStringComparisons() {
|
||||
@@ -1758,6 +1776,31 @@ TEST_F(ExprTest, UtilityFunctions) {
|
||||
TestIsNull("fnv_hash(NULL)", TYPE_BIGINT);
|
||||
}
|
||||
|
||||
// Exercises the is_inf() / is_nan() builtins and the casts that produce or consume
// non-finite floating-point values (infinity and NaN).
TEST_F(ExprTest, NonFiniteFloats) {
|
||||
TestValue("is_inf(0.0)", TYPE_BOOLEAN, false);
|
||||
TestValue("is_inf(-1/0)", TYPE_BOOLEAN, true);
|
||||
TestValue("is_inf(1/0)", TYPE_BOOLEAN, true);
|
||||
TestValue("is_inf(0/0)", TYPE_BOOLEAN, false);
|
||||
// A NULL argument is expected to yield false (not NULL) for both predicates.
TestValue("is_inf(NULL)", TYPE_BOOLEAN, false);
|
||||
TestValue("is_nan(NULL)", TYPE_BOOLEAN, false);
|
||||
|
||||
TestValue("is_nan(0.0)", TYPE_BOOLEAN, false);
|
||||
TestValue("is_nan(1/0)", TYPE_BOOLEAN, false);
|
||||
TestValue("is_nan(0/0)", TYPE_BOOLEAN, true);
|
||||
|
||||
// Division by zero and the strings 'inf' / 'Infinity' / '-Infinity' are all expected
// to evaluate to (signed) infinity of the target type.
TestCast("1/0", numeric_limits<double>::infinity());
|
||||
TestCast("CAST(1/0 AS FLOAT)", numeric_limits<float>::infinity());
|
||||
TestValue("CAST('inf' AS FLOAT)", TYPE_FLOAT, numeric_limits<float>::infinity());
|
||||
TestValue("CAST('inf' AS DOUBLE)", TYPE_DOUBLE, numeric_limits<double>::infinity());
|
||||
TestValue("CAST('Infinity' AS FLOAT)", TYPE_FLOAT, numeric_limits<float>::infinity());
|
||||
TestValue("CAST('-Infinity' AS DOUBLE)", TYPE_DOUBLE,
|
||||
-numeric_limits<double>::infinity());
|
||||
|
||||
// NaN != NaN, so we have to wrap the value in a string
|
||||
TestStringValue("CAST(CAST('nan' AS FLOAT) AS STRING)", string("nan"));
|
||||
TestStringValue("CAST(CAST('nan' AS DOUBLE) AS STRING)", string("nan"));
|
||||
}
|
||||
|
||||
TEST_F(ExprTest, MathTrigonometricFunctions) {
|
||||
// It is important to calculate the expected values
|
||||
// using math functions, and not simply use constants.
|
||||
|
||||
@@ -17,8 +17,9 @@
|
||||
#include "util/bit-util.h"
|
||||
|
||||
#include <ctype.h>
|
||||
#include <math.h>
|
||||
#include <gutil/strings/substitute.h>
|
||||
#include <math.h>
|
||||
|
||||
|
||||
using namespace std;
|
||||
using namespace boost::gregorian;
|
||||
@@ -77,6 +78,16 @@ BigIntVal UdfBuiltins::MinBigInt(FunctionContext* context) {
|
||||
return BigIntVal(numeric_limits<int64_t>::min());
|
||||
}
|
||||
|
||||
// Implementation of the is_nan() builtin: returns true only when 'val' is non-NULL and
// holds NaN. A NULL argument produces false rather than NULL.
BooleanVal UdfBuiltins::IsNan(FunctionContext* context, const DoubleVal& val) {
|
||||
if (val.is_null) return BooleanVal(false);
|
||||
return BooleanVal(isnan(val.val));
|
||||
}
|
||||
|
||||
// Implementation of the is_inf() builtin: returns true only when 'val' is non-NULL and
// holds positive or negative infinity. A NULL argument produces false rather than NULL.
BooleanVal UdfBuiltins::IsInf(FunctionContext* context, const DoubleVal& val) {
|
||||
if (val.is_null) return BooleanVal(false);
|
||||
return BooleanVal(isinf(val.val));
|
||||
}
|
||||
|
||||
// The units which can be used when Truncating a Timestamp
|
||||
struct TruncUnit {
|
||||
enum Type {
|
||||
|
||||
@@ -40,6 +40,8 @@ class UdfBuiltins {
|
||||
static SmallIntVal MinSmallInt(FunctionContext* context);
|
||||
static BigIntVal MinBigInt(FunctionContext* context);
|
||||
|
||||
static BooleanVal IsNan(FunctionContext* context, const DoubleVal& val);
|
||||
static BooleanVal IsInf(FunctionContext* context, const DoubleVal& val);
|
||||
|
||||
// Rounds (truncating down) a Timestamp to the specified unit.
|
||||
// Units:
|
||||
|
||||
@@ -19,6 +19,7 @@
|
||||
#include <string>
|
||||
|
||||
#include <boost/functional/hash.hpp>
|
||||
#include <math.h>
|
||||
|
||||
#include "common/logging.h"
|
||||
#include "runtime/string-value.inline.h"
|
||||
@@ -243,10 +244,32 @@ inline void RawValue::PrintValue(const void* value, const ColumnType& type, int
|
||||
*stream << *reinterpret_cast<const int64_t*>(value);
|
||||
break;
|
||||
case TYPE_FLOAT:
|
||||
*stream << *reinterpret_cast<const float*>(value);
|
||||
{
|
||||
float val = *reinterpret_cast<const float*>(value);
|
||||
if (LIKELY(std::isfinite(val))) {
|
||||
*stream << val;
|
||||
} else if (isinf(val)) {
|
||||
// 'Infinity' is Java's text representation of inf. By staying close to Java, we
|
||||
// allow Hive to read text tables containing non-finite values produced by
|
||||
// Impala. (The same logic applies to 'NaN', below).
|
||||
*stream << (val < 0 ? "-Infinity" : "Infinity");
|
||||
} else if (isnan(val)) {
|
||||
*stream << "NaN";
|
||||
}
|
||||
}
|
||||
break;
|
||||
case TYPE_DOUBLE:
|
||||
*stream << *reinterpret_cast<const double*>(value);
|
||||
{
|
||||
double val = *reinterpret_cast<const double*>(value);
|
||||
if (LIKELY(std::isfinite(val))) {
|
||||
*stream << val;
|
||||
} else if (isinf(val)) {
|
||||
// See TYPE_FLOAT for rationale.
|
||||
*stream << (val < 0 ? "-Infinity" : "Infinity");
|
||||
} else if (isnan(val)) {
|
||||
*stream << "NaN";
|
||||
}
|
||||
}
|
||||
break;
|
||||
case TYPE_STRING:
|
||||
string_val = reinterpret_cast<const StringValue*>(value);
|
||||
|
||||
@@ -77,12 +77,25 @@ void TestBoolValue(const char* s, bool exp_val, StringParser::ParseResult exp_re
|
||||
// Compare Impala's float conversion function against strtod.
|
||||
template<typename T>
|
||||
void TestFloatValue(const string& s, StringParser::ParseResult exp_result) {
|
||||
T exp_val = 0;
|
||||
if (exp_result == StringParser::PARSE_SUCCESS) exp_val = strtod(s.c_str(), NULL);
|
||||
StringParser::ParseResult result;
|
||||
T val = StringParser::StringToFloat<T>(s.data(), s.length(), &result);
|
||||
EXPECT_EQ(exp_result, result);
|
||||
if (exp_result == StringParser::PARSE_SUCCESS) EXPECT_EQ(exp_val, val);
|
||||
|
||||
if (exp_result == StringParser::PARSE_SUCCESS && result == exp_result) {
|
||||
T exp_val = strtod(s.c_str(), NULL);
|
||||
EXPECT_EQ(exp_val, val);
|
||||
}
|
||||
}
|
||||
|
||||
// Parses 's' with StringParser::StringToFloat<T> and checks that the parse result equals
// 'exp_result'. When success is both expected and obtained, additionally checks that the
// parsed value is NaN. isnan() is used instead of an equality check because NaN never
// compares equal to anything, including itself.
template<typename T>
|
||||
void TestFloatValueIsNan(const string& s, StringParser::ParseResult exp_result) {
|
||||
StringParser::ParseResult result;
|
||||
T val = StringParser::StringToFloat<T>(s.data(), s.length(), &result);
|
||||
EXPECT_EQ(exp_result, result);
|
||||
|
||||
// Only inspect the value if the parse outcome matched, to avoid a second, redundant
// failure on top of the EXPECT_EQ above.
if (exp_result == StringParser::PARSE_SUCCESS && result == exp_result) {
|
||||
EXPECT_TRUE(isnan(val));
|
||||
}
|
||||
}
|
||||
|
||||
// Tests conversion of s to double and float with +/- prefixing (and no prefix) and with
|
||||
@@ -346,6 +359,24 @@ TEST(StringToFloat, Basic) {
|
||||
TestFloatValue<double>(double_min, StringParser::PARSE_SUCCESS);
|
||||
TestFloatValue<double>(double_max, StringParser::PARSE_SUCCESS);
|
||||
|
||||
// Non-finite values
|
||||
TestAllFloatVariants("INFinity", StringParser::PARSE_SUCCESS);
|
||||
TestAllFloatVariants("infinity", StringParser::PARSE_SUCCESS);
|
||||
TestAllFloatVariants("inf", StringParser::PARSE_SUCCESS);
|
||||
|
||||
TestFloatValueIsNan<float>("nan", StringParser::PARSE_SUCCESS);
|
||||
TestFloatValueIsNan<double>("nan", StringParser::PARSE_SUCCESS);
|
||||
TestFloatValueIsNan<float>("NaN", StringParser::PARSE_SUCCESS);
|
||||
TestFloatValueIsNan<double>("NaN", StringParser::PARSE_SUCCESS);
|
||||
TestFloatValueIsNan<float>("nana", StringParser::PARSE_SUCCESS);
|
||||
TestFloatValueIsNan<double>("nana", StringParser::PARSE_SUCCESS);
|
||||
TestFloatValueIsNan<float>("naN", StringParser::PARSE_SUCCESS);
|
||||
TestFloatValueIsNan<double>("naN", StringParser::PARSE_SUCCESS);
|
||||
|
||||
TestFloatValueIsNan<float>("n aN", StringParser::PARSE_FAILURE);
|
||||
TestFloatValueIsNan<float>("nnaN", StringParser::PARSE_FAILURE);
|
||||
|
||||
|
||||
// Overflow.
|
||||
TestFloatValue<float>(float_max + "11111", StringParser::PARSE_OVERFLOW);
|
||||
TestFloatValue<double>(double_max + "11111", StringParser::PARSE_OVERFLOW);
|
||||
@@ -362,6 +393,10 @@ TEST(StringToFloat, Basic) {
|
||||
TestAllFloatVariants("456.789e10x", StringParser::PARSE_FAILURE);
|
||||
TestAllFloatVariants("456.789e10 sdfs ", StringParser::PARSE_FAILURE);
|
||||
TestAllFloatVariants("1e10 sdfs", StringParser::PARSE_FAILURE);
|
||||
TestAllFloatVariants("in", StringParser::PARSE_FAILURE);
|
||||
TestAllFloatVariants("in finity", StringParser::PARSE_FAILURE);
|
||||
TestAllFloatVariants("na", StringParser::PARSE_FAILURE);
|
||||
TestAllFloatVariants("ThisIsANaN", StringParser::PARSE_FAILURE);
|
||||
}
|
||||
|
||||
TEST(StringToFloat, InvalidLeadingTrailing) {
|
||||
|
||||
@@ -325,6 +325,29 @@ class StringParser {
|
||||
decimal = true;
|
||||
} else if (s[i] == 'e' || s[i] == 'E') {
|
||||
break;
|
||||
} else if (s[i] == 'i' || s[i] == 'I') {
|
||||
if (len > i + 2 && (s[i+1] == 'n' || s[i+1] == 'N') &&
|
||||
(s[i+2] == 'f' || s[i+2] == 'F')) {
|
||||
// Note: Hive writes inf as Infinity, at least for text. We'll be a little loose
|
||||
// here and interpret any column with inf as a prefix as infinity rather than
|
||||
// checking every remaining byte.
|
||||
*result = PARSE_SUCCESS;
|
||||
return negative ? -INFINITY : INFINITY;
|
||||
} else {
|
||||
// Starts with 'i', but isn't inf...
|
||||
*result = PARSE_FAILURE;
|
||||
return 0;
|
||||
}
|
||||
} else if (s[i] == 'n' || s[i] == 'N') {
|
||||
if (len > i + 2 && (s[i+1] == 'a' || s[i+1] == 'A') &&
|
||||
(s[i+2] == 'n' || s[i+2] == 'N')) {
|
||||
*result = PARSE_SUCCESS;
|
||||
return negative ? -NAN : NAN;
|
||||
} else {
|
||||
// Starts with 'n', but isn't NaN...
|
||||
*result = PARSE_FAILURE;
|
||||
return 0;
|
||||
}
|
||||
} else {
|
||||
if ((UNLIKELY(i == first || !isAllWhitespace(s + i, len - i)))) {
|
||||
// Reject the string because either the first char was not a digit, "," or "e",
|
||||
|
||||
@@ -145,6 +145,22 @@ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
|
||||
return &e->result_.${result_field};\n\
|
||||
}\n\n")
|
||||
|
||||
|
||||
# Special case for float types to string that deals properly with nan
|
||||
# (lexical_cast<string>(nan) returns "-nan" which is nonsensical).
|
||||
# The generated function returns NULL when the operand evaluates to NULL, sets the
# result to the literal string "nan" when the operand is NaN, and otherwise sets it to
# lexical_cast<string>() of the operand value.
float_types_to_string = Template("\
|
||||
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
|
||||
Expr* op = e->children()[0];\n\
|
||||
${native_type1}* val = reinterpret_cast<${native_type1}*>(op->GetValue(row));\n\
|
||||
if (val == NULL) return NULL;\n\
|
||||
if (isnan(*val)) {\n\
|
||||
e->result_.SetStringVal(string(\"nan\"));\n\
|
||||
} else {\n\
|
||||
e->result_.SetStringVal(lexical_cast<string>(*val));\n\
|
||||
}\n\
|
||||
return &e->result_.${result_field};\n\
|
||||
}\n\n")
|
||||
|
||||
case = Template("\
|
||||
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
|
||||
CaseExpr* expr = static_cast<CaseExpr*>(e);\n\
|
||||
@@ -216,7 +232,7 @@ types = {
|
||||
'FLOAT_TYPES' : ['FLOAT', 'DOUBLE'],
|
||||
'NUMERIC_TYPES' : ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
|
||||
'NATIVE_TYPES' : ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
|
||||
'STRCAST_TYPES' : ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
|
||||
'STRCAST_TYPES' : ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT'],
|
||||
'ALL_TYPES' : ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT',\
|
||||
'DOUBLE', 'STRING', 'TIMESTAMP'],
|
||||
'MAX_TYPES' : ['BIGINT', 'DOUBLE'],
|
||||
@@ -268,6 +284,7 @@ functions = [
|
||||
['Cast', ['FLOAT_TYPES'], [['STRING'], ['FLOAT_TYPES']], string_to_float ],
|
||||
['Cast', ['STRING'], [['STRCAST_TYPES'], ['STRING']], numeric_to_string ],
|
||||
['Cast', ['STRING'], [['TINYINT'], ['STRING']], tinyint_to_string ],
|
||||
['Cast', ['STRING'], [['FLOAT_TYPES'], ['STRING']], float_types_to_string ],
|
||||
['Cast', ['NATIVE_TYPES'], [['TIMESTAMP'], ['NATIVE_TYPES']]],
|
||||
['Cast', ['STRING'], [['TIMESTAMP'], ['STRING']], numeric_to_string ],
|
||||
['Cast', ['TIMESTAMP'], [['STRING'], ['TIMESTAMP']], string_to_timestamp],
|
||||
|
||||
@@ -500,6 +500,10 @@ udf_functions = [
|
||||
'_ZN6impala11UdfBuiltins11MinSmallIntEPN10impala_udf15FunctionContextE'],
|
||||
[['min_bigint'], 'BIGINT', [],
|
||||
'_ZN6impala11UdfBuiltins9MinBigIntEPN10impala_udf15FunctionContextE'],
|
||||
[['is_nan'], 'BOOLEAN', ['DOUBLE'],
|
||||
'_ZN6impala11UdfBuiltins5IsNanEPN10impala_udf15FunctionContextERKNS1_9DoubleValE'],
|
||||
[['is_inf'], 'BOOLEAN', ['DOUBLE'],
|
||||
'_ZN6impala11UdfBuiltins5IsInfEPN10impala_udf15FunctionContextERKNS1_9DoubleValE'],
|
||||
[['trunc'], 'TIMESTAMP', ['TIMESTAMP', 'STRING'],
|
||||
'_ZN6impala11UdfBuiltins5TruncEPN10impala_udf15FunctionContextERKNS1_12TimestampValERKNS1_9StringValE',
|
||||
'_ZN6impala11UdfBuiltins12TruncPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
|
||||
|
||||
@@ -48,7 +48,7 @@ int, boolean, tinyint, smallint, int, bigint, float, double, string, string
|
||||
---- QUERY
|
||||
# insert into unpartitioned table
|
||||
insert into table alltypesnopart_insert
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
float_col, double_col, date_string_col, string_col, timestamp_col
|
||||
from alltypessmall
|
||||
where year=2009 and month=04
|
||||
@@ -68,8 +68,8 @@ bigint
|
||||
---- QUERY
|
||||
# static partition overwrite
|
||||
insert overwrite table alltypesinsert
|
||||
partition (year=2009, month=4)
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
partition (year=2009, month=4)
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
float_col, double_col, date_string_col, string_col, timestamp_col
|
||||
from alltypessmall
|
||||
where year=2009 and month=4
|
||||
@@ -117,8 +117,8 @@ int, boolean, tinyint, smallint, int, bigint, float, double, string, string
|
||||
---- QUERY
|
||||
# static partition insert$TABLE, test creation of partitions
|
||||
insert into table alltypesinsert
|
||||
partition (year=2009, month=4)
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
partition (year=2009, month=4)
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
float_col, double_col, date_string_col, string_col, timestamp_col
|
||||
from alltypessmall
|
||||
where year=2009 and month=4
|
||||
@@ -139,8 +139,8 @@ bigint
|
||||
---- QUERY
|
||||
# partially dynamic partition overwrite
|
||||
insert overwrite table alltypesinsert
|
||||
partition (year=2009, month)
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
partition (year=2009, month)
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
float_col, double_col, date_string_col, string_col, timestamp_col, month
|
||||
from alltypessmall
|
||||
where year=2009 and month>1 and month<=4
|
||||
@@ -239,8 +239,8 @@ int, boolean, tinyint, smallint, int, bigint, float, double, string, string
|
||||
---- QUERY
|
||||
# partially dynamic partition insert$TABLE, check partition creation
|
||||
insert into table alltypesinsert
|
||||
partition (year=2009, month)
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
partition (year=2009, month)
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
float_col, double_col, date_string_col, string_col, timestamp_col, month
|
||||
from alltypessmall
|
||||
where year=2009 and month>=1 and month<4
|
||||
@@ -263,8 +263,8 @@ bigint
|
||||
---- QUERY
|
||||
# fully dynamic partition overwrite
|
||||
insert overwrite table alltypesinsert
|
||||
partition (year, month)
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
partition (year, month)
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
||||
from alltypessmall
|
||||
---- SETUP
|
||||
@@ -388,8 +388,8 @@ int, boolean, tinyint, smallint, int, bigint, float, double, string, string
|
||||
---- QUERY
|
||||
# fully dynamic partition insert$TABLE, check partition creation
|
||||
insert into table alltypesinsert
|
||||
partition (year, month)
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
partition (year, month)
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
float_col, double_col, date_string_col, string_col, timestamp_col, year, month
|
||||
from alltypessmall
|
||||
---- SETUP
|
||||
@@ -502,7 +502,7 @@ SELECT "value4",NULL FROM alltypessmall LIMIT 1;
|
||||
s2=__HIVE_DEFAULT_PARTITION__/: 1
|
||||
====
|
||||
---- QUERY
|
||||
# select with empty partition key as predicate should return nothing, because "" is
|
||||
# select with empty partition key as predicate should return nothing, because "" is
|
||||
# mapped to NULL
|
||||
SELECT * FROM insert_string_partitioned WHERE s2 = "";
|
||||
---- TYPES
|
||||
@@ -535,7 +535,7 @@ year=2010/month=4/: 25
|
||||
# static partition insert from a constant select
|
||||
insert into alltypesinsert
|
||||
partition(year=2010, month=4)
|
||||
select 100, false, 1, 1, 1, 10,
|
||||
select 100, false, 1, 1, 1, 10,
|
||||
10.0, 10.0, "02/01/09", "1", cast("2009-02-01 00:01:00" as timestamp)
|
||||
---- SETUP
|
||||
RESET alltypesinsert
|
||||
@@ -545,8 +545,8 @@ year=2010/month=4/: 1
|
||||
---- QUERY
|
||||
# dynamic partition insert from a constant select
|
||||
insert into table alltypesinsert
|
||||
partition (year, month)
|
||||
select 200, true, 2, 2, 2, 20,
|
||||
partition (year, month)
|
||||
select 200, true, 2, 2, 2, 20,
|
||||
20.0, 20.0, "02/01/09", "1", cast("2009-02-01 00:02:00" as timestamp), 2010, 4
|
||||
---- RESULTS
|
||||
year=2010/month=4/: 1
|
||||
@@ -631,7 +631,7 @@ from alltypessmall limit 10
|
||||
: 10
|
||||
====
|
||||
---- QUERY
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
|
||||
float_col, double_col, date_string_col, string_col, timestamp_col
|
||||
from alltypesnopart_insert
|
||||
---- TYPES
|
||||
@@ -648,3 +648,25 @@ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL
|
||||
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL
|
||||
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL
|
||||
====
|
||||
---- QUERY
|
||||
insert overwrite alltypesnopart_insert(float_col, double_col)
|
||||
values(CAST(1/0 AS FLOAT), 1/0), (CAST(-1/0 AS FLOAT), -1/0),
|
||||
(CAST(0/0 AS FLOAT), 0/0), (CAST(-sqrt(-1) AS FLOAT), -sqrt(-1))
|
||||
---- SETUP
|
||||
RESET alltypesinsert
|
||||
---- RESULTS
|
||||
: 4
|
||||
====
|
||||
---- QUERY
|
||||
# Results have to be cast to strings, because nan == f is always false for all f
|
||||
# (even nan), so the results check would otherwise always fail.
|
||||
select CAST(float_col AS string), CAST(double_col AS string) from alltypesnopart_insert
|
||||
order by float_col, double_col limit 10;
|
||||
---- TYPES
|
||||
STRING, STRING
|
||||
---- RESULTS
|
||||
'nan','nan'
|
||||
'nan','nan'
|
||||
'-inf','-inf'
|
||||
'inf','inf'
|
||||
====
|
||||
@@ -5,7 +5,7 @@ select * from Overflow
|
||||
---- TYPES
|
||||
tinyint, smallint, int, bigint, float, double
|
||||
---- RESULTS
|
||||
-128,-32768,-2147483648,-9223372036854775808,-inf,-inf
|
||||
-128,-32768,-2147483648,-9223372036854775808,-Infinity,-Infinity
|
||||
1,2,3,4,5.5,6.6
|
||||
127,32767,2147483647,9223372036854775807,inf,inf
|
||||
====
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
====
|
||||
---- QUERY
|
||||
# Based on Aggregation Queries
|
||||
select int_col, sum(float_col)
|
||||
select int_col, sum(float_col)
|
||||
from functional_hbase.alltypessmall
|
||||
where id < 5
|
||||
group by 1
|
||||
@@ -16,9 +16,9 @@ INT, DOUBLE
|
||||
====
|
||||
---- QUERY
|
||||
# Run query without order by
|
||||
select tinyint_col, count(*)
|
||||
from alltypesagg
|
||||
group by 1
|
||||
select tinyint_col, count(*)
|
||||
from alltypesagg
|
||||
group by 1
|
||||
limit 10
|
||||
---- RESULTS
|
||||
5,1000
|
||||
@@ -36,9 +36,9 @@ TINYINT, BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
# Same query order by asc first col
|
||||
select tinyint_col, count(*)
|
||||
from alltypesagg
|
||||
group by 1
|
||||
select tinyint_col, count(*)
|
||||
from alltypesagg
|
||||
group by 1
|
||||
order by 1
|
||||
limit 10
|
||||
---- RESULTS
|
||||
@@ -194,10 +194,10 @@ NULL,'NULL',NULL,NULL,1009,'Name9',94615
|
||||
BIGINT, STRING, INT, INT, BIGINT, STRING, INT
|
||||
====
|
||||
---- QUERY
|
||||
# order by multiple cols with nulls
|
||||
select tinyint_col % 3, smallint_col % 3, count(*)
|
||||
from alltypesagg
|
||||
where day = 1
|
||||
# order by multiple cols with nulls
|
||||
select tinyint_col % 3, smallint_col % 3, count(*)
|
||||
from alltypesagg
|
||||
where day = 1
|
||||
group by 1, 2
|
||||
order by 1, 2
|
||||
limit 20
|
||||
@@ -219,9 +219,9 @@ NULL,NULL,10
|
||||
TINYINT, SMALLINT, BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
select tinyint_col % 3, smallint_col % 3, count(*)
|
||||
from alltypesagg
|
||||
where day = 1
|
||||
select tinyint_col % 3, smallint_col % 3, count(*)
|
||||
from alltypesagg
|
||||
where day = 1
|
||||
group by 1, 2
|
||||
order by 1, 2 desc
|
||||
limit 20
|
||||
@@ -243,9 +243,9 @@ NULL,0,30
|
||||
TINYINT, SMALLINT, BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
select tinyint_col % 3, smallint_col % 3, count(*)
|
||||
from alltypesagg
|
||||
where day = 1
|
||||
select tinyint_col % 3, smallint_col % 3, count(*)
|
||||
from alltypesagg
|
||||
where day = 1
|
||||
group by 1, 2
|
||||
order by 1 desc, 2
|
||||
limit 20
|
||||
@@ -267,9 +267,9 @@ NULL,NULL,10
|
||||
TINYINT, SMALLINT, BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
select tinyint_col % 3, smallint_col % 3, count(*)
|
||||
from alltypesagg
|
||||
where day = 1
|
||||
select tinyint_col % 3, smallint_col % 3, count(*)
|
||||
from alltypesagg
|
||||
where day = 1
|
||||
group by 1, 2
|
||||
order by 1 desc, 2 desc
|
||||
limit 20
|
||||
@@ -441,7 +441,7 @@ limit 10
|
||||
SMALLINT, INT, TINYINT, INT, INT, FLOAT, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Order by a column that is not in the select list
|
||||
# Order by a column that is not in the select list
|
||||
# Query with ordering column in select list
|
||||
# Don't include date_string_col, it comes back in random order.
|
||||
select int_col, tinyint_col
|
||||
@@ -504,8 +504,8 @@ TINYINT
|
||||
====
|
||||
---- QUERY
|
||||
# Order by many exprs
|
||||
select year, month, count(*)
|
||||
from alltypes
|
||||
select year, month, count(*)
|
||||
from alltypes
|
||||
group by 1, 2
|
||||
order by 1, 2
|
||||
limit 100
|
||||
@@ -703,7 +703,7 @@ INT
|
||||
# All select list items have an implicit alias. Test that the order by column ref
|
||||
# "int_col" is correctly aliased to t1.int_col, and therefore it is not an
|
||||
# ambiguous reference.
|
||||
select t1.int_col from alltypessmall t1, alltypessmall t2 where t1.id = t2.id
|
||||
select t1.int_col from alltypessmall t1, alltypessmall t2 where t1.id = t2.id
|
||||
order by int_col
|
||||
limit 2
|
||||
---- RESULTS
|
||||
@@ -741,9 +741,9 @@ TIMESTAMP, TIMESTAMP, INT
|
||||
====
|
||||
---- QUERY
|
||||
# Test of order by with NULL tuple rows (from an outer join)
|
||||
select t1.id, t1.int_col, t2.id, t2.int_col
|
||||
from alltypesagg t1
|
||||
left outer join alltypessmall t2
|
||||
select t1.id, t1.int_col, t2.id, t2.int_col
|
||||
from alltypesagg t1
|
||||
left outer join alltypessmall t2
|
||||
on (t1.int_col = t2.int_col)
|
||||
order by t1.id,t2.id limit 10
|
||||
---- TYPES
|
||||
@@ -762,7 +762,7 @@ int,int,int,int
|
||||
====
|
||||
---- QUERY
|
||||
# Test limit 0 from sub query
|
||||
select sum(a.int_col) from
|
||||
select sum(a.int_col) from
|
||||
(select int_col from functional.alltypes order by int_col limit 0) a
|
||||
---- TYPES
|
||||
bigint
|
||||
@@ -770,34 +770,36 @@ bigint
|
||||
NULL
|
||||
====
|
||||
---- QUERY
|
||||
# Test queries with divide by 0
|
||||
select if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as v
|
||||
from alltypestiny order by v desc limit 100;
|
||||
# Test queries with divide by 0 (cast to string to avoid nan != nan issues)
|
||||
select cast(if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as string)
|
||||
from alltypestiny order by
|
||||
if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) desc limit 100;
|
||||
---- TYPES
|
||||
DOUBLE
|
||||
STRING
|
||||
---- RESULTS
|
||||
inf
|
||||
1
|
||||
0
|
||||
-1
|
||||
-3
|
||||
-inf
|
||||
-inf
|
||||
-nan
|
||||
'inf'
|
||||
'1'
|
||||
'0'
|
||||
'-1'
|
||||
'-3'
|
||||
'-inf'
|
||||
'-inf'
|
||||
'nan'
|
||||
====
|
||||
---- QUERY
|
||||
# Test queries with divide by 0
|
||||
select if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as v
|
||||
from alltypestiny order by v asc limit 100;
|
||||
# Test queries with divide by 0 (cast to string to avoid nan != nan issues)
|
||||
select CAST(if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as STRING)
|
||||
from alltypestiny order by
|
||||
if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) asc limit 100;
|
||||
---- TYPES
|
||||
DOUBLE
|
||||
STRING
|
||||
---- RESULTS
|
||||
-nan
|
||||
-inf
|
||||
-inf
|
||||
-3
|
||||
-1
|
||||
0
|
||||
1
|
||||
inf
|
||||
'nan'
|
||||
'-inf'
|
||||
'-inf'
|
||||
'-3'
|
||||
'-1'
|
||||
'0'
|
||||
'1'
|
||||
'inf'
|
||||
====
|
||||
|
||||
6
tests/common/test_result_verifier.py
Normal file → Executable file
6
tests/common/test_result_verifier.py
Normal file → Executable file
@@ -116,6 +116,8 @@ def compare_float(x, y, epsilon):
|
||||
# floating point spec defines nan != nan.
|
||||
if math.isnan(x) and math.isnan(y):
|
||||
return True
|
||||
if math.isinf(x) or math.isinf(y):
|
||||
return x == y
|
||||
return abs(x - y) <= epsilon
|
||||
|
||||
# Represents a column in a row
|
||||
@@ -139,15 +141,13 @@ class ResultColumn(object):
|
||||
# Make sure the column types are the same
|
||||
if self.column_type != other.column_type:
|
||||
return False
|
||||
|
||||
# Check equality based on a supplied regex if one was given.
|
||||
if self.regex is not None:
|
||||
return self.regex.match(other.value)
|
||||
if other.regex is not None:
|
||||
return other.regex.match(self.value)
|
||||
|
||||
if (self.value == 'NULL' or other.value == 'NULL') or \
|
||||
('inf' in self.value or 'inf' in other.value):
|
||||
if (self.value == 'NULL' or other.value == 'NULL'):
|
||||
return self.value == other.value
|
||||
elif self.column_type == 'float':
|
||||
return compare_float(float(self.value), float(other.value), 10e-5)
|
||||
|
||||
0
tests/query_test/test_aggregation.py
Normal file → Executable file
0
tests/query_test/test_aggregation.py
Normal file → Executable file
Reference in New Issue
Block a user