IMPALA-724: Support infinite / nan values in text files

This patch allows the text scanner to read 'inf' / 'Infinity' and 'nan'
(matched case-insensitively) from a row and correctly translate them
into floating-point infinity and NaN respectively. It also adds the
is_inf() and is_nan() builtins.

Finally, we change the text table writer to write Infinity and NaN for
compatibility with Hive.

In the future, we might consider adding nan / inf literals to our
grammar (postgres has this, see:
http://www.postgresql.org/docs/9.3/static/datatype-numeric.html).

Change-Id: I796f2852b3c6c3b72e9aae9dd5ad228d188a6ea3
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2393
Reviewed-by: Henry Robinson <henry@cloudera.com>
Tested-by: jenkins
(cherry picked from commit 58091355142cadd2b74874d9aa7c8ab6bf3efe2f)
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2483
This commit is contained in:
Henry Robinson
2014-04-28 17:11:31 -07:00
parent 38fdda20e4
commit 38befd2126
13 changed files with 263 additions and 81 deletions

View File

@@ -288,6 +288,24 @@ class ExprTest : public testing::Test {
TestComparison(lexical_cast<string>(numeric_limits<T>::min()),
lexical_cast<string>(numeric_limits<T>::max() + 1), true);
}
// Compare nan: not equal to, larger than or smaller than anything, including itself
TestValue(lexical_cast<string>(t_min) + " < 0/0", TYPE_BOOLEAN, false);
TestValue(lexical_cast<string>(t_min) + " > 0/0", TYPE_BOOLEAN, false);
TestValue(lexical_cast<string>(t_min) + " = 0/0", TYPE_BOOLEAN, false);
TestValue(lexical_cast<string>(t_max) + " < 0/0", TYPE_BOOLEAN, false);
TestValue(lexical_cast<string>(t_max) + " > 0/0", TYPE_BOOLEAN, false);
TestValue(lexical_cast<string>(t_max) + " = 0/0", TYPE_BOOLEAN, false);
TestValue("0/0 < 0/0", TYPE_BOOLEAN, false);
TestValue("0/0 > 0/0", TYPE_BOOLEAN, false);
TestValue("0/0 = 0/0", TYPE_BOOLEAN, false);
// Compare inf: larger than everything except nan (or smaller, for -inf)
TestValue(lexical_cast<string>(t_max) + " < 1/0", TYPE_BOOLEAN, true);
TestValue(lexical_cast<string>(t_min) + " > -1/0", TYPE_BOOLEAN, true);
TestValue("1/0 = 1/0", TYPE_BOOLEAN, true);
TestValue("1/0 < 0/0", TYPE_BOOLEAN, false);
TestValue("0/0 < 1/0", TYPE_BOOLEAN, false);
}
void TestStringComparisons() {
@@ -1758,6 +1776,31 @@ TEST_F(ExprTest, UtilityFunctions) {
TestIsNull("fnv_hash(NULL)", TYPE_BIGINT);
}
// Exercises the is_inf() / is_nan() builtins and the casts that produce or consume
// non-finite floating-point values (infinity and NaN).
TEST_F(ExprTest, NonFiniteFloats) {
  // Boolean-valued checks, evaluated in the order listed. A NULL argument is expected
  // to produce false (not NULL) for both builtins.
  struct BoolCase {
    const char* expr;
    bool expected;
  };
  static const BoolCase kBoolCases[] = {
    {"is_inf(0.0)", false},
    {"is_inf(-1/0)", true},
    {"is_inf(1/0)", true},
    {"is_inf(0/0)", false},
    {"is_inf(NULL)", false},
    {"is_nan(NULL)", false},
    {"is_nan(0.0)", false},
    {"is_nan(1/0)", false},
    {"is_nan(0/0)", true},
  };
  const size_t num_bool_cases = sizeof(kBoolCases) / sizeof(kBoolCases[0]);
  for (size_t i = 0; i < num_bool_cases; ++i) {
    TestValue(kBoolCases[i].expr, TYPE_BOOLEAN, kBoolCases[i].expected);
  }

  // Division by zero and string casts are both expected to yield infinity.
  const float float_inf = numeric_limits<float>::infinity();
  const double double_inf = numeric_limits<double>::infinity();
  TestCast("1/0", double_inf);
  TestCast("CAST(1/0 AS FLOAT)", float_inf);
  TestValue("CAST('inf' AS FLOAT)", TYPE_FLOAT, float_inf);
  TestValue("CAST('inf' AS DOUBLE)", TYPE_DOUBLE, double_inf);
  TestValue("CAST('Infinity' AS FLOAT)", TYPE_FLOAT, float_inf);
  TestValue("CAST('-Infinity' AS DOUBLE)", TYPE_DOUBLE, -double_inf);

  // NaN never compares equal to itself, so a direct value comparison cannot succeed;
  // check the string rendering of the cast result instead.
  const string nan_text("nan");
  TestStringValue("CAST(CAST('nan' AS FLOAT) AS STRING)", nan_text);
  TestStringValue("CAST(CAST('nan' AS DOUBLE) AS STRING)", nan_text);
}
TEST_F(ExprTest, MathTrigonometricFunctions) {
// It is important to calculate the expected values
// using math functions, and not simply use constants.

View File

@@ -17,8 +17,9 @@
#include "util/bit-util.h"
#include <ctype.h>
#include <math.h>
#include <gutil/strings/substitute.h>
#include <math.h>
using namespace std;
using namespace boost::gregorian;
@@ -77,6 +78,16 @@ BigIntVal UdfBuiltins::MinBigInt(FunctionContext* context) {
return BigIntVal(numeric_limits<int64_t>::min());
}
// Returns a true BooleanVal only when 'val' is non-NULL and holds NaN. A NULL
// argument yields false rather than NULL. 'context' is not used.
BooleanVal UdfBuiltins::IsNan(FunctionContext* context, const DoubleVal& val) {
  const bool holds_nan = !val.is_null && isnan(val.val);
  return BooleanVal(holds_nan);
}
// Returns a true BooleanVal only when 'val' is non-NULL and holds positive or
// negative infinity. A NULL argument yields false rather than NULL. 'context' is
// not used.
BooleanVal UdfBuiltins::IsInf(FunctionContext* context, const DoubleVal& val) {
  const bool holds_inf = !val.is_null && isinf(val.val);
  return BooleanVal(holds_inf);
}
// The units which can be used when Truncating a Timestamp
struct TruncUnit {
enum Type {

View File

@@ -40,6 +40,8 @@ class UdfBuiltins {
static SmallIntVal MinSmallInt(FunctionContext* context);
static BigIntVal MinBigInt(FunctionContext* context);
// Returns true if 'val' is NaN. Returns false (not NULL) if 'val' is NULL.
static BooleanVal IsNan(FunctionContext* context, const DoubleVal& val);
// Returns true if 'val' is positive or negative infinity. Returns false (not NULL)
// if 'val' is NULL.
static BooleanVal IsInf(FunctionContext* context, const DoubleVal& val);
// Rounds (truncating down) a Timestamp to the specified unit.
// Units:

View File

@@ -19,6 +19,7 @@
#include <string>
#include <boost/functional/hash.hpp>
#include <math.h>
#include "common/logging.h"
#include "runtime/string-value.inline.h"
@@ -243,10 +244,32 @@ inline void RawValue::PrintValue(const void* value, const ColumnType& type, int
*stream << *reinterpret_cast<const int64_t*>(value);
break;
case TYPE_FLOAT:
*stream << *reinterpret_cast<const float*>(value);
{
float val = *reinterpret_cast<const float*>(value);
if (LIKELY(std::isfinite(val))) {
*stream << val;
} else if (isinf(val)) {
// 'Infinity' is Java's text representation of inf. By staying close to Java, we
// allow Hive to read text tables containing non-finite values produced by
// Impala. (The same logic applies to 'NaN', below).
*stream << (val < 0 ? "-Infinity" : "Infinity");
} else if (isnan(val)) {
*stream << "NaN";
}
}
break;
case TYPE_DOUBLE:
*stream << *reinterpret_cast<const double*>(value);
{
double val = *reinterpret_cast<const double*>(value);
if (LIKELY(std::isfinite(val))) {
*stream << val;
} else if (isinf(val)) {
// See TYPE_FLOAT for rationale.
*stream << (val < 0 ? "-Infinity" : "Infinity");
} else if (isnan(val)) {
*stream << "NaN";
}
}
break;
case TYPE_STRING:
string_val = reinterpret_cast<const StringValue*>(value);

View File

@@ -77,12 +77,25 @@ void TestBoolValue(const char* s, bool exp_val, StringParser::ParseResult exp_re
// Compare Impala's float conversion function against strtod.
// Parses 's' with StringParser::StringToFloat<T>() and expects the reported parse
// result to equal 'exp_result'. When a successful parse was expected and actually
// happened, the parsed value must equal strtod()'s result converted to T.
// Values are compared with EXPECT_EQ, so this helper cannot be used for inputs that
// parse to NaN (NaN != NaN); TestFloatValueIsNan covers those.
template<typename T>
void TestFloatValue(const string& s, StringParser::ParseResult exp_result) {
  StringParser::ParseResult result;
  T val = StringParser::StringToFloat<T>(s.data(), s.length(), &result);
  EXPECT_EQ(exp_result, result);
  // Compare values exactly once, and only if the parse outcome matched: when the
  // parse result is already wrong, a second value-mismatch failure is just noise.
  if (exp_result == StringParser::PARSE_SUCCESS && result == exp_result) {
    T exp_val = strtod(s.c_str(), NULL);
    EXPECT_EQ(exp_val, val);
  }
}
template<typename T>
void TestFloatValueIsNan(const string& s, StringParser::ParseResult exp_result) {
StringParser::ParseResult result;
T val = StringParser::StringToFloat<T>(s.data(), s.length(), &result);
EXPECT_EQ(exp_result, result);
if (exp_result == StringParser::PARSE_SUCCESS && result == exp_result) {
EXPECT_TRUE(isnan(val));
}
}
// Tests conversion of s to double and float with +/- prefixing (and no prefix) and with
@@ -346,6 +359,24 @@ TEST(StringToFloat, Basic) {
TestFloatValue<double>(double_min, StringParser::PARSE_SUCCESS);
TestFloatValue<double>(double_max, StringParser::PARSE_SUCCESS);
// Non-finite values
TestAllFloatVariants("INFinity", StringParser::PARSE_SUCCESS);
TestAllFloatVariants("infinity", StringParser::PARSE_SUCCESS);
TestAllFloatVariants("inf", StringParser::PARSE_SUCCESS);
TestFloatValueIsNan<float>("nan", StringParser::PARSE_SUCCESS);
TestFloatValueIsNan<double>("nan", StringParser::PARSE_SUCCESS);
TestFloatValueIsNan<float>("NaN", StringParser::PARSE_SUCCESS);
TestFloatValueIsNan<double>("NaN", StringParser::PARSE_SUCCESS);
TestFloatValueIsNan<float>("nana", StringParser::PARSE_SUCCESS);
TestFloatValueIsNan<double>("nana", StringParser::PARSE_SUCCESS);
TestFloatValueIsNan<float>("naN", StringParser::PARSE_SUCCESS);
TestFloatValueIsNan<double>("naN", StringParser::PARSE_SUCCESS);
TestFloatValueIsNan<float>("n aN", StringParser::PARSE_FAILURE);
TestFloatValueIsNan<float>("nnaN", StringParser::PARSE_FAILURE);
// Overflow.
TestFloatValue<float>(float_max + "11111", StringParser::PARSE_OVERFLOW);
TestFloatValue<double>(double_max + "11111", StringParser::PARSE_OVERFLOW);
@@ -362,6 +393,10 @@ TEST(StringToFloat, Basic) {
TestAllFloatVariants("456.789e10x", StringParser::PARSE_FAILURE);
TestAllFloatVariants("456.789e10 sdfs ", StringParser::PARSE_FAILURE);
TestAllFloatVariants("1e10 sdfs", StringParser::PARSE_FAILURE);
TestAllFloatVariants("in", StringParser::PARSE_FAILURE);
TestAllFloatVariants("in finity", StringParser::PARSE_FAILURE);
TestAllFloatVariants("na", StringParser::PARSE_FAILURE);
TestAllFloatVariants("ThisIsANaN", StringParser::PARSE_FAILURE);
}
TEST(StringToFloat, InvalidLeadingTrailing) {

View File

@@ -325,6 +325,29 @@ class StringParser {
decimal = true;
} else if (s[i] == 'e' || s[i] == 'E') {
break;
} else if (s[i] == 'i' || s[i] == 'I') {
if (len > i + 2 && (s[i+1] == 'n' || s[i+1] == 'N') &&
(s[i+2] == 'f' || s[i+2] == 'F')) {
// Note: Hive writes inf as Infinity, at least for text. We'll be a little loose
// here and interpret any column with inf as a prefix as infinity rather than
// checking every remaining byte.
*result = PARSE_SUCCESS;
return negative ? -INFINITY : INFINITY;
} else {
// Starts with 'i', but isn't inf...
*result = PARSE_FAILURE;
return 0;
}
} else if (s[i] == 'n' || s[i] == 'N') {
if (len > i + 2 && (s[i+1] == 'a' || s[i+1] == 'A') &&
(s[i+2] == 'n' || s[i+2] == 'N')) {
*result = PARSE_SUCCESS;
return negative ? -NAN : NAN;
} else {
// Starts with 'n', but isn't NaN...
*result = PARSE_FAILURE;
return 0;
}
} else {
if ((UNLIKELY(i == first || !isAllWhitespace(s + i, len - i)))) {
// Reject the string because either the first char was not a digit, "," or "e",

View File

@@ -145,6 +145,22 @@ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
return &e->result_.${result_field};\n\
}\n\n")
# Special case for float types to string that deals properly with nan
# (lexical_cast<string>(nan) returns "-nan" which is nonsensical).
#
# Template for the generated ComputeFunctions cast from a floating-point child
# expression to STRING (used by the FLOAT_TYPES -> STRING 'Cast' entry in
# 'functions'). In the generated C++:
#  - a NULL child value produces a NULL result;
#  - NaN is rendered as the literal string "nan";
#  - every other value (including +/- infinity) is still formatted with
#    lexical_cast<string>.
# Placeholders substituted per type: fn_signature, native_type1, result_field.
float_types_to_string = Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
Expr* op = e->children()[0];\n\
${native_type1}* val = reinterpret_cast<${native_type1}*>(op->GetValue(row));\n\
if (val == NULL) return NULL;\n\
if (isnan(*val)) {\n\
e->result_.SetStringVal(string(\"nan\"));\n\
} else {\n\
e->result_.SetStringVal(lexical_cast<string>(*val));\n\
}\n\
return &e->result_.${result_field};\n\
}\n\n")
case = Template("\
void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
CaseExpr* expr = static_cast<CaseExpr*>(e);\n\
@@ -216,7 +232,7 @@ types = {
'FLOAT_TYPES' : ['FLOAT', 'DOUBLE'],
'NUMERIC_TYPES' : ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
'NATIVE_TYPES' : ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
'STRCAST_TYPES' : ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
'STRCAST_TYPES' : ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT'],
'ALL_TYPES' : ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT',\
'DOUBLE', 'STRING', 'TIMESTAMP'],
'MAX_TYPES' : ['BIGINT', 'DOUBLE'],
@@ -268,6 +284,7 @@ functions = [
['Cast', ['FLOAT_TYPES'], [['STRING'], ['FLOAT_TYPES']], string_to_float ],
['Cast', ['STRING'], [['STRCAST_TYPES'], ['STRING']], numeric_to_string ],
['Cast', ['STRING'], [['TINYINT'], ['STRING']], tinyint_to_string ],
['Cast', ['STRING'], [['FLOAT_TYPES'], ['STRING']], float_types_to_string ],
['Cast', ['NATIVE_TYPES'], [['TIMESTAMP'], ['NATIVE_TYPES']]],
['Cast', ['STRING'], [['TIMESTAMP'], ['STRING']], numeric_to_string ],
['Cast', ['TIMESTAMP'], [['STRING'], ['TIMESTAMP']], string_to_timestamp],

View File

@@ -500,6 +500,10 @@ udf_functions = [
'_ZN6impala11UdfBuiltins11MinSmallIntEPN10impala_udf15FunctionContextE'],
[['min_bigint'], 'BIGINT', [],
'_ZN6impala11UdfBuiltins9MinBigIntEPN10impala_udf15FunctionContextE'],
[['is_nan'], 'BOOLEAN', ['DOUBLE'],
'_ZN6impala11UdfBuiltins5IsNanEPN10impala_udf15FunctionContextERKNS1_9DoubleValE'],
[['is_inf'], 'BOOLEAN', ['DOUBLE'],
'_ZN6impala11UdfBuiltins5IsInfEPN10impala_udf15FunctionContextERKNS1_9DoubleValE'],
[['trunc'], 'TIMESTAMP', ['TIMESTAMP', 'STRING'],
'_ZN6impala11UdfBuiltins5TruncEPN10impala_udf15FunctionContextERKNS1_12TimestampValERKNS1_9StringValE',
'_ZN6impala11UdfBuiltins12TruncPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',

View File

@@ -48,7 +48,7 @@ int, boolean, tinyint, smallint, int, bigint, float, double, string, string
---- QUERY
# insert into unpartitioned table
insert into table alltypesnopart_insert
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from alltypessmall
where year=2009 and month=04
@@ -68,8 +68,8 @@ bigint
---- QUERY
# static partition overwrite
insert overwrite table alltypesinsert
partition (year=2009, month=4)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
partition (year=2009, month=4)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from alltypessmall
where year=2009 and month=4
@@ -117,8 +117,8 @@ int, boolean, tinyint, smallint, int, bigint, float, double, string, string
---- QUERY
# static partition insert$TABLE, test creation of partitions
insert into table alltypesinsert
partition (year=2009, month=4)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
partition (year=2009, month=4)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from alltypessmall
where year=2009 and month=4
@@ -139,8 +139,8 @@ bigint
---- QUERY
# partially dynamic partition overwrite
insert overwrite table alltypesinsert
partition (year=2009, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
partition (year=2009, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, month
from alltypessmall
where year=2009 and month>1 and month<=4
@@ -239,8 +239,8 @@ int, boolean, tinyint, smallint, int, bigint, float, double, string, string
---- QUERY
# partially dynamic partition insert$TABLE, check partition creation
insert into table alltypesinsert
partition (year=2009, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
partition (year=2009, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, month
from alltypessmall
where year=2009 and month>=1 and month<4
@@ -263,8 +263,8 @@ bigint
---- QUERY
# fully dynamic partition overwrite
insert overwrite table alltypesinsert
partition (year, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
partition (year, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, year, month
from alltypessmall
---- SETUP
@@ -388,8 +388,8 @@ int, boolean, tinyint, smallint, int, bigint, float, double, string, string
---- QUERY
# fully dynamic partition insert$TABLE, check partition creation
insert into table alltypesinsert
partition (year, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
partition (year, month)
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col, year, month
from alltypessmall
---- SETUP
@@ -502,7 +502,7 @@ SELECT "value4",NULL FROM alltypessmall LIMIT 1;
s2=__HIVE_DEFAULT_PARTITION__/: 1
====
---- QUERY
# select with empty partition key as predicate should return nothing, because "" is
# select with empty partition key as predicate should return nothing, because "" is
# mapped to NULL
SELECT * FROM insert_string_partitioned WHERE s2 = "";
---- TYPES
@@ -535,7 +535,7 @@ year=2010/month=4/: 25
# static partition insert from a constant select
insert into alltypesinsert
partition(year=2010, month=4)
select 100, false, 1, 1, 1, 10,
select 100, false, 1, 1, 1, 10,
10.0, 10.0, "02/01/09", "1", cast("2009-02-01 00:01:00" as timestamp)
---- SETUP
RESET alltypesinsert
@@ -545,8 +545,8 @@ year=2010/month=4/: 1
---- QUERY
# dynamic partition insert from a constant select
insert into table alltypesinsert
partition (year, month)
select 200, true, 2, 2, 2, 20,
partition (year, month)
select 200, true, 2, 2, 2, 20,
20.0, 20.0, "02/01/09", "1", cast("2009-02-01 00:02:00" as timestamp), 2010, 4
---- RESULTS
year=2010/month=4/: 1
@@ -631,7 +631,7 @@ from alltypessmall limit 10
: 10
====
---- QUERY
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
float_col, double_col, date_string_col, string_col, timestamp_col
from alltypesnopart_insert
---- TYPES
@@ -648,3 +648,25 @@ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL
NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL
====
---- QUERY
insert overwrite alltypesnopart_insert(float_col, double_col)
values(CAST(1/0 AS FLOAT), 1/0), (CAST(-1/0 AS FLOAT), -1/0),
(CAST(0/0 AS FLOAT), 0/0), (CAST(-sqrt(-1) AS FLOAT), -sqrt(-1))
---- SETUP
RESET alltypesinsert
---- RESULTS
: 4
====
---- QUERY
# Results have to be cast to strings, because nan == f is always false for all f
# (even nan), so the results check would otherwise always fail.
select CAST(float_col AS string), CAST(double_col AS string) from alltypesnopart_insert
order by float_col, double_col limit 10;
---- TYPES
STRING, STRING
---- RESULTS
'nan','nan'
'nan','nan'
'-inf','-inf'
'inf','inf'
====

View File

@@ -5,7 +5,7 @@ select * from Overflow
---- TYPES
tinyint, smallint, int, bigint, float, double
---- RESULTS
-128,-32768,-2147483648,-9223372036854775808,-inf,-inf
-128,-32768,-2147483648,-9223372036854775808,-Infinity,-Infinity
1,2,3,4,5.5,6.6
127,32767,2147483647,9223372036854775807,inf,inf
====

View File

@@ -1,7 +1,7 @@
====
---- QUERY
# Based on Aggregation Queries
select int_col, sum(float_col)
select int_col, sum(float_col)
from functional_hbase.alltypessmall
where id < 5
group by 1
@@ -16,9 +16,9 @@ INT, DOUBLE
====
---- QUERY
# Run query without order by
select tinyint_col, count(*)
from alltypesagg
group by 1
select tinyint_col, count(*)
from alltypesagg
group by 1
limit 10
---- RESULTS
5,1000
@@ -36,9 +36,9 @@ TINYINT, BIGINT
====
---- QUERY
# Same query order by asc first col
select tinyint_col, count(*)
from alltypesagg
group by 1
select tinyint_col, count(*)
from alltypesagg
group by 1
order by 1
limit 10
---- RESULTS
@@ -194,10 +194,10 @@ NULL,'NULL',NULL,NULL,1009,'Name9',94615
BIGINT, STRING, INT, INT, BIGINT, STRING, INT
====
---- QUERY
# order by multiple cols with nulls
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
# order by multiple cols with nulls
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
group by 1, 2
order by 1, 2
limit 20
@@ -219,9 +219,9 @@ NULL,NULL,10
TINYINT, SMALLINT, BIGINT
====
---- QUERY
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
group by 1, 2
order by 1, 2 desc
limit 20
@@ -243,9 +243,9 @@ NULL,0,30
TINYINT, SMALLINT, BIGINT
====
---- QUERY
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
group by 1, 2
order by 1 desc, 2
limit 20
@@ -267,9 +267,9 @@ NULL,NULL,10
TINYINT, SMALLINT, BIGINT
====
---- QUERY
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg
where day = 1
group by 1, 2
order by 1 desc, 2 desc
limit 20
@@ -441,7 +441,7 @@ limit 10
SMALLINT, INT, TINYINT, INT, INT, FLOAT, STRING
====
---- QUERY
# Order by a column that is not in the select list
# Order by a column that is not in the select list
# Query with ordering column in select list
# Don't include date_string_col, it comes back in random order.
select int_col, tinyint_col
@@ -504,8 +504,8 @@ TINYINT
====
---- QUERY
# Order by many exprs
select year, month, count(*)
from alltypes
select year, month, count(*)
from alltypes
group by 1, 2
order by 1, 2
limit 100
@@ -703,7 +703,7 @@ INT
# All select list items have an implicit alias. Test that the order by column ref
# "int_col" is correctly aliased to t1.int_col, and therefore it is not an
# ambiguous reference.
select t1.int_col from alltypessmall t1, alltypessmall t2 where t1.id = t2.id
select t1.int_col from alltypessmall t1, alltypessmall t2 where t1.id = t2.id
order by int_col
limit 2
---- RESULTS
@@ -741,9 +741,9 @@ TIMESTAMP, TIMESTAMP, INT
====
---- QUERY
# Test of order by with NULL tuple rows (from an outer join)
select t1.id, t1.int_col, t2.id, t2.int_col
from alltypesagg t1
left outer join alltypessmall t2
select t1.id, t1.int_col, t2.id, t2.int_col
from alltypesagg t1
left outer join alltypessmall t2
on (t1.int_col = t2.int_col)
order by t1.id,t2.id limit 10
---- TYPES
@@ -762,7 +762,7 @@ int,int,int,int
====
---- QUERY
# Test limit 0 from sub query
select sum(a.int_col) from
select sum(a.int_col) from
(select int_col from functional.alltypes order by int_col limit 0) a
---- TYPES
bigint
@@ -770,34 +770,36 @@ bigint
NULL
====
---- QUERY
# Test queries with divide by 0
select if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as v
from alltypestiny order by v desc limit 100;
# Test queries with divide by 0 (cast to string to avoid nan != nan issues)
select cast(if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as string)
from alltypestiny order by
if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) desc limit 100;
---- TYPES
DOUBLE
STRING
---- RESULTS
inf
1
0
-1
-3
-inf
-inf
-nan
'inf'
'1'
'0'
'-1'
'-3'
'-inf'
'-inf'
'nan'
====
---- QUERY
# Test queries with divide by 0
select if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as v
from alltypestiny order by v asc limit 100;
# Test queries with divide by 0 (cast to string to avoid nan != nan issues)
select CAST(if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as STRING)
from alltypestiny order by
if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) asc limit 100;
---- TYPES
DOUBLE
STRING
---- RESULTS
-nan
-inf
-inf
-3
-1
0
1
inf
'nan'
'-inf'
'-inf'
'-3'
'-1'
'0'
'1'
'inf'
====

6
tests/common/test_result_verifier.py Normal file → Executable file
View File

@@ -116,6 +116,8 @@ def compare_float(x, y, epsilon):
# floating point spec defines nan != nan.
if math.isnan(x) and math.isnan(y):
return True
if math.isinf(x) or math.isinf(y):
return x == y
return abs(x - y) <= epsilon
# Represents a column in a row
@@ -139,15 +141,13 @@ class ResultColumn(object):
# Make sure the column types are the same
if self.column_type != other.column_type:
return False
# Check equality based on a supplied regex if one was given.
if self.regex is not None:
return self.regex.match(other.value)
if other.regex is not None:
return other.regex.match(self.value)
if (self.value == 'NULL' or other.value == 'NULL') or \
('inf' in self.value or 'inf' in other.value):
if (self.value == 'NULL' or other.value == 'NULL'):
return self.value == other.value
elif self.column_type == 'float':
return compare_float(float(self.value), float(other.value), 10e-5)

0
tests/query_test/test_aggregation.py Normal file → Executable file
View File