IMPALA-724: Support infinite / nan values in text files

This patch allows the text scanner to read 'inf' or 'Infinity' from a row and correctly translate it into floating-point infinity. It also adds is_inf() and is_nan() builtins. Finally, we change the text table writer to write Infinity and NaN for compatibility with Hive. In the future, we might consider adding nan / inf literals to our grammar (postgres has this, see: http://www.postgresql.org/docs/9.3/static/datatype-numeric.html). Change-Id: I796f2852b3c6c3b72e9aae9dd5ad228d188a6ea3 Reviewed-on: http://gerrit.ent.cloudera.com:8080/2393 Reviewed-by: Henry Robinson <henry@cloudera.com> Tested-by: jenkins (cherry picked from commit 58091355142cadd2b74874d9aa7c8ab6bf3efe2f) Reviewed-on: http://gerrit.ent.cloudera.com:8080/2483
2025-12-25 02:03:09 -05:00 · 2014-04-28 17:11:31 -07:00
parent 38fdda20e4
commit 38befd2126
13 changed files with 263 additions and 81 deletions
--- a/be/src/exprs/expr-test.cc
+++ b/be/src/exprs/expr-test.cc
@@ -288,6 +288,24 @@ class ExprTest : public testing::Test {
      TestComparison(lexical_cast<string>(numeric_limits<T>::min()),
                   lexical_cast<string>(numeric_limits<T>::max() + 1), true);
    }
+
+    // Compare nan: not equal to, larger than or smaller than anything, including itself
+    TestValue(lexical_cast<string>(t_min) + " < 0/0", TYPE_BOOLEAN, false);
+    TestValue(lexical_cast<string>(t_min) + " > 0/0", TYPE_BOOLEAN, false);
+    TestValue(lexical_cast<string>(t_min) + " = 0/0", TYPE_BOOLEAN, false);
+    TestValue(lexical_cast<string>(t_max) + " < 0/0", TYPE_BOOLEAN, false);
+    TestValue(lexical_cast<string>(t_max) + " > 0/0", TYPE_BOOLEAN, false);
+    TestValue(lexical_cast<string>(t_max) + " = 0/0", TYPE_BOOLEAN, false);
+    TestValue("0/0 < 0/0", TYPE_BOOLEAN, false);
+    TestValue("0/0 > 0/0", TYPE_BOOLEAN, false);
+    TestValue("0/0 = 0/0", TYPE_BOOLEAN, false);
+
+    // Compare inf: larger than everything except nan (or smaller, for -inf)
+    TestValue(lexical_cast<string>(t_max) + " < 1/0", TYPE_BOOLEAN, true);
+    TestValue(lexical_cast<string>(t_min) + " > -1/0", TYPE_BOOLEAN, true);
+    TestValue("1/0 = 1/0", TYPE_BOOLEAN, true);
+    TestValue("1/0 < 0/0", TYPE_BOOLEAN, false);
+    TestValue("0/0 < 1/0", TYPE_BOOLEAN, false);
  }

  void TestStringComparisons() {
@@ -1758,6 +1776,31 @@ TEST_F(ExprTest, UtilityFunctions) {
  TestIsNull("fnv_hash(NULL)", TYPE_BIGINT);
 }

+TEST_F(ExprTest, NonFiniteFloats) {
+  TestValue("is_inf(0.0)", TYPE_BOOLEAN, false);
+  TestValue("is_inf(-1/0)", TYPE_BOOLEAN, true);
+  TestValue("is_inf(1/0)", TYPE_BOOLEAN, true);
+  TestValue("is_inf(0/0)", TYPE_BOOLEAN, false);
+  TestValue("is_inf(NULL)", TYPE_BOOLEAN, false);
+  TestValue("is_nan(NULL)", TYPE_BOOLEAN, false);
+
+  TestValue("is_nan(0.0)", TYPE_BOOLEAN, false);
+  TestValue("is_nan(1/0)", TYPE_BOOLEAN, false);
+  TestValue("is_nan(0/0)", TYPE_BOOLEAN, true);
+
+  TestCast("1/0", numeric_limits<double>::infinity());
+  TestCast("CAST(1/0 AS FLOAT)", numeric_limits<float>::infinity());
+  TestValue("CAST('inf' AS FLOAT)", TYPE_FLOAT, numeric_limits<float>::infinity());
+  TestValue("CAST('inf' AS DOUBLE)", TYPE_DOUBLE, numeric_limits<double>::infinity());
+  TestValue("CAST('Infinity' AS FLOAT)", TYPE_FLOAT, numeric_limits<float>::infinity());
+  TestValue("CAST('-Infinity' AS DOUBLE)", TYPE_DOUBLE,
+      -numeric_limits<double>::infinity());
+
+  // NaN != NaN, so we have to wrap the value in a string
+  TestStringValue("CAST(CAST('nan' AS FLOAT) AS STRING)", string("nan"));
+  TestStringValue("CAST(CAST('nan' AS DOUBLE) AS STRING)", string("nan"));
+}
+
 TEST_F(ExprTest, MathTrigonometricFunctions) {
  // It is important to calculate the expected values
  // using math functions, and not simply use constants.
--- a/be/src/exprs/udf-builtins.cc
+++ b/be/src/exprs/udf-builtins.cc
@@ -17,8 +17,9 @@
 #include "util/bit-util.h"

 #include <ctype.h>
-#include <math.h>
 #include <gutil/strings/substitute.h>
+#include <math.h>
+

 using namespace std;
 using namespace boost::gregorian;
@@ -77,6 +78,16 @@ BigIntVal UdfBuiltins::MinBigInt(FunctionContext* context) {
  return BigIntVal(numeric_limits<int64_t>::min());
 }

+BooleanVal UdfBuiltins::IsNan(FunctionContext* context, const DoubleVal& val) {
+  if (val.is_null) return BooleanVal(false);
+  return BooleanVal(isnan(val.val));
+}
+
+BooleanVal UdfBuiltins::IsInf(FunctionContext* context, const DoubleVal& val) {
+  if (val.is_null) return BooleanVal(false);
+  return BooleanVal(isinf(val.val));
+}
+
 // The units which can be used when Truncating a Timestamp
 struct TruncUnit {
  enum Type {
--- a/be/src/exprs/udf-builtins.h
+++ b/be/src/exprs/udf-builtins.h
@@ -40,6 +40,8 @@ class UdfBuiltins {
  static SmallIntVal MinSmallInt(FunctionContext* context);
  static BigIntVal MinBigInt(FunctionContext* context);

+  static BooleanVal IsNan(FunctionContext* context, const DoubleVal& val);
+  static BooleanVal IsInf(FunctionContext* context, const DoubleVal& val);

  // Rounds (truncating down) a Timestamp to the specified unit.
  //    Units:
--- a/be/src/runtime/raw-value.h
+++ b/be/src/runtime/raw-value.h
@@ -19,6 +19,7 @@
 #include <string>

 #include <boost/functional/hash.hpp>
+#include <math.h>

 #include "common/logging.h"
 #include "runtime/string-value.inline.h"
@@ -243,10 +244,32 @@ inline void RawValue::PrintValue(const void* value, const ColumnType& type, int
      *stream << *reinterpret_cast<const int64_t*>(value);
      break;
    case TYPE_FLOAT:
-      *stream << *reinterpret_cast<const float*>(value);
+      {
+        float val = *reinterpret_cast<const float*>(value);
+        if (LIKELY(std::isfinite(val))) {
+          *stream << val;
+        } else if (isinf(val)) {
+          // 'Infinity' is Java's text representation of inf. By staying close to Java, we
+          // allow Hive to read text tables containing non-finite values produced by
+          // Impala. (The same logic applies to 'NaN', below).
+          *stream << (val < 0 ? "-Infinity" : "Infinity");
+        } else if (isnan(val)) {
+          *stream << "NaN";
+        }
+      }
      break;
    case TYPE_DOUBLE:
-      *stream << *reinterpret_cast<const double*>(value);
+      {
+        double val = *reinterpret_cast<const double*>(value);
+        if (LIKELY(std::isfinite(val))) {
+          *stream << val;
+        } else if (isinf(val)) {
+          // See TYPE_FLOAT for rationale.
+          *stream << (val < 0 ? "-Infinity" : "Infinity");
+        } else if (isnan(val)) {
+          *stream << "NaN";
+        }
+      }
      break;
    case TYPE_STRING:
      string_val = reinterpret_cast<const StringValue*>(value);
--- a/be/src/util/string-parser-test.cc
+++ b/be/src/util/string-parser-test.cc
@@ -77,12 +77,25 @@ void TestBoolValue(const char* s, bool exp_val, StringParser::ParseResult exp_re
 // Compare Impala's float conversion function against strtod.
 template<typename T>
 void TestFloatValue(const string& s, StringParser::ParseResult exp_result) {
-  T exp_val = 0;
-  if (exp_result == StringParser::PARSE_SUCCESS) exp_val = strtod(s.c_str(), NULL);
  StringParser::ParseResult result;
  T val = StringParser::StringToFloat<T>(s.data(), s.length(), &result);
  EXPECT_EQ(exp_result, result);
-  if (exp_result == StringParser::PARSE_SUCCESS) EXPECT_EQ(exp_val, val);
+
+  if (exp_result == StringParser::PARSE_SUCCESS && result == exp_result) {
+    T exp_val = strtod(s.c_str(), NULL);
+    EXPECT_EQ(exp_val, val);
+  }
+}
+
+template<typename T>
+void TestFloatValueIsNan(const string& s, StringParser::ParseResult exp_result) {
+  StringParser::ParseResult result;
+  T val = StringParser::StringToFloat<T>(s.data(), s.length(), &result);
+  EXPECT_EQ(exp_result, result);
+
+  if (exp_result == StringParser::PARSE_SUCCESS && result == exp_result) {
+    EXPECT_TRUE(isnan(val));
+  }
 }

 // Tests conversion of s to double and float with +/- prefixing (and no prefix) and with
@@ -346,6 +359,24 @@ TEST(StringToFloat, Basic) {
  TestFloatValue<double>(double_min, StringParser::PARSE_SUCCESS);
  TestFloatValue<double>(double_max, StringParser::PARSE_SUCCESS);

+  // Non-finite values
+  TestAllFloatVariants("INFinity", StringParser::PARSE_SUCCESS);
+  TestAllFloatVariants("infinity", StringParser::PARSE_SUCCESS);
+  TestAllFloatVariants("inf", StringParser::PARSE_SUCCESS);
+
+  TestFloatValueIsNan<float>("nan", StringParser::PARSE_SUCCESS);
+  TestFloatValueIsNan<double>("nan", StringParser::PARSE_SUCCESS);
+  TestFloatValueIsNan<float>("NaN", StringParser::PARSE_SUCCESS);
+  TestFloatValueIsNan<double>("NaN", StringParser::PARSE_SUCCESS);
+  TestFloatValueIsNan<float>("nana", StringParser::PARSE_SUCCESS);
+  TestFloatValueIsNan<double>("nana", StringParser::PARSE_SUCCESS);
+  TestFloatValueIsNan<float>("naN", StringParser::PARSE_SUCCESS);
+  TestFloatValueIsNan<double>("naN", StringParser::PARSE_SUCCESS);
+
+  TestFloatValueIsNan<float>("n aN", StringParser::PARSE_FAILURE);
+  TestFloatValueIsNan<float>("nnaN", StringParser::PARSE_FAILURE);
+
+
  // Overflow.
  TestFloatValue<float>(float_max + "11111", StringParser::PARSE_OVERFLOW);
  TestFloatValue<double>(double_max + "11111", StringParser::PARSE_OVERFLOW);
@@ -362,6 +393,10 @@ TEST(StringToFloat, Basic) {
  TestAllFloatVariants("456.789e10x", StringParser::PARSE_FAILURE);
  TestAllFloatVariants("456.789e10   sdfs ", StringParser::PARSE_FAILURE);
  TestAllFloatVariants("1e10   sdfs", StringParser::PARSE_FAILURE);
+  TestAllFloatVariants("in", StringParser::PARSE_FAILURE);
+  TestAllFloatVariants("in finity", StringParser::PARSE_FAILURE);
+  TestAllFloatVariants("na", StringParser::PARSE_FAILURE);
+  TestAllFloatVariants("ThisIsANaN", StringParser::PARSE_FAILURE);
 }

 TEST(StringToFloat, InvalidLeadingTrailing) {
--- a/be/src/util/string-parser.h
+++ b/be/src/util/string-parser.h
@@ -325,6 +325,29 @@ class StringParser {
        decimal = true;
      } else if (s[i] == 'e' || s[i] == 'E') {
        break;
+      } else if (s[i] == 'i' || s[i] == 'I') {
+        if (len > i + 2 && (s[i+1] == 'n' || s[i+1] == 'N') &&
+            (s[i+2] == 'f' || s[i+2] == 'F')) {
+          // Note: Hive writes inf as Infinity, at least for text. We'll be a little loose
+          // here and interpret any column with inf as a prefix as infinity rather than
+          // checking every remaining byte.
+          *result = PARSE_SUCCESS;
+          return negative ? -INFINITY : INFINITY;
+        } else {
+          // Starts with 'i', but isn't inf...
+          *result = PARSE_FAILURE;
+          return 0;
+        }
+      } else if (s[i] == 'n' || s[i] == 'N') {
+        if (len > i + 2 && (s[i+1] == 'a' || s[i+1] == 'A') &&
+            (s[i+2] == 'n' || s[i+2] == 'N')) {
+          *result = PARSE_SUCCESS;
+          return negative ? -NAN : NAN;
+        } else {
+          // Starts with 'n', but isn't NaN...
+          *result = PARSE_FAILURE;
+          return 0;
+        }
      } else {
        if ((UNLIKELY(i == first || !isAllWhitespace(s + i, len - i)))) {
          // Reject the string because either the first char was not a digit, "," or "e",
--- a/common/function-registry/gen_operators.py
+++ b/common/function-registry/gen_operators.py
@@ -145,6 +145,22 @@ void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  return &e->result_.${result_field};\n\
 }\n\n")

+
+# Special case for float types to string that deals properly with nan
+# (lexical_cast<string>(nan) returns "-nan" which is nonsensical).
+float_types_to_string = Template("\
+void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
+  Expr* op = e->children()[0];\n\
+  ${native_type1}* val = reinterpret_cast<${native_type1}*>(op->GetValue(row));\n\
+  if (val == NULL) return NULL;\n\
+  if (isnan(*val)) {\n\
+    e->result_.SetStringVal(string(\"nan\"));\n\
+  } else {\n\
+    e->result_.SetStringVal(lexical_cast<string>(*val));\n\
+  }\n\
+  return &e->result_.${result_field};\n\
+}\n\n")
+
 case = Template("\
 void* ComputeFunctions::${fn_signature}(Expr* e, TupleRow* row) {\n\
  CaseExpr* expr = static_cast<CaseExpr*>(e);\n\
@@ -216,7 +232,7 @@ types = {
  'FLOAT_TYPES'   : ['FLOAT', 'DOUBLE'],
  'NUMERIC_TYPES' : ['TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
  'NATIVE_TYPES'  : ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
-  'STRCAST_TYPES' : ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT', 'DOUBLE'],
+  'STRCAST_TYPES' : ['BOOLEAN', 'SMALLINT', 'INT', 'BIGINT'],
  'ALL_TYPES'     : ['BOOLEAN', 'TINYINT', 'SMALLINT', 'INT', 'BIGINT', 'FLOAT',\
                     'DOUBLE', 'STRING', 'TIMESTAMP'],
  'MAX_TYPES'     : ['BIGINT', 'DOUBLE'],
@@ -268,6 +284,7 @@ functions = [
  ['Cast', ['FLOAT_TYPES'], [['STRING'], ['FLOAT_TYPES']], string_to_float ],
  ['Cast', ['STRING'], [['STRCAST_TYPES'], ['STRING']], numeric_to_string ],
  ['Cast', ['STRING'], [['TINYINT'], ['STRING']], tinyint_to_string ],
+  ['Cast', ['STRING'], [['FLOAT_TYPES'], ['STRING']], float_types_to_string ],
  ['Cast', ['NATIVE_TYPES'], [['TIMESTAMP'], ['NATIVE_TYPES']]],
  ['Cast', ['STRING'], [['TIMESTAMP'], ['STRING']], numeric_to_string ],
  ['Cast', ['TIMESTAMP'], [['STRING'], ['TIMESTAMP']], string_to_timestamp],
--- a/common/function-registry/impala_functions.py
+++ b/common/function-registry/impala_functions.py
@@ -500,6 +500,10 @@ udf_functions = [
   '_ZN6impala11UdfBuiltins11MinSmallIntEPN10impala_udf15FunctionContextE'],
  [['min_bigint'], 'BIGINT', [],
   '_ZN6impala11UdfBuiltins9MinBigIntEPN10impala_udf15FunctionContextE'],
+  [['is_nan'], 'BOOLEAN', ['DOUBLE'],
+   '_ZN6impala11UdfBuiltins5IsNanEPN10impala_udf15FunctionContextERKNS1_9DoubleValE'],
+  [['is_inf'], 'BOOLEAN', ['DOUBLE'],
+   '_ZN6impala11UdfBuiltins5IsInfEPN10impala_udf15FunctionContextERKNS1_9DoubleValE'],
  [['trunc'], 'TIMESTAMP', ['TIMESTAMP', 'STRING'],
   '_ZN6impala11UdfBuiltins5TruncEPN10impala_udf15FunctionContextERKNS1_12TimestampValERKNS1_9StringValE',
   '_ZN6impala11UdfBuiltins12TruncPrepareEPN10impala_udf15FunctionContextENS2_18FunctionStateScopeE',
--- a/testdata/workloads/functional-query/queries/QueryTest/insert.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/insert.test
@@ -48,7 +48,7 @@ int, boolean, tinyint, smallint, int, bigint, float, double, string, string
 ---- QUERY
 # insert into unpartitioned table
 insert into table alltypesnopart_insert
-select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, 
+select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
 float_col, double_col, date_string_col, string_col, timestamp_col
 from alltypessmall
 where year=2009 and month=04
@@ -68,8 +68,8 @@ bigint
 ---- QUERY
 # static partition overwrite
 insert overwrite table alltypesinsert
-partition (year=2009, month=4) 
-select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, 
+partition (year=2009, month=4)
+select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
 float_col, double_col, date_string_col, string_col, timestamp_col
 from alltypessmall
 where year=2009 and month=4
@@ -117,8 +117,8 @@ int, boolean, tinyint, smallint, int, bigint, float, double, string, string
 ---- QUERY
 # static partition insert$TABLE, test creation of partitions
 insert into table alltypesinsert
-partition (year=2009, month=4) 
-select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, 
+partition (year=2009, month=4)
+select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
 float_col, double_col, date_string_col, string_col, timestamp_col
 from alltypessmall
 where year=2009 and month=4
@@ -139,8 +139,8 @@ bigint
 ---- QUERY
 # partially dynamic partition overwrite
 insert overwrite table alltypesinsert
-partition (year=2009, month) 
-select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, 
+partition (year=2009, month)
+select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
 float_col, double_col, date_string_col, string_col, timestamp_col, month
 from alltypessmall
 where year=2009 and month>1 and month<=4
@@ -239,8 +239,8 @@ int, boolean, tinyint, smallint, int, bigint, float, double, string, string
 ---- QUERY
 # partially dynamic partition insert$TABLE, check partition creation
 insert into table alltypesinsert
-partition (year=2009, month) 
-select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, 
+partition (year=2009, month)
+select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
 float_col, double_col, date_string_col, string_col, timestamp_col, month
 from alltypessmall
 where year=2009 and month>=1 and month<4
@@ -263,8 +263,8 @@ bigint
 ---- QUERY
 # fully dynamic partition overwrite
 insert overwrite table alltypesinsert
-partition (year, month) 
-select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, 
+partition (year, month)
+select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
 float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 from alltypessmall
 ---- SETUP
@@ -388,8 +388,8 @@ int, boolean, tinyint, smallint, int, bigint, float, double, string, string
 ---- QUERY
 # fully dynamic partition insert$TABLE, check partition creation
 insert into table alltypesinsert
-partition (year, month) 
-select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, 
+partition (year, month)
+select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
 float_col, double_col, date_string_col, string_col, timestamp_col, year, month
 from alltypessmall
 ---- SETUP
@@ -502,7 +502,7 @@ SELECT "value4",NULL FROM alltypessmall LIMIT 1;
 s2=__HIVE_DEFAULT_PARTITION__/: 1
 ====
 ---- QUERY
-# select with empty partition key as predicate should return nothing, because "" is 
+# select with empty partition key as predicate should return nothing, because "" is
 # mapped to NULL
 SELECT * FROM insert_string_partitioned WHERE s2 = "";
 ---- TYPES
@@ -535,7 +535,7 @@ year=2010/month=4/: 25
 # static partition insert from a constant select
 insert into alltypesinsert
 partition(year=2010, month=4)
-select 100, false, 1, 1, 1, 10, 
+select 100, false, 1, 1, 1, 10,
 10.0, 10.0, "02/01/09", "1", cast("2009-02-01 00:01:00" as timestamp)
 ---- SETUP
 RESET alltypesinsert
@@ -545,8 +545,8 @@ year=2010/month=4/: 1
 ---- QUERY
 # dynamic partition insert from a constant select
 insert into table alltypesinsert
-partition (year, month) 
-select 200, true, 2, 2, 2, 20, 
+partition (year, month)
+select 200, true, 2, 2, 2, 20,
 20.0, 20.0, "02/01/09", "1", cast("2009-02-01 00:02:00" as timestamp), 2010, 4
 ---- RESULTS
 year=2010/month=4/: 1
@@ -631,7 +631,7 @@ from alltypessmall limit 10
 : 10
 ====
 ---- QUERY
-select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, 
+select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col,
 float_col, double_col, date_string_col, string_col, timestamp_col
 from alltypesnopart_insert
 ---- TYPES
@@ -648,3 +648,25 @@ NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL
 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL
 NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL
 ====
+---- QUERY
+insert overwrite alltypesnopart_insert(float_col, double_col)
+values(CAST(1/0 AS FLOAT), 1/0), (CAST(-1/0 AS FLOAT), -1/0),
+      (CAST(0/0 AS FLOAT), 0/0), (CAST(-sqrt(-1) AS FLOAT), -sqrt(-1))
+---- SETUP
+RESET alltypesinsert
+---- RESULTS
+: 4
+====
+---- QUERY
+# Results have to be cast to strings, because nan == f is always false for all f
+# (even nan), so the results check would otherwise always fail.
+select CAST(float_col AS string), CAST(double_col AS string) from alltypesnopart_insert
+order by float_col, double_col limit 10;
+---- TYPES
+STRING, STRING
+---- RESULTS
+'nan','nan'
+'nan','nan'
+'-inf','-inf'
+'inf','inf'
+====
--- a/testdata/workloads/functional-query/queries/QueryTest/overflow.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/overflow.test
@@ -5,7 +5,7 @@ select * from Overflow
 ---- TYPES
 tinyint, smallint, int, bigint, float, double
 ---- RESULTS
--128,-32768,-2147483648,-9223372036854775808,-inf,-inf
+-128,-32768,-2147483648,-9223372036854775808,-Infinity,-Infinity
 1,2,3,4,5.5,6.6
 127,32767,2147483647,9223372036854775807,inf,inf
 ====
--- a/testdata/workloads/functional-query/queries/QueryTest/top-n.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/top-n.test
@@ -1,7 +1,7 @@
 ====
 ---- QUERY
 # Based on Aggregation Queries
-select int_col, sum(float_col) 
+select int_col, sum(float_col)
 from functional_hbase.alltypessmall
 where id < 5
 group by 1
@@ -16,9 +16,9 @@ INT, DOUBLE
 ====
 ---- QUERY
 # Run query without order by
-select tinyint_col, count(*) 
-from alltypesagg 
-group by 1 
+select tinyint_col, count(*)
+from alltypesagg
+group by 1
 limit 10
 ---- RESULTS
 5,1000
@@ -36,9 +36,9 @@ TINYINT, BIGINT
 ====
 ---- QUERY
 # Same query order by asc first col
-select tinyint_col, count(*) 
-from alltypesagg 
-group by 1 
+select tinyint_col, count(*)
+from alltypesagg
+group by 1
 order by 1
 limit 10
 ---- RESULTS
@@ -194,10 +194,10 @@ NULL,'NULL',NULL,NULL,1009,'Name9',94615
 BIGINT, STRING, INT, INT, BIGINT, STRING, INT
 ====
 ---- QUERY
-# order by multiple cols with nulls 
-select tinyint_col % 3, smallint_col % 3, count(*) 
-from alltypesagg 
-where day = 1 
+# order by multiple cols with nulls
+select tinyint_col % 3, smallint_col % 3, count(*)
+from alltypesagg
+where day = 1
 group by 1, 2
 order by 1, 2
 limit 20
@@ -219,9 +219,9 @@ NULL,NULL,10
 TINYINT, SMALLINT, BIGINT
 ====
 ---- QUERY
-select tinyint_col % 3, smallint_col % 3, count(*) 
-from alltypesagg 
-where day = 1 
+select tinyint_col % 3, smallint_col % 3, count(*)
+from alltypesagg
+where day = 1
 group by 1, 2
 order by 1, 2 desc
 limit 20
@@ -243,9 +243,9 @@ NULL,0,30
 TINYINT, SMALLINT, BIGINT
 ====
 ---- QUERY
-select tinyint_col % 3, smallint_col % 3, count(*) 
-from alltypesagg 
-where day = 1 
+select tinyint_col % 3, smallint_col % 3, count(*)
+from alltypesagg
+where day = 1
 group by 1, 2
 order by 1 desc, 2
 limit 20
@@ -267,9 +267,9 @@ NULL,NULL,10
 TINYINT, SMALLINT, BIGINT
 ====
 ---- QUERY
-select tinyint_col % 3, smallint_col % 3, count(*) 
-from alltypesagg 
-where day = 1 
+select tinyint_col % 3, smallint_col % 3, count(*)
+from alltypesagg
+where day = 1
 group by 1, 2
 order by 1 desc, 2 desc
 limit 20
@@ -441,7 +441,7 @@ limit 10
 SMALLINT, INT, TINYINT, INT, INT, FLOAT, STRING
 ====
 ---- QUERY
-# Order by a column that is not in the select list 
+# Order by a column that is not in the select list
 # Query with ordering column in select list
 # Don't include date_string_col, it comes back in random order.
 select int_col, tinyint_col
@@ -504,8 +504,8 @@ TINYINT
 ====
 ---- QUERY
 # Order by many exprs
-select year, month, count(*) 
-from alltypes 
+select year, month, count(*)
+from alltypes
 group by 1, 2
 order by 1, 2
 limit 100
@@ -703,7 +703,7 @@ INT
 # All select list items have an implicit alias. Test that the order by column ref
 # "int_col" is correctly aliased to t1.int_col, and therefore it is not an
 # ambiguous reference.
-select t1.int_col from alltypessmall t1, alltypessmall t2 where t1.id = t2.id 
+select t1.int_col from alltypessmall t1, alltypessmall t2 where t1.id = t2.id
 order by int_col
 limit 2
 ---- RESULTS
@@ -741,9 +741,9 @@ TIMESTAMP, TIMESTAMP, INT
 ====
 ---- QUERY
 # Test of order by with NULL tuple rows (from an outer join)
-select t1.id, t1.int_col, t2.id, t2.int_col 
-from alltypesagg t1 
-left outer join alltypessmall t2 
+select t1.id, t1.int_col, t2.id, t2.int_col
+from alltypesagg t1
+left outer join alltypessmall t2
  on (t1.int_col = t2.int_col)
 order by t1.id,t2.id limit 10
 ---- TYPES
@@ -762,7 +762,7 @@ int,int,int,int
 ====
 ---- QUERY
 # Test limit 0 from sub query
-select sum(a.int_col) from 
+select sum(a.int_col) from
  (select int_col from functional.alltypes order by int_col limit 0) a
 ---- TYPES
 bigint
@@ -770,34 +770,36 @@ bigint
 NULL
 ====
 ---- QUERY
-# Test queries with divide by 0
-select if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as v 
-from alltypestiny order by v desc limit 100;
+# Test queries with divide by 0 (cast to string to avoid nan != nan issues)
+select cast(if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as string)
+from alltypestiny order by
+if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) desc limit 100;
 ---- TYPES
-DOUBLE
+STRING
 ---- RESULTS
-inf
-1
-0
--1
--3
--inf
--inf
-nan
+'inf'
+'1'
+'0'
+'-1'
+'-3'
+'-inf'
+'-inf'
+'nan'
 ====
 ---- QUERY
-# Test queries with divide by 0
-select if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as v 
-from alltypestiny order by v asc limit 100;
+# Test queries with divide by 0 (cast to string to avoid nan != nan issues)
+select CAST(if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) as STRING)
+from alltypestiny order by
+if(id % 2 = 0, cast(id/3 as int), -id) / if(id > 4 or id = 0, 0, 1) asc limit 100;
 ---- TYPES
-DOUBLE
+STRING
 ---- RESULTS
-nan
--inf
--inf
--3
--1
-0
-1
-inf
+'nan'
+'-inf'
+'-inf'
+'-3'
+'-1'
+'0'
+'1'
+'inf'
 ====
--- a/tests/common/test_result_verifier.py
+++ b/tests/common/test_result_verifier.py
@@ -116,6 +116,8 @@ def compare_float(x, y, epsilon):
  # floating point spec defines nan != nan.
  if math.isnan(x) and math.isnan(y):
    return True
+  if math.isinf(x) or math.isinf(y):
+    return x == y
  return abs(x - y) <= epsilon

 # Represents a column in a row
@@ -139,15 +141,13 @@ class ResultColumn(object):
    # Make sure the column types are the same
    if self.column_type != other.column_type:
      return False
-
    # Check equality based on a supplied regex if one was given.
    if self.regex is not None:
      return self.regex.match(other.value)
    if other.regex is not None:
      return other.regex.match(self.value)

-    if (self.value == 'NULL' or other.value == 'NULL') or \
-       ('inf' in self.value or 'inf' in other.value):
+    if (self.value == 'NULL' or other.value == 'NULL'):
      return self.value == other.value
    elif self.column_type == 'float':
      return compare_float(float(self.value), float(other.value), 10e-5)
--- a/tests/query_test/test_aggregation.py
+++ b/tests/query_test/test_aggregation.py