diff --git a/be/src/codegen/llvm-codegen.cc b/be/src/codegen/llvm-codegen.cc index 33ecc7516..c9145d82a 100644 --- a/be/src/codegen/llvm-codegen.cc +++ b/be/src/codegen/llvm-codegen.cc @@ -260,6 +260,8 @@ string LlvmCodeGen::GetIR(bool full_module) const { Type* LlvmCodeGen::GetType(PrimitiveType type) { switch (type) { + case TYPE_NULL: + return Type::getInt1Ty(context()); case TYPE_BOOLEAN: return Type::getInt1Ty(context()); case TYPE_TINYINT: @@ -299,6 +301,8 @@ Value* LlvmCodeGen::CastPtrToLlvmPtr(Type* type, void* ptr) { Value* LlvmCodeGen::GetIntConstant(PrimitiveType type, int64_t val) { switch (type) { + case TYPE_NULL: + return ConstantInt::get(context(), APInt(8, val)); case TYPE_TINYINT: return ConstantInt::get(context(), APInt(8, val)); case TYPE_SMALLINT: @@ -628,6 +632,9 @@ Function* LlvmCodeGen::CodegenMinMax(PrimitiveType type, bool min) { Value* compare = NULL; switch (type) { + case TYPE_NULL: + compare = false_value(); + break; case TYPE_BOOLEAN: if (min) { // For min, return x && y @@ -683,6 +690,8 @@ Value* LlvmCodeGen::CodegenEquals(LlvmBuilder* builder, Value* v1, Value* v2, return NULL; } switch (type) { + case TYPE_NULL: + return false_value(); case TYPE_BOOLEAN: case TYPE_TINYINT: case TYPE_SMALLINT: diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc index 3983ffd73..93c508f55 100644 --- a/be/src/exec/hash-table.cc +++ b/be/src/exec/hash-table.cc @@ -121,6 +121,10 @@ static void CodegenAssignNullValue(LlvmCodeGen* codegen, dst = builder->CreateBitCast(dst, codegen->ptr_type()); null_value = codegen->GetIntConstant(TYPE_TINYINT, fvn_seed); break; + case TYPE_NULL: + dst = builder->CreateBitCast(dst, codegen->ptr_type()); + null_value = codegen->GetIntConstant(type, fvn_seed); + break; case TYPE_TINYINT: case TYPE_SMALLINT: case TYPE_INT: diff --git a/be/src/exec/parquet-common.h b/be/src/exec/parquet-common.h index d936f100b..9cb85334c 100644 --- a/be/src/exec/parquet-common.h +++ b/be/src/exec/parquet-common.h @@ -28,6 
+28,7 @@ const uint32_t PARQUET_CURRENT_VERSION = 1; // PrimitiveType enum const parquet::Type::type IMPALA_TO_PARQUET_TYPES[] = { parquet::Type::BOOLEAN, // Invalid + parquet::Type::BOOLEAN, // NULL type parquet::Type::BOOLEAN, parquet::Type::INT32, parquet::Type::INT32, diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index 01f994c85..58180885e 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -319,6 +319,36 @@ class ExprTest : public testing::Test { TestValue("'' = ''", TYPE_BOOLEAN, true); } + // Test comparison operators with a left or right NULL operand against op. + void TestNullComparison(const string& op) { + // NULL as right operand. + TestIsNull(op + " = NULL", TYPE_BOOLEAN); + TestIsNull(op + " != NULL", TYPE_BOOLEAN); + TestIsNull(op + " <> NULL", TYPE_BOOLEAN); + TestIsNull(op + " < NULL", TYPE_BOOLEAN); + TestIsNull(op + " > NULL", TYPE_BOOLEAN); + TestIsNull(op + " <= NULL", TYPE_BOOLEAN); + TestIsNull(op + " >= NULL", TYPE_BOOLEAN); + // NULL as left operand. + TestIsNull("NULL = " + op, TYPE_BOOLEAN); + TestIsNull("NULL != " + op, TYPE_BOOLEAN); + TestIsNull("NULL <> " + op, TYPE_BOOLEAN); + TestIsNull("NULL < " + op, TYPE_BOOLEAN); + TestIsNull("NULL > " + op, TYPE_BOOLEAN); + TestIsNull("NULL <= " + op, TYPE_BOOLEAN); + TestIsNull("NULL >= " + op, TYPE_BOOLEAN); + } + + // Test comparison operators with a left or right NULL operand on all types. + void TestNullComparisons() { + unordered_map::iterator def_iter; + for(def_iter = default_type_strs_.begin(); def_iter != default_type_strs_.end(); + ++def_iter) { + TestNullComparison(def_iter->second); + } + TestNullComparison("NULL"); + } + // Generate all possible tests for combinations of . // Also test conversions from strings. 
void TestComparison(const string& smaller, const string& larger, bool compare_strings) { @@ -481,6 +511,60 @@ class ExprTest : public testing::Test { TestValue(a_str + " % " + b_str, expected_type, cast_a % cast_b); } + // Test ops that always promote to a fixed type with NULL operands: + // ADD, SUBTRACT, MULTIPLY, DIVIDE. + // We need CastType to make lexical_cast work properly for low-resolution types. + template + void TestNullOperandFixedResultTypeOps(NonNullOp op, PrimitiveType expected_type) { + CastType cast_op = static_cast(op); + string op_str = lexical_cast(cast_op); + // NULL as right operand. + TestIsNull(op_str + " + NULL", expected_type); + TestIsNull(op_str + " - NULL", expected_type); + TestIsNull(op_str + " * NULL", expected_type); + TestIsNull(op_str + " / NULL", TYPE_DOUBLE); + // NULL as left operand. + TestIsNull("NULL + " + op_str, expected_type); + TestIsNull("NULL - " + op_str, expected_type); + TestIsNull("NULL * " + op_str, expected_type); + TestIsNull("NULL / " + op_str, TYPE_DOUBLE); + } + + // Test binary int ops with NULL as left or right operand: + // BITAND, BITOR, BITXOR, INT_DIVIDE, MOD. + template + void TestNullOperandVariableResultTypeIntOps(NonNullOp op, + PrimitiveType expected_type) { + string op_str = lexical_cast(static_cast(op)); + // NULL as right operand. + TestIsNull(op_str + " & NULL", expected_type); + TestIsNull(op_str + " | NULL", expected_type); + TestIsNull(op_str + " ^ NULL", expected_type); + TestIsNull(op_str + " DIV NULL", expected_type); + TestIsNull(op_str + " % NULL", expected_type); + // NULL as left operand. + TestIsNull("NULL & " + op_str, expected_type); + TestIsNull("NULL | " + op_str, expected_type); + TestIsNull("NULL ^ " + op_str, expected_type); + TestIsNull("NULL DIV " + op_str, expected_type); + TestIsNull("NULL % " + op_str, expected_type); + } + + // Test all binary ops with both operands being NULL. + // We expect such exprs to return TYPE_NULL, except for '/' which is always double. 
+ void TestNullOperandsArithmeticOps() { + TestIsNull("NULL + NULL", TYPE_NULL); + TestIsNull("NULL - NULL", TYPE_NULL); + TestIsNull("NULL * NULL", TYPE_NULL); + TestIsNull("NULL / NULL", TYPE_DOUBLE); + TestIsNull("NULL & NULL", TYPE_NULL); + TestIsNull("NULL | NULL", TYPE_NULL); + TestIsNull("NULL ^ NULL", TYPE_NULL); + TestIsNull("NULL DIV NULL", TYPE_NULL); + TestIsNull("NULL % NULL", TYPE_NULL); + TestIsNull("~NULL", TYPE_NULL); + } + // Test casting stmt to all types. Expected result is val. template void TestCast(const string& stmt, T val) { @@ -604,6 +688,7 @@ TEST_F(ExprTest, LiteralConstruction) { TestSingleLiteralConstruction(TYPE_DOUBLE, &d_val, "1.23"); TestSingleLiteralConstruction(TYPE_DOUBLE, &d_val, "+1.23"); TestSingleLiteralConstruction(TYPE_STRING, &str_val, "Hello"); + TestSingleLiteralConstruction(TYPE_NULL, NULL, "NULL"); // Min/Max Boundary value test for tiny/small/int/long c_val = 127; @@ -643,7 +728,7 @@ TEST_F(ExprTest, LiteralExprs) { TestValue("true", TYPE_BOOLEAN, true); TestValue("false", TYPE_BOOLEAN, false); TestStringValue("'test'", "test"); - TestIsNull("null", TYPE_BOOLEAN); + TestIsNull("null", TYPE_NULL); } TEST_F(ExprTest, ArithmeticExprs) { @@ -712,6 +797,20 @@ TEST_F(ExprTest, ArithmeticExprs) { TestFixedResultTypeOps(numeric_limits::max(), numeric_limits::min()+1, TYPE_BIGINT); + // Test behavior with NULLs. + TestNullOperandFixedResultTypeOps(min_float_values_[TYPE_FLOAT], + TYPE_DOUBLE); + TestNullOperandFixedResultTypeOps(min_float_values_[TYPE_DOUBLE], + TYPE_DOUBLE); + TestNullOperandFixedResultTypeOps(min_int_values_[TYPE_TINYINT], + TYPE_BIGINT); + TestNullOperandFixedResultTypeOps(min_int_values_[TYPE_SMALLINT], + TYPE_BIGINT); + TestNullOperandFixedResultTypeOps(min_int_values_[TYPE_INT], + TYPE_BIGINT); + TestNullOperandFixedResultTypeOps(min_int_values_[TYPE_BIGINT], + TYPE_BIGINT); + // Test int ops that promote to assignment compatible type. 
TestVariableResultTypeIntOps(min_int_values_[TYPE_TINYINT], min_int_values_[TYPE_TINYINT], TYPE_TINYINT); @@ -734,6 +833,16 @@ TEST_F(ExprTest, ArithmeticExprs) { TestVariableResultTypeIntOps(min_int_values_[TYPE_BIGINT], min_int_values_[TYPE_BIGINT], TYPE_BIGINT); + // Test behavior with NULLs. + TestNullOperandVariableResultTypeIntOps(min_int_values_[TYPE_TINYINT], + TYPE_TINYINT); + TestNullOperandVariableResultTypeIntOps(min_int_values_[TYPE_SMALLINT], + TYPE_SMALLINT); + TestNullOperandVariableResultTypeIntOps(min_int_values_[TYPE_INT], + TYPE_INT); + TestNullOperandVariableResultTypeIntOps(min_int_values_[TYPE_BIGINT], + TYPE_BIGINT); + // Tests for dealing with '-'. TestValue("-1", TYPE_TINYINT, -1); TestValue("1 - 1", TYPE_BIGINT, 0); @@ -744,6 +853,9 @@ TEST_F(ExprTest, ArithmeticExprs) { // The "--" indicates a comment to be ignored. // Therefore, the result should be -1. TestValue("- 1 --1", TYPE_TINYINT, -1); + + // Test all arithmetic exprs with only NULL operands. + TestNullOperandsArithmeticOps(); } // There are two tests of ranges, the second of which requires a cast @@ -759,6 +871,7 @@ TEST_F(ExprTest, BinaryPredicates) { TestFloatingPointComparisons(true); TestFloatingPointComparisons(false); TestStringComparisons(); + TestNullComparisons(); } // Test casting from all types to all other types @@ -861,6 +974,8 @@ TEST_F(ExprTest, CompoundPredicates) { TEST_F(ExprTest, IsNullPredicate) { TestValue("5 IS NULL", TYPE_BOOLEAN, false); TestValue("5 IS NOT NULL", TYPE_BOOLEAN, true); + TestValue("NULL IS NULL", TYPE_BOOLEAN, true); + TestValue("NULL IS NOT NULL", TYPE_BOOLEAN, false); } TEST_F(ExprTest, LikePredicate) { @@ -922,6 +1037,16 @@ TEST_F(ExprTest, LikePredicate) { TestNonOkStatus("'a' REGEXP '(./'"); // Pattern is converted for LIKE, and should not throw. TestValue("'a' LIKE '(./'", TYPE_BOOLEAN, false); + // Test NULLs. 
+ TestIsNull("NULL LIKE 'a'", TYPE_BOOLEAN); + TestIsNull("'a' LIKE NULL", TYPE_BOOLEAN); + TestIsNull("NULL LIKE NULL", TYPE_BOOLEAN); + TestIsNull("NULL RLIKE 'a'", TYPE_BOOLEAN); + TestIsNull("'a' RLIKE NULL", TYPE_BOOLEAN); + TestIsNull("NULL RLIKE NULL", TYPE_BOOLEAN); + TestIsNull("NULL REGEXP 'a'", TYPE_BOOLEAN); + TestIsNull("'a' REGEXP NULL", TYPE_BOOLEAN); + TestIsNull("NULL REGEXP NULL", TYPE_BOOLEAN); } TEST_F(ExprTest, BetweenPredicate) { @@ -954,6 +1079,11 @@ TEST_F(ExprTest, BetweenPredicate) { TestValue("'abc' between 'aaa' and 'aab'", TYPE_BOOLEAN, false); TestValue("'abc' not between 'a' and 'z'", TYPE_BOOLEAN, false); TestValue("'abc' not between 'aaa' and 'aab'", TYPE_BOOLEAN, true); + // Test NULLs. + TestIsNull("NULL between 0 and 10", TYPE_BOOLEAN); + TestIsNull("1 between NULL and 10", TYPE_BOOLEAN); + TestIsNull("1 between 0 and NULL", TYPE_BOOLEAN); + TestIsNull("NULL between NULL and NULL", TYPE_BOOLEAN); } // Tests with NULLs are in the FE QueryTest. @@ -1022,32 +1152,52 @@ TEST_F(ExprTest, StringFunctions) { TestStringValue("substring('Hello', -5)", "Hello"); TestStringValue("substring('Hello', -6)", ""); TestStringValue("substring('Hello', 100)", ""); + TestIsNull("substring(NULL, 100)", TYPE_STRING); + TestIsNull("substring('Hello', NULL)", TYPE_STRING); + TestIsNull("substring(NULL, NULL)", TYPE_STRING); + TestStringValue("substring('Hello', 1, 1)", "H"); TestStringValue("substring('Hello', 2, 100)", "ello"); TestStringValue("substring('Hello', -3, 2)", "ll"); + TestIsNull("substring(NULL, 1, 100)", TYPE_STRING); + TestIsNull("substring('Hello', NULL, 100)", TYPE_STRING); + TestIsNull("substring('Hello', 1, NULL)", TYPE_STRING); + TestIsNull("substring(NULL, NULL, NULL)", TYPE_STRING); TestStringValue("lower('')", ""); TestStringValue("lower('HELLO')", "hello"); TestStringValue("lower('Hello')", "hello"); TestStringValue("lower('hello!')", "hello!"); TestStringValue("lcase('HELLO')", "hello"); + TestIsNull("lower(NULL)", 
TYPE_STRING); + TestIsNull("lcase(NULL)", TYPE_STRING); TestStringValue("upper('')", ""); TestStringValue("upper('HELLO')", "HELLO"); TestStringValue("upper('Hello')", "HELLO"); TestStringValue("upper('hello!')", "HELLO!"); TestStringValue("ucase('hello')", "HELLO"); + TestIsNull("upper(NULL)", TYPE_STRING); + TestIsNull("ucase(NULL)", TYPE_STRING); TestValue("length('')", TYPE_INT, 0); TestValue("length('a')", TYPE_INT, 1); TestValue("length('abcdefg')", TYPE_INT, 7); + TestIsNull("length(NULL)", TYPE_INT); TestStringValue("reverse('abcdefg')", "gfedcba"); TestStringValue("reverse('')", ""); + TestIsNull("reverse(NULL)", TYPE_STRING); TestStringValue("strleft('abcdefg', 3)", "abc"); TestStringValue("strleft('abcdefg', 10)", "abcdefg"); + TestIsNull("strleft(NULL, 3)", TYPE_STRING); + TestIsNull("strleft('abcdefg', NULL)", TYPE_STRING); + TestIsNull("strleft(NULL, NULL)", TYPE_STRING); TestStringValue("strright('abcdefg', 3)", "efg"); TestStringValue("strright('abcdefg', 10)", "abcdefg"); + TestIsNull("strright(NULL, 3)", TYPE_STRING); + TestIsNull("strright('abcdefg', NULL)", TYPE_STRING); + TestIsNull("strright(NULL, NULL)", TYPE_STRING); TestStringValue("trim('')", ""); TestStringValue("trim(' ')", ""); @@ -1055,23 +1205,27 @@ TEST_F(ExprTest, StringFunctions) { TestStringValue("trim('abcdefg ')", "abcdefg"); TestStringValue("trim(' abcdefg')", "abcdefg"); TestStringValue("trim('abc defg')", "abc defg"); + TestIsNull("trim(NULL)", TYPE_STRING); TestStringValue("ltrim('')", ""); TestStringValue("ltrim(' ')", ""); TestStringValue("ltrim(' abcdefg ')", "abcdefg "); TestStringValue("ltrim('abcdefg ')", "abcdefg "); TestStringValue("ltrim(' abcdefg')", "abcdefg"); TestStringValue("ltrim('abc defg')", "abc defg"); + TestIsNull("ltrim(NULL)", TYPE_STRING); TestStringValue("rtrim('')", ""); TestStringValue("rtrim(' ')", ""); TestStringValue("rtrim(' abcdefg ')", " abcdefg"); TestStringValue("rtrim('abcdefg ')", "abcdefg"); TestStringValue("rtrim(' abcdefg')", " 
abcdefg"); TestStringValue("rtrim('abc defg')", "abc defg"); + TestIsNull("rtrim(NULL)", TYPE_STRING); TestStringValue("space(0)", ""); TestStringValue("space(-1)", ""); TestStringValue("space(1)", " "); TestStringValue("space(6)", " "); + TestIsNull("space(NULL)", TYPE_STRING); TestStringValue("repeat('', 0)", ""); TestStringValue("repeat('', 6)", ""); @@ -1079,12 +1233,16 @@ TEST_F(ExprTest, StringFunctions) { TestStringValue("repeat('ab', -1)", ""); TestStringValue("repeat('ab', 1)", "ab"); TestStringValue("repeat('ab', 6)", "abababababab"); + TestIsNull("repeat(NULL, 6)", TYPE_STRING); + TestIsNull("repeat('ab', NULL)", TYPE_STRING); + TestIsNull("repeat(NULL, NULL)", TYPE_STRING); TestValue("ascii('')", TYPE_INT, 0); TestValue("ascii('abcde')", TYPE_INT, 'a'); TestValue("ascii('Abcde')", TYPE_INT, 'A'); TestValue("ascii('dddd')", TYPE_INT, 'd'); TestValue("ascii(' ')", TYPE_INT, ' '); + TestIsNull("ascii(NULL)", TYPE_INT); TestStringValue("lpad('', 0, '')", ""); TestStringValue("lpad('abc', 0, '')", ""); @@ -1093,6 +1251,10 @@ TEST_F(ExprTest, StringFunctions) { TestStringValue("lpad('abc', 6, 'xyz')", "xyzabc"); TestStringValue("lpad('abc', 5, 'xyz')", "xyabc"); TestStringValue("lpad('abc', 10, 'xyz')", "xyzxyzxabc"); + TestIsNull("lpad(NULL, 10, 'xyz')", TYPE_STRING); + TestIsNull("lpad('abc', NULL, 'xyz')", TYPE_STRING); + TestIsNull("lpad('abc', 10, NULL)", TYPE_STRING); + TestIsNull("lpad(NULL, NULL, NULL)", TYPE_STRING); TestStringValue("rpad('', 0, '')", ""); TestStringValue("rpad('abc', 0, '')", ""); TestStringValue("rpad('abc', 3, '')", "abc"); @@ -1100,6 +1262,10 @@ TEST_F(ExprTest, StringFunctions) { TestStringValue("rpad('abc', 6, 'xyz')", "abcxyz"); TestStringValue("rpad('abc', 5, 'xyz')", "abcxy"); TestStringValue("rpad('abc', 10, 'xyz')", "abcxyzxyzx"); + TestIsNull("rpad(NULL, 10, 'xyz')", TYPE_STRING); + TestIsNull("rpad('abc', NULL, 'xyz')", TYPE_STRING); + TestIsNull("rpad('abc', 10, NULL)", TYPE_STRING); + TestIsNull("rpad(NULL, NULL, 
NULL)", TYPE_STRING); // Note that Hive returns positions starting from 1. // Hive returns 0 if substr was not found in str (or on other error coditions). @@ -1109,12 +1275,19 @@ TEST_F(ExprTest, StringFunctions) { TestValue("instr('abc', 'abc')", TYPE_INT, 1); TestValue("instr('xyzabc', 'abc')", TYPE_INT, 4); TestValue("instr('xyzabcxyz', 'bcx')", TYPE_INT, 5); + TestIsNull("instr(NULL, 'bcx')", TYPE_INT); + TestIsNull("instr('xyzabcxyz', NULL)", TYPE_INT); + TestIsNull("instr(NULL, NULL)", TYPE_INT); TestValue("locate('', '')", TYPE_INT, 0); TestValue("locate('abc', '')", TYPE_INT, 0); TestValue("locate('', 'abc')", TYPE_INT, 0); TestValue("locate('abc', 'abc')", TYPE_INT, 1); TestValue("locate('abc', 'xyzabc')", TYPE_INT, 4); TestValue("locate('bcx', 'xyzabcxyz')", TYPE_INT, 5); + TestIsNull("locate(NULL, 'xyzabcxyz')", TYPE_INT); + TestIsNull("locate('bcx', NULL)", TYPE_INT); + TestIsNull("locate(NULL, NULL)", TYPE_INT); + // Test locate with starting pos param. // Note that Hive expects positions starting from 1 as input. 
TestValue("locate('', '', 0)", TYPE_INT, 0); @@ -1130,12 +1303,19 @@ TEST_F(ExprTest, StringFunctions) { TestValue("locate('abc', 'xyzabcdef', 3)", TYPE_INT, 4); TestValue("locate('abc', 'xyzabcdef', 4)", TYPE_INT, 4); TestValue("locate('abc', 'abcabcabc', 5)", TYPE_INT, 7); + TestIsNull("locate(NULL, 'abcabcabc', 5)", TYPE_INT); + TestIsNull("locate('abc', NULL, 5)", TYPE_INT); + TestIsNull("locate('abc', 'abcabcabc', NULL)", TYPE_INT); + TestIsNull("locate(NULL, NULL, NULL)", TYPE_INT); TestStringValue("concat('a')", "a"); TestStringValue("concat('a', 'b')", "ab"); TestStringValue("concat('a', 'b', 'cde')", "abcde"); TestStringValue("concat('a', 'b', 'cde', 'fg')", "abcdefg"); TestStringValue("concat('a', 'b', 'cde', '', 'fg', '')", "abcdefg"); + TestIsNull("concat(NULL)", TYPE_STRING); + TestIsNull("concat('a', NULL, 'b')", TYPE_STRING); + TestIsNull("concat('a', 'b', NULL)", TYPE_STRING); TestStringValue("concat_ws(',', 'a')", "a"); TestStringValue("concat_ws(',', 'a', 'b')", "a,b"); @@ -1144,6 +1324,9 @@ TEST_F(ExprTest, StringFunctions) { TestStringValue("concat_ws('%%', 'a', 'b', 'cde', 'fg')", "a%%b%%cde%%fg"); TestStringValue("concat_ws('|','a', 'b', 'cde', '', 'fg', '')", "a|b|cde||fg|"); TestStringValue("concat_ws('', '', '', '')", ""); + TestIsNull("concat_ws(NULL, NULL)", TYPE_STRING); + TestIsNull("concat_ws(',', NULL, 'b')", TYPE_STRING); + TestIsNull("concat_ws(',', 'b', NULL)", TYPE_STRING); TestValue("find_in_set('ab', 'ab,ab,ab,ade,cde')", TYPE_INT, 1); TestValue("find_in_set('ab', 'abc,xyz,abc,ade,ab')", TYPE_INT, 5); @@ -1155,12 +1338,11 @@ TEST_F(ExprTest, StringFunctions) { TestValue("find_in_set('', 'abc,ad,,ade,cde,')", TYPE_INT,3); // First param contains comma. 
TestValue("find_in_set('abc,def', 'abc,ad,,ade,cde,')", TYPE_INT, 0); + TestIsNull("find_in_set(NULL, 'abc,ad,,ade,cde')", TYPE_INT); + TestIsNull("find_in_set('abc,def', NULL)", TYPE_INT); + TestIsNull("find_in_set(NULL, NULL)", TYPE_INT); TestStringValue("version()", GetVersionString()); - - // TODO: tests with NULL arguments, currently we can't parse them - // inside function calls. - // e.g. TestValue("length(NULL)", TYPE_INT, NULL); } TEST_F(ExprTest, StringRegexpFunctions) { @@ -1189,6 +1371,11 @@ TEST_F(ExprTest, StringRegexpFunctions) { TestStringValue("regexp_extract('', 'abx', 0)", ""); // Invalid regex patter, unmatched parenthesis. TestIsNull("regexp_extract('abxcy1234a', '(/.', 0)", TYPE_STRING); + // NULL arguments. + TestIsNull("regexp_extract(NULL, 'a.x', 2)", TYPE_STRING); + TestIsNull("regexp_extract('abxcy1234a', NULL, 2)", TYPE_STRING); + TestIsNull("regexp_extract('abxcy1234a', 'a.x', NULL)", TYPE_STRING); + TestIsNull("regexp_extract(NULL, NULL, NULL)", TYPE_STRING); TestStringValue("regexp_replace('axcaycazc', 'a.c', 'a')", "aaa"); TestStringValue("regexp_replace('axcaycazc', 'a.c', '')", ""); @@ -1205,6 +1392,11 @@ TEST_F(ExprTest, StringRegexpFunctions) { TestStringValue("regexp_replace('axcaycazc', '', 'r')", "rarxrcraryrcrarzrcr"); // Invalid regex patter, unmatched parenthesis. TestIsNull("regexp_replace('abxcy1234a', '(/.', 'x')", TYPE_STRING); + // NULL arguments. + TestIsNull("regexp_replace(NULL, 'a.*', 'abcde')", TYPE_STRING); + TestIsNull("regexp_replace('axcaycazc', NULL, 'abcde')", TYPE_STRING); + TestIsNull("regexp_replace('axcaycazc', 'a.*', NULL)", TYPE_STRING); + TestIsNull("regexp_replace(NULL, NULL, NULL)", TYPE_STRING); } TEST_F(ExprTest, StringParseUrlFunction) { @@ -1415,6 +1607,12 @@ TEST_F(ExprTest, StringParseUrlFunction) { TestIsNull("parse_url('http://example.com', 'Userinfo')", TYPE_STRING); TestIsNull("parse_url('http://example.com', 'USERINFOXYZ')", TYPE_STRING); + // NULL arguments. 
+ TestIsNull("parse_url(NULL, 'AUTHORITY')", TYPE_STRING); + TestIsNull("parse_url('http://user:pass@example.com:80/docs/books/tutorial/" + "index.html?name=networking#DOWNLOADING', NULL)", TYPE_STRING); + TestIsNull("parse_url(NULL, NULL)", TYPE_STRING); + // Key's value is terminated by '#'. TestStringValue("parse_url('http://example.com:80/docs/books/tutorial/" "index.html?name=networking#DOWNLOADING', 'QUERY', 'name')", "networking"); @@ -1488,7 +1686,16 @@ TEST_F(ExprTest, MathTrigonometricFunctions) { TestValue("degrees(0)", TYPE_DOUBLE, 0.0); TestValue("degrees(pi())", TYPE_DOUBLE, 180.0); - // TODO: tests with NULL arguments, currently we can't parse them inside function calls. + // NULL arguments. + TestIsNull("sin(NULL)", TYPE_DOUBLE); + TestIsNull("asin(NULL)", TYPE_DOUBLE); + TestIsNull("cos(NULL)", TYPE_DOUBLE); + TestIsNull("acos(NULL)", TYPE_DOUBLE); + TestIsNull("tan(NULL)", TYPE_DOUBLE); + TestIsNull("atan(NULL)", TYPE_DOUBLE); + TestIsNull("radians(NULL)", TYPE_DOUBLE); + TestIsNull("degrees(NULL)", TYPE_DOUBLE); + } TEST_F(ExprTest, MathConversionFunctions) { @@ -1576,7 +1783,14 @@ TEST_F(ExprTest, MathConversionFunctions) { TestStringValue("conv('$123', 12, 2)", "0"); TestStringValue("conv('*12g', 32, 5)", "0"); - // TODO: tests with NULL arguments, currently we can't parse them inside function calls. + // NULL arguments. + TestIsNull("bin(NULL)", TYPE_STRING); + TestIsNull("hex(NULL)", TYPE_STRING); + TestIsNull("unhex(NULL)", TYPE_STRING); + TestIsNull("conv(NULL, 10, 10)", TYPE_STRING); + TestIsNull("conv(10, NULL, 10)", TYPE_STRING); + TestIsNull("conv(10, 10, NULL)", TYPE_STRING); + TestIsNull("conv(NULL, NULL, NULL)", TYPE_STRING); } TEST_F(ExprTest, MathFunctions) { @@ -1652,7 +1866,30 @@ TEST_F(ExprTest, MathFunctions) { TestValue("negative(3.14159265)", TYPE_DOUBLE, -3.14159265); TestValue("negative(-3.14159265)", TYPE_DOUBLE, 3.14159265); - // TODO: tests with NULL arguments, currently we can't parse them inside function calls. 
+ // NULL arguments. + TestIsNull("abs(NULL)", TYPE_DOUBLE); + TestIsNull("sign(NULL)", TYPE_FLOAT); + TestIsNull("exp(NULL)", TYPE_DOUBLE); + TestIsNull("ln(NULL)", TYPE_DOUBLE); + TestIsNull("log10(NULL)", TYPE_DOUBLE); + TestIsNull("log2(NULL)", TYPE_DOUBLE); + TestIsNull("log(NULL, 64.0)", TYPE_DOUBLE); + TestIsNull("log(2.0, NULL)", TYPE_DOUBLE); + TestIsNull("log(NULL, NULL)", TYPE_DOUBLE); + TestIsNull("pow(NULL, 10.0)", TYPE_DOUBLE); + TestIsNull("pow(2.0, NULL)", TYPE_DOUBLE); + TestIsNull("pow(NULL, NULL)", TYPE_DOUBLE); + TestIsNull("power(NULL, 10.0)", TYPE_DOUBLE); + TestIsNull("power(2.0, NULL)", TYPE_DOUBLE); + TestIsNull("power(NULL, NULL)", TYPE_DOUBLE); + TestIsNull("sqrt(NULL)", TYPE_DOUBLE); + TestIsNull("rand(NULL)", TYPE_DOUBLE); + TestIsNull("pmod(NULL, 3)", TYPE_BIGINT); + TestIsNull("pmod(10, NULL)", TYPE_BIGINT); + TestIsNull("pmod(NULL, NULL)", TYPE_BIGINT); + TestIsNull("positive(NULL)", TYPE_BIGINT); + TestIsNull("negative(NULL)", TYPE_BIGINT); + } TEST_F(ExprTest, MathRoundingFunctions) { @@ -1682,6 +1919,15 @@ TEST_F(ExprTest, MathRoundingFunctions) { TestValue("round(-3.14159265, 3)", TYPE_DOUBLE, -3.142); TestValue("round(-3.14159265, 4)", TYPE_DOUBLE, -3.1416); TestValue("round(-3.14159265, 5)", TYPE_DOUBLE, -3.14159); + + // NULL arguments. + TestIsNull("ceil(NULL)", TYPE_BIGINT); + TestIsNull("ceiling(NULL)", TYPE_BIGINT); + TestIsNull("floor(NULL)", TYPE_BIGINT); + TestIsNull("round(NULL)", TYPE_BIGINT); + TestIsNull("round(NULL, 1)", TYPE_DOUBLE); + TestIsNull("round(3.14159265, NULL)", TYPE_DOUBLE); + TestIsNull("round(NULL, NULL)", TYPE_DOUBLE); } TEST_F(ExprTest, UnaryOperators) { @@ -1804,6 +2050,16 @@ TEST_F(ExprTest, TimestampFunctions) { "as timestamp), interval 1033 nanoseconds) as string)", "2012-01-01 00:00:00.000000001"); + // NULL arguments. 
+ TestIsNull("date_add(NULL, interval 10 years)", TYPE_TIMESTAMP); + TestIsNull("date_add(cast('2012-01-01 09:10:11.123456789' as timestamp)," + "interval NULL years)", TYPE_TIMESTAMP); + TestIsNull("date_add(NULL, interval NULL years)", TYPE_TIMESTAMP); + TestIsNull("date_sub(NULL, interval 10 years)", TYPE_TIMESTAMP); + TestIsNull("date_sub(cast('2012-01-01 09:10:11.123456789' as timestamp)," + "interval NULL years)", TYPE_TIMESTAMP); + TestIsNull("date_sub(NULL, interval NULL years)", TYPE_TIMESTAMP); + // Test add/sub behavior with very large time values. string max_int = lexical_cast(numeric_limits::max()); TestStringValue( @@ -1823,10 +2079,18 @@ TEST_F(ExprTest, TimestampFunctions) { TestValue("unix_timestamp('1970-01-01 10:10:10', 'yyyy-MM-dd')", TYPE_INT, 0); TestValue("unix_timestamp('1970-01-01 00:00:00 extra text', 'yyyy-MM-dd HH:mm:ss')", TYPE_INT, 0); + TestIsNull("unix_timestamp(NULL)", TYPE_INT); + TestIsNull("unix_timestamp(NULL, 'yyyy-MM-dd')", TYPE_INT); + TestIsNull("unix_timestamp('1970-01-01 10:10:10', NULL)", TYPE_INT); + TestIsNull("unix_timestamp(NULL, NULL)", TYPE_INT); TestStringValue("cast(cast(0 as timestamp) as string)", "1970-01-01 00:00:00"); TestStringValue("from_unixtime(0)", "1970-01-01 00:00:00"); + TestIsNull("from_unixtime(NULL)", TYPE_STRING); TestStringValue("from_unixtime(0, 'yyyy-MM-dd HH:mm:ss')", "1970-01-01 00:00:00"); TestStringValue("from_unixtime(0, 'yyyy-MM-dd')", "1970-01-01"); + TestIsNull("from_unixtime(NULL, 'yyyy-MM-dd')", TYPE_STRING); + TestIsNull("from_unixtime(0, NULL)", TYPE_STRING); + TestIsNull("from_unixtime(NULL, NULL)", TYPE_STRING); TestStringValue("from_unixtime(unix_timestamp('1999-01-01 10:10:10'), \ 'yyyy-MM-dd')", "1999-01-01"); TestStringValue("from_unixtime(unix_timestamp('1999-01-01 10:10:10'), \ @@ -1867,6 +2131,15 @@ TEST_F(ExprTest, TimestampFunctions) { TestIsNull("datediff(cast('09:10:11.12345678' as timestamp), " "cast('2012-12-22' as timestamp))", TYPE_INT); + TestIsNull("year(NULL)", 
TYPE_INT); + TestIsNull("month(NULL)", TYPE_INT); + TestIsNull("dayofmonth(NULL)", TYPE_INT); + TestIsNull("day(NULL)", TYPE_INT); + TestIsNull("weekofyear(NULL)", TYPE_INT); + TestIsNull("datediff(NULL, '2011-12-22 09:10:11.12345678')", TYPE_INT); + TestIsNull("datediff('2012-12-22', NULL)", TYPE_INT); + TestIsNull("datediff(NULL, NULL)", TYPE_INT); + // Tests from Hive // The hive documentation states that timestamps are timezoneless, but the tests // show that they treat them as being in the current timezone so these tests @@ -1894,6 +2167,11 @@ TEST_F(ExprTest, TimestampFunctions) { // We get some decimal-binary skew here TestStringValue("cast(from_utc_timestamp(cast(1.3041352164485E9 as timestamp), 'PST') " "as string)", "2011-04-29 20:46:56.448499917"); + // NULL arguments. + TestIsNull("from_utc_timestamp(NULL, 'PST')", TYPE_TIMESTAMP); + TestIsNull("from_utc_timestamp(cast('2011-01-01 01:01:01.1' as timestamp), NULL)", + TYPE_TIMESTAMP); + TestIsNull("from_utc_timestamp(NULL, NULL)", TYPE_TIMESTAMP); // Hive silently ignores bad timezones. We log a problem. TestStringValue( @@ -1926,12 +2204,6 @@ TEST_F(ExprTest, TimestampFunctions) { TestIsNull("from_unixtime(0, 'HH:mm:dd')", TYPE_STRING); } -// TODO: Since we currently can't analyze NULL literals as function parameters, -// we instead use a function which we know will return NULL as a workaround. -// This only works sometimes though, because the NULL-returning function -// must also have the correct return type that we are looking for. -// The commented (#if 0) tests should be enabled once we can analyze NULL literals -// as function arguments. TEST_F(ExprTest, ConditionalFunctions) { // If first param evaluates to true, should return second parameter, // false or NULL should return the third. 
@@ -1950,52 +2222,33 @@ TEST_F(ExprTest, ConditionalFunctions) { TestTimestampValue("if(FALSE, cast('2011-01-01 09:01:01' as timestamp), " "cast('1999-06-14 19:07:25' as timestamp))", else_val); - // Workaround: if(true, NULL, NULL) returns NULL of type BOOLEAN. - // coalesce(NULL) - TestIsNull("coalesce(if(true, NULL, NULL))", TYPE_BOOLEAN); - // coalesce(NULL, NULL) - TestIsNull("coalesce(if(true, NULL, NULL), if(true, NULL, NULL))", TYPE_BOOLEAN); + TestIsNull("coalesce(NULL)", TYPE_DOUBLE); + TestIsNull("coalesce(NULL, NULL)", TYPE_DOUBLE); TestValue("coalesce(TRUE)", TYPE_BOOLEAN, true); - // coalesce(NULL, TRUE, NULL) - TestValue("coalesce(if(true, NULL, NULL), TRUE, if(true, NULL, NULL))", - TYPE_BOOLEAN, true); - // coalesce(FALSE, NULL, TRUE, NULL) - TestValue("coalesce(FALSE, if(true, NULL, NULL), TRUE, if(true, NULL, NULL))", - TYPE_BOOLEAN, false); - // coalesce(NULL, NULL, NULL TRUE, NULL, NULL) - TestValue("coalesce(if(true, NULL, NULL), if(true, NULL, NULL), if(true, NULL, NULL)," - "TRUE, if(true, NULL, NULL), if(true, NULL, NULL))", TYPE_BOOLEAN, true); + TestValue("coalesce(NULL, TRUE, NULL)", TYPE_BOOLEAN, true); + TestValue("coalesce(FALSE, NULL, TRUE, NULL)", TYPE_BOOLEAN, false); + TestValue("coalesce(NULL, NULL, NULL, TRUE, NULL, NULL)", TYPE_BOOLEAN, true); TestValue("coalesce(10)", TYPE_BIGINT, 10); -#if 0 TestValue("coalesce(NULL, 10, NULL)", TYPE_BIGINT, 10); - TestValue("coalesce(20, NULL, 10, NULL)", TYPE_BIGINT); - TestValue("coalesce(NULL, NULL, NULL, 10, NULL, NULL)", TYPE_BIGINT); -#endif + TestValue("coalesce(20, NULL, 10, NULL)", TYPE_BIGINT, 20); + TestValue("coalesce(NULL, NULL, NULL, 10, NULL, NULL)", TYPE_BIGINT, 10); TestValue("coalesce(5.5)", TYPE_DOUBLE, 5.5); -#if 0 TestValue("coalesce(NULL, 5.5, NULL)", TYPE_DOUBLE, 5.5); - TestValue("coalesce(8.8, NULL, 5.5, NULL)", TYPE_DOUBLE); - TestValue("coalesce(NULL, NULL, NULL, 5.5, NULL, NULL)", TYPE_DOUBLE); -#endif + TestValue("coalesce(8.8, NULL, 5.5, NULL)", TYPE_DOUBLE, 
8.8); + TestValue("coalesce(NULL, NULL, NULL, 5.5, NULL, NULL)", TYPE_DOUBLE, 5.5); TestStringValue("coalesce('abc')", "abc"); -#if 0 TestStringValue("coalesce(NULL, 'abc', NULL)", "abc"); TestStringValue("coalesce('defgh', NULL, 'abc', NULL)", "defgh"); TestStringValue("coalesce(NULL, NULL, NULL, 'abc', NULL, NULL)", "abc"); -#endif TimestampValue ats(1293872461); -#if 0 TimestampValue bts(929387245); -#endif TestTimestampValue("coalesce(cast('2011-01-01 09:01:01' as timestamp))", ats); -#if 0 TestTimestampValue("coalesce(NULL, cast('2011-01-01 09:01:01' as timestamp)," "NULL)", ats); TestTimestampValue("coalesce(cast('1999-06-14 19:07:25' as timestamp), NULL," "cast('2011-01-01 09:01:01' as timestamp), NULL)", bts); TestTimestampValue("coalesce(NULL, NULL, NULL," "cast('2011-01-01 09:01:01' as timestamp), NULL, NULL)", ats); -#endif // Test logic of case expr using int types. // The different types and casting are tested below. @@ -2014,27 +2267,29 @@ TEST_F(ExprTest, ConditionalFunctions) { TestValue("case 21 when 20 then 1 when 21 then 2 end", TYPE_TINYINT, 2); TestValue("case 21 when 20 then 1 when 19 then 2 when 21 then 3 end", TYPE_TINYINT, 3); // Should skip when-exprs that are NULL -#if 0 TestIsNull("case when NULL then 1 end", TYPE_TINYINT); - TestIsNull("case when NULL then 1 end else NULL end", TYPE_TINYINT); + TestIsNull("case when NULL then 1 else NULL end", TYPE_TINYINT); TestValue("case when NULL then 1 else 2 end", TYPE_TINYINT, 2); TestValue("case when NULL then 1 when true then 2 else 3 end", TYPE_TINYINT, 2); -#endif // Should return else expr, if case-expr is NULL. 
-#if 0 TestIsNull("case NULL when 1 then 1 end", TYPE_TINYINT); TestIsNull("case NULL when 1 then 1 else NULL end", TYPE_TINYINT); TestValue("case NULL when 1 then 1 else 2 end", TYPE_TINYINT, 2); TestValue("case 10 when NULL then 1 else 2 end", TYPE_TINYINT, 2); TestValue("case 10 when NULL then 1 when 10 then 2 else 3 end", TYPE_TINYINT, 2); -#endif + // Not statically known that it will return NULL. + TestIsNull("case NULL when NULL then true end", TYPE_BOOLEAN); + TestIsNull("case NULL when NULL then true else NULL end", TYPE_BOOLEAN); + // Statically known that it will return NULL. + TestIsNull("case NULL when NULL then NULL end", TYPE_NULL); + TestIsNull("case NULL when NULL then NULL else NULL end", TYPE_NULL); // Test all types in case/when exprs, without casts. unordered_map::iterator def_iter; for(def_iter = default_type_strs_.begin(); def_iter != default_type_strs_.end(); ++def_iter) { TestValue("case " + def_iter->second + " when " + def_iter->second + - " then true end", TYPE_BOOLEAN, true); + " then true else true end", TYPE_BOOLEAN, true); } // Test all int types in then and else exprs. 
diff --git a/be/src/exprs/expr.cc b/be/src/exprs/expr.cc index 957ec5114..a56c5d7ad 100644 --- a/be/src/exprs/expr.cc +++ b/be/src/exprs/expr.cc @@ -60,6 +60,8 @@ bool ParseString(const string& str, T* val) { void* ExprValue::TryParse(const string& str, PrimitiveType type) { switch(type) { + case TYPE_NULL: + return NULL; case TYPE_BOOLEAN: if (ParseString(str, &bool_val)) return &bool_val; break; @@ -125,10 +127,13 @@ Status Expr::CreateExprTree(ObjectPool* pool, const TExpr& texpr, Expr** root_ex } Expr* Expr::CreateLiteral(ObjectPool* pool, PrimitiveType type, void* data) { - DCHECK(data != NULL); + DCHECK(type == TYPE_NULL || data != NULL); Expr* result = NULL; switch (type) { + case TYPE_NULL: + result = new NullLiteral(TYPE_NULL); + break; case TYPE_BOOLEAN: result = new BoolLiteral(*reinterpret_cast(data)); break; @@ -158,6 +163,9 @@ Expr* Expr::CreateLiteral(ObjectPool* pool, PrimitiveType type, const string& st Expr* result = NULL; switch (type) { + case TYPE_NULL: + result = new NullLiteral(TYPE_NULL); + break; case TYPE_BOOLEAN: if (ParseString(str, &val.bool_val)) result = new BoolLiteral(&val.bool_val); @@ -601,7 +609,7 @@ Function* Expr::CreateComputeFnPrototype(LlvmCodeGen* codegen, const string& nam prototype.AddArgument(LlvmCodeGen::NamedVariable("row", PointerType::get(ptr_type, 0))); prototype.AddArgument(LlvmCodeGen::NamedVariable("state_data", ptr_type)); prototype.AddArgument(LlvmCodeGen::NamedVariable("is_null", - codegen->GetPtrType(TYPE_BOOLEAN))); + codegen->GetPtrType(TYPE_NULL))); Function* function = prototype.GeneratePrototype(); DCHECK(function != NULL); @@ -611,6 +619,8 @@ Function* Expr::CreateComputeFnPrototype(LlvmCodeGen* codegen, const string& nam Value* Expr::GetNullReturnValue(LlvmCodeGen* codegen) { switch (type()) { + case TYPE_NULL: + return ConstantInt::get(codegen->context(), APInt(1, 0, true)); case TYPE_BOOLEAN: return ConstantInt::get(codegen->context(), APInt(1, 0, true)); case TYPE_TINYINT: @@ -680,6 +690,10 @@ void* 
Expr::EvalCodegendComputeFn(Expr* expr, TupleRow* row) { void* result = NULL; bool is_null = false; switch (expr->type()) { + case TYPE_NULL: { + is_null = true; + break; + } case TYPE_BOOLEAN: { BoolComputeFn new_func = reinterpret_cast(func); expr->result_.bool_val = new_func(row, NULL, &is_null); diff --git a/be/src/exprs/expr.h b/be/src/exprs/expr.h index e3b808cd2..4fdc32e1c 100644 --- a/be/src/exprs/expr.h +++ b/be/src/exprs/expr.h @@ -116,6 +116,8 @@ struct ExprValue { // Sets the value for type to '0' and returns a pointer to the data void* SetToZero(PrimitiveType type) { switch (type) { + case TYPE_NULL: + return NULL; case TYPE_BOOLEAN: bool_val = false; return &bool_val; @@ -146,6 +148,8 @@ struct ExprValue { // Sets the value for type to min and returns a pointer to the data void* SetToMin(PrimitiveType type) { switch (type) { + case TYPE_NULL: + return NULL; case TYPE_BOOLEAN: bool_val = false; return &bool_val; @@ -176,6 +180,8 @@ struct ExprValue { // Sets the value for type to max and returns a pointer to the data void* SetToMax(PrimitiveType type) { switch (type) { + case TYPE_NULL: + return NULL; case TYPE_BOOLEAN: bool_val = true; return &bool_val; diff --git a/be/src/exprs/in-predicate.cc b/be/src/exprs/in-predicate.cc index fdcd5069c..f3a2d362c 100644 --- a/be/src/exprs/in-predicate.cc +++ b/be/src/exprs/in-predicate.cc @@ -55,7 +55,8 @@ void* InPredicate::ComputeFn(Expr* e, TupleRow* row) { int32_t num_children = e->GetNumChildren(); bool found_null = false; for (int32_t i = 1; i < num_children; ++i) { - DCHECK_EQ(type, e->children()[i]->type()); + DCHECK(type == e->children()[i]->type() || type == TYPE_NULL + || e->children()[i]->type() == TYPE_NULL); void* in_list_val = e->children()[i]->GetValue(row); if (in_list_val == NULL) { found_null = true; diff --git a/be/src/exprs/like-predicate.cc b/be/src/exprs/like-predicate.cc index aa9f823d1..6b28d8d39 100644 --- a/be/src/exprs/like-predicate.cc +++ b/be/src/exprs/like-predicate.cc @@ -125,9 
+125,22 @@ void* LikePredicate::RegexFn(Expr* e, TupleRow* row) { Status LikePredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc) { RETURN_IF_ERROR(Expr::PrepareChildren(state, row_desc)); DCHECK_EQ(children_.size(), 2); + switch (opcode_) { + case TExprOpcode::LIKE: + compute_fn_ = LikeFn; + break; + case TExprOpcode::REGEX: + compute_fn_ = RegexFn; + break; + default: + stringstream error; + error << "Invalid LIKE operator: " << opcode_; + return Status(error.str()); + } if (GetChild(1)->IsConstant()) { // determine pattern and decide on eval fn StringValue* pattern = static_cast(GetChild(1)->GetValue(NULL)); + if (pattern == NULL) return Status::OK; string pattern_str(pattern->ptr, pattern->len); // Generate a regex search to look for simple patterns: // - "%anything%": This maps to a fast substring search implementation. @@ -181,19 +194,6 @@ Status LikePredicate::Prepare(RuntimeState* state, const RowDescriptor& row_desc return Status("Invalid regular expression: " + pattern_str); } compute_fn_ = ConstantRegexFn; - } else { - switch (opcode_) { - case TExprOpcode::LIKE: - compute_fn_ = LikeFn; - break; - case TExprOpcode::REGEX: - compute_fn_ = RegexFn; - break; - default: - stringstream error; - error << "Invalid LIKE operator: " << opcode_; - return Status(error.str()); - } } return Status::OK; } diff --git a/be/src/exprs/timestamp-functions.cc b/be/src/exprs/timestamp-functions.cc index 9ec6ca1be..cbf43ad13 100644 --- a/be/src/exprs/timestamp-functions.cc +++ b/be/src/exprs/timestamp-functions.cc @@ -113,6 +113,8 @@ void* TimestampFunctions::Unix(Expr* e, TupleRow* row) { // http://docs.oracle.com/javase/1.4.2/docs/api/java/text/SimpleDateFormat.html // Convert them to boost format strings. StringValue* TimestampFunctions::CheckFormat(StringValue* format) { + if(format == NULL) return NULL; + // For now the format must be of the form: yyyy-MM-dd HH:mm:ss // where the time part is optional. 
switch(format->len) { diff --git a/be/src/runtime/primitive-type.cc b/be/src/runtime/primitive-type.cc index c8085721e..24ced3a9c 100644 --- a/be/src/runtime/primitive-type.cc +++ b/be/src/runtime/primitive-type.cc @@ -22,6 +22,7 @@ namespace impala { PrimitiveType ThriftToType(TPrimitiveType::type ttype) { switch (ttype) { case TPrimitiveType::INVALID_TYPE: return INVALID_TYPE; + case TPrimitiveType::NULL_TYPE: return TYPE_NULL; case TPrimitiveType::BOOLEAN: return TYPE_BOOLEAN; case TPrimitiveType::TINYINT: return TYPE_TINYINT; case TPrimitiveType::SMALLINT: return TYPE_SMALLINT; @@ -42,6 +43,7 @@ PrimitiveType ThriftToType(TPrimitiveType::type ttype) { TPrimitiveType::type ToThrift(PrimitiveType ptype) { switch (ptype) { case INVALID_TYPE: return TPrimitiveType::INVALID_TYPE; + case TYPE_NULL: return TPrimitiveType::NULL_TYPE; case TYPE_BOOLEAN: return TPrimitiveType::BOOLEAN; case TYPE_TINYINT: return TPrimitiveType::TINYINT; case TYPE_SMALLINT: return TPrimitiveType::SMALLINT; @@ -62,6 +64,7 @@ TPrimitiveType::type ToThrift(PrimitiveType ptype) { string TypeToString(PrimitiveType t) { switch (t) { case INVALID_TYPE: return "INVALID"; + case TYPE_NULL: return "NULL"; case TYPE_BOOLEAN: return "BOOL"; case TYPE_TINYINT: return "TINYINT"; case TYPE_SMALLINT: return "SMALLINT"; @@ -83,6 +86,7 @@ string TypeToOdbcString(PrimitiveType t) { // ODBC driver requires types in lower case switch (t) { case INVALID_TYPE: return "invalid"; + case TYPE_NULL: return "null"; case TYPE_BOOLEAN: return "boolean"; case TYPE_TINYINT: return "tinyint"; case TYPE_SMALLINT: return "smallint"; @@ -102,6 +106,7 @@ string TypeToOdbcString(PrimitiveType t) { TTypeId::type TypeToHiveServer2Type(PrimitiveType t) { switch (t) { + case TYPE_NULL: return TTypeId::USER_DEFINED_TYPE; case TYPE_BOOLEAN: return TTypeId::BOOLEAN_TYPE; case TYPE_TINYINT: return TTypeId::TINYINT_TYPE; case TYPE_SMALLINT: return TTypeId::SMALLINT_TYPE; diff --git a/be/src/runtime/primitive-type.h 
b/be/src/runtime/primitive-type.h index 7d4efe83a..77a1a09a5 100644 --- a/be/src/runtime/primitive-type.h +++ b/be/src/runtime/primitive-type.h @@ -26,6 +26,7 @@ namespace impala { enum PrimitiveType { INVALID_TYPE = 0, + TYPE_NULL, TYPE_BOOLEAN, TYPE_TINYINT, TYPE_SMALLINT, @@ -38,7 +39,7 @@ enum PrimitiveType { TYPE_DATE, // Not implemented TYPE_DATETIME, // Not implemented TYPE_BINARY, // Not implemented - TYPE_DECIMAL // Not implemented + TYPE_DECIMAL // Not implemented }; // Returns the byte size of 'type' Returns 0 for variable length types. @@ -46,6 +47,7 @@ inline int GetByteSize(PrimitiveType type) { switch (type) { case TYPE_STRING: return 0; + case TYPE_NULL: case TYPE_BOOLEAN: case TYPE_TINYINT: return 1; diff --git a/be/src/runtime/raw-value.cc b/be/src/runtime/raw-value.cc index e045d115b..0982b5117 100644 --- a/be/src/runtime/raw-value.cc +++ b/be/src/runtime/raw-value.cc @@ -151,6 +151,8 @@ int RawValue::Compare(const void* v1, const void* v2, PrimitiveType type) { int32_t i1, i2; int64_t b1, b2; switch (type) { + case TYPE_NULL: + return 0; case TYPE_BOOLEAN: return *reinterpret_cast(v1) - *reinterpret_cast(v2); case TYPE_TINYINT: diff --git a/be/src/service/impala-hs2-server.cc b/be/src/service/impala-hs2-server.cc index 766c2d6e9..b69fc33d1 100644 --- a/be/src/service/impala-hs2-server.cc +++ b/be/src/service/impala-hs2-server.cc @@ -710,6 +710,11 @@ void ImpalaServer::ExprValueToHiveServer2TColumnValue(const void* value, apache::hive::service::cli::thrift::TColumnValue* hs2_col_val) { bool not_null = (value != NULL); switch (type) { + case TPrimitiveType::NULL_TYPE: + // Set NULLs in the boolVal. 
+ hs2_col_val->__isset.boolVal = true; + hs2_col_val->boolVal.__isset.value = false; + break; case TPrimitiveType::BOOLEAN: hs2_col_val->__isset.boolVal = true; if (not_null) hs2_col_val->boolVal.value = *reinterpret_cast(value); diff --git a/common/thrift/Types.thrift b/common/thrift/Types.thrift index 0ff393792..57c8df1a9 100644 --- a/common/thrift/Types.thrift +++ b/common/thrift/Types.thrift @@ -23,6 +23,7 @@ typedef i32 TTableId enum TPrimitiveType { INVALID_TYPE, + NULL_TYPE, BOOLEAN, TINYINT, SMALLINT, diff --git a/fe/src/main/cup/sql-parser.y b/fe/src/main/cup/sql-parser.y index ed842bea4..4a93889e2 100644 --- a/fe/src/main/cup/sql-parser.y +++ b/fe/src/main/cup/sql-parser.y @@ -185,7 +185,6 @@ terminal BITAND, BITOR, BITXOR, BITNOT; terminal EQUAL, NOT, LESSTHAN, GREATERTHAN; terminal String IDENT; terminal String NUMERIC_OVERFLOW; -terminal Boolean BOOL_LITERAL; terminal Long INTEGER_LITERAL; terminal Double FLOATINGPOINT_LITERAL; terminal String STRING_LITERAL; @@ -212,7 +211,7 @@ nonterminal SelectList select_clause; nonterminal SelectList select_list; nonterminal SelectListItem select_list_item; nonterminal SelectListItem star_expr ; -nonterminal Expr expr, arithmetic_expr, timestamp_arithmetic_expr; +nonterminal Expr value_expr, expr, arithmetic_expr, timestamp_arithmetic_expr; nonterminal ArrayList expr_list; nonterminal ArrayList func_arg_list; nonterminal String alias_clause; @@ -221,7 +220,7 @@ nonterminal TableName table_name; nonterminal Predicate where_clause; nonterminal Predicate predicate, between_predicate, comparison_predicate, compound_predicate, in_predicate, like_predicate; -nonterminal LiteralPredicate literal_predicate; +nonterminal Predicate predicate_or_null; nonterminal ArrayList group_by_clause; nonterminal Predicate having_clause; nonterminal ArrayList order_by_elements, order_by_clause; @@ -241,7 +240,6 @@ nonterminal JoinOperator join_operator; nonterminal opt_inner, opt_outer; nonterminal PrimitiveType primitive_type; 
nonterminal Expr sign_chain_expr; -nonterminal BinaryPredicate.Operator binary_comparison_operator; nonterminal InsertStmt insert_stmt; nonterminal ArrayList partition_spec; nonterminal ArrayList partition_clause; @@ -249,7 +247,6 @@ nonterminal ArrayList static_partition_key_value_list; nonterminal ArrayList partition_key_value_list; nonterminal PartitionKeyValue partition_key_value; nonterminal PartitionKeyValue static_partition_key_value; -nonterminal Expr expr_or_predicate; nonterminal Qualifier union_op; nonterminal AlterTableStmt alter_tbl_stmt; @@ -285,7 +282,7 @@ nonterminal String optional_kw_table; precedence left KW_OR; precedence left KW_AND; -precedence left KW_NOT; +precedence left KW_NOT, NOT; precedence left KW_LIKE, KW_RLIKE, KW_REGEXP; precedence left EQUAL, LESSTHAN, GREATERTHAN; precedence left ADD, SUBTRACT; @@ -296,6 +293,7 @@ precedence left RPAREN; precedence left KW_IN; // Support chaining of timestamp arithmetic exprs. precedence left KW_INTERVAL; +precedence left KW_IS; start with stmt; @@ -601,11 +599,19 @@ partition_key_value ::= static_partition_key_value ::= // Static partition key values. - IDENT:column EQUAL literal:value - {: RESULT = new PartitionKeyValue(column, (LiteralExpr) value); :} - // Static partition key value with NULL. 
- | IDENT:column EQUAL KW_NULL - {: RESULT = new PartitionKeyValue(column, new NullLiteral()); :} + IDENT:column EQUAL literal:l + {: + // transform NULL literal predicate into literal expr + if (l instanceof LiteralPredicate) { + RESULT = new PartitionKeyValue(column, new NullLiteral()); + } else { + RESULT = new PartitionKeyValue(column, (LiteralExpr)l); + } + :} + | IDENT:column EQUAL KW_TRUE + {: RESULT = new PartitionKeyValue(column, new BoolLiteral(true)); :} + | IDENT:column EQUAL KW_FALSE + {: RESULT = new PartitionKeyValue(column, new BoolLiteral(false)); :} ; // Our parsing of UNION is slightly different from MySQL's: @@ -800,11 +806,6 @@ select_list_item ::= {: RESULT = new SelectListItem(expr, alias); :} | expr:expr {: RESULT = new SelectListItem(expr, null); :} - // allow predicates in the select list - | predicate:p alias_clause:alias - {: RESULT = new SelectListItem(p, alias); :} - | predicate:p - {: RESULT = new SelectListItem(p, null); :} | star_expr:expr {: RESULT = expr; :} ; @@ -860,7 +861,7 @@ table_ref_list ::= RESULT = list; :} | table_ref_list:list join_operator:op table_ref:t - KW_ON predicate:p + KW_ON predicate_or_null:p {: t.setJoinOp((JoinOperator) op); t.setOnClause(p); @@ -948,7 +949,7 @@ expr_list ::= ; where_clause ::= - KW_WHERE predicate:p + KW_WHERE predicate_or_null:p {: RESULT = p; :} | /* empty */ {: RESULT = null; :} @@ -962,7 +963,7 @@ group_by_clause ::= ; having_clause ::= - KW_HAVING predicate:p + KW_HAVING predicate_or_null:p {: RESULT = p; :} | /* empty */ {: RESULT = null; :} @@ -1011,7 +1012,7 @@ cast_expr ::= ; case_expr ::= - KW_CASE expr_or_predicate:caseExpr + KW_CASE expr:caseExpr case_when_clause_list:whenClauseList case_else_clause:elseExpr KW_END @@ -1024,14 +1025,14 @@ case_expr ::= ; case_when_clause_list ::= - KW_WHEN expr_or_predicate:whenExpr KW_THEN expr_or_predicate:thenExpr + KW_WHEN expr:whenExpr KW_THEN expr:thenExpr {: ArrayList list = new ArrayList(); list.add(new CaseWhenClause(whenExpr, 
thenExpr)); RESULT = list; :} - | case_when_clause_list:list KW_WHEN expr_or_predicate:whenExpr - KW_THEN expr_or_predicate:thenExpr + | case_when_clause_list:list KW_WHEN expr:whenExpr + KW_THEN expr:thenExpr {: list.add(new CaseWhenClause(whenExpr, thenExpr)); RESULT = list; @@ -1039,21 +1040,14 @@ case_when_clause_list ::= ; case_else_clause ::= - KW_ELSE expr_or_predicate:e + KW_ELSE expr:e {: RESULT = e; :} | /* emtpy */ {: RESULT = null; :} ; -expr_or_predicate ::= - expr:e - {: RESULT = e; :} - | predicate:p - {: RESULT = p; :} - ; - -sign_chain_expr ::= - SUBTRACT expr:e +sign_chain_expr ::= + SUBTRACT value_expr:e {: // integrate signs into literals if (e.isLiteral() && e.getType().isNumericType()) { @@ -1063,11 +1057,18 @@ sign_chain_expr ::= RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.MULTIPLY, new IntLiteral((long)-1), e); } :} - | ADD expr:e + | ADD value_expr:e {: RESULT = e; :} ; expr ::= + value_expr:e + {: RESULT = e; :} + | predicate:p + {: RESULT = p; :} + ; + +value_expr ::= sign_chain_expr:e {: RESULT = e; :} | literal:l @@ -1091,45 +1092,46 @@ expr ::= {: RESULT = e; :} | arithmetic_expr:e {: RESULT = e; :} - | LPAREN expr:e RPAREN + | LPAREN value_expr:e RPAREN {: RESULT = e; :} ; func_arg_list ::= - // Function arguments can be exprs as well as predicates. - expr_or_predicate:item + expr:item {: ArrayList list = new ArrayList(); list.add(item); RESULT = list; :} - | func_arg_list:list COMMA expr_or_predicate:item + | func_arg_list:list COMMA expr:item {: list.add(item); RESULT = list; :} ; +// We use value_expr instead of expr in this production because arithmetic +// does not make sense on bool values. We also avoid shift/reduce conflicts. 
arithmetic_expr ::= - expr:e1 STAR expr:e2 + value_expr:e1 STAR value_expr:e2 {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.MULTIPLY, e1, e2); :} - | expr:e1 DIVIDE expr:e2 + | value_expr:e1 DIVIDE value_expr:e2 {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.DIVIDE, e1, e2); :} - | expr:e1 MOD expr:e2 + | value_expr:e1 MOD value_expr:e2 {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.MOD, e1, e2); :} - | expr:e1 KW_DIV expr:e2 + | value_expr:e1 KW_DIV value_expr:e2 {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.INT_DIVIDE, e1, e2); :} - | expr:e1 ADD expr:e2 + | value_expr:e1 ADD value_expr:e2 {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.ADD, e1, e2); :} - | expr:e1 SUBTRACT expr:e2 + | value_expr:e1 SUBTRACT value_expr:e2 {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.SUBTRACT, e1, e2); :} - | expr:e1 BITAND expr:e2 + | value_expr:e1 BITAND value_expr:e2 {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.BITAND, e1, e2); :} - | expr:e1 BITOR expr:e2 + | value_expr:e1 BITOR value_expr:e2 {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.BITOR, e1, e2); :} - | expr:e1 BITXOR expr:e2 + | value_expr:e1 BITXOR value_expr:e2 {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.BITXOR, e1, e2); :} - | BITNOT expr:e + | BITNOT value_expr:e {: RESULT = new ArithmeticExpr(ArithmeticExpr.Operator.BITNOT, e, null); :} ; @@ -1137,15 +1139,15 @@ arithmetic_expr ::= // This way we do not need to change existing uses of IDENT. // We chose not to make DATE_ADD and DATE_SUB keywords for the same reason. 
timestamp_arithmetic_expr ::= - KW_INTERVAL expr:v IDENT:u ADD expr:t + KW_INTERVAL value_expr:v IDENT:u ADD value_expr:t {: RESULT = new TimestampArithmeticExpr(ArithmeticExpr.Operator.ADD, t, v, u, true); :} - | expr:t ADD KW_INTERVAL expr:v IDENT:u + | value_expr:t ADD KW_INTERVAL value_expr:v IDENT:u {: RESULT = new TimestampArithmeticExpr(ArithmeticExpr.Operator.ADD, t, v, u, false); :} // Set precedence to KW_INTERVAL (which is higher than ADD) for chaining. %prec KW_INTERVAL - | expr:t SUBTRACT KW_INTERVAL expr:v IDENT:u + | value_expr:t SUBTRACT KW_INTERVAL value_expr:v IDENT:u {: RESULT = new TimestampArithmeticExpr(ArithmeticExpr.Operator.SUBTRACT, t, v, u, false); @@ -1155,7 +1157,8 @@ timestamp_arithmetic_expr ::= // Timestamp arithmetic expr that looks like a function call. // We use func_arg_list instead of expr to avoid a shift/reduce conflict with // func_arg_list on COMMA, and report an error if the list contains more than one expr. - | IDENT:functionName LPAREN func_arg_list:l COMMA KW_INTERVAL expr:v IDENT:u RPAREN + | IDENT:functionName LPAREN func_arg_list:l COMMA + KW_INTERVAL value_expr:v IDENT:u RPAREN {: if (l.size() > 1) { // Report parsing failure on keyword interval. @@ -1172,9 +1175,9 @@ literal ::= {: RESULT = new FloatLiteral(l); :} | STRING_LITERAL:l {: RESULT = new StringLiteral(l); :} - | BOOL_LITERAL:l - {: RESULT = new BoolLiteral(l); :} - | UNMATCHED_STRING_LITERAL:l expr:e + | KW_NULL:l + {: RESULT = LiteralPredicate.Null(); :} + | UNMATCHED_STRING_LITERAL:l value_expr:e {: // we have an unmatched string literal. 
// to correctly report the root cause of this syntax error @@ -1232,7 +1235,11 @@ aggregate_param_list ::= ; predicate ::= - expr:e KW_IS KW_NULL + KW_TRUE:l + {: RESULT = LiteralPredicate.True(); :} + | KW_FALSE:l + {: RESULT = LiteralPredicate.False(); :} + | expr:e KW_IS KW_NULL {: RESULT = new IsNullPredicate(e, false); :} | expr:e KW_IS KW_NOT KW_NULL {: RESULT = new IsNullPredicate(e, true); :} @@ -1246,115 +1253,84 @@ predicate ::= {: RESULT = p; :} | like_predicate:p {: RESULT = p; :} - | literal_predicate:p - {: RESULT = p; :} | LPAREN predicate:p RPAREN {: RESULT = p; :} ; -binary_comparison_operator ::= - EQUAL:op - {: RESULT = BinaryPredicate.Operator.EQ; :} - | NOT EQUAL:op - {: RESULT = BinaryPredicate.Operator.NE; :} - | LESSTHAN GREATERTHAN:op - {: RESULT = BinaryPredicate.Operator.NE; :} - | LESSTHAN EQUAL:op - {: RESULT = BinaryPredicate.Operator.LE; :} - | GREATERTHAN EQUAL:op - {: RESULT = BinaryPredicate.Operator.GE; :} - | LESSTHAN:op - {: RESULT = BinaryPredicate.Operator.LT; :} - | GREATERTHAN:op - {: RESULT = BinaryPredicate.Operator.GT; :} +predicate_or_null ::= + predicate:p + {: RESULT = p; :} + | KW_NULL + {: RESULT = LiteralPredicate.Null(); :} + // TODO: How can we parse ((NULL))? + | LPAREN KW_NULL RPAREN + {: RESULT = LiteralPredicate.Null(); :} ; comparison_predicate ::= - expr:e1 binary_comparison_operator:op expr:e2 - {: RESULT = new BinaryPredicate(op, e1, e2); :} - // A bool/null literal should be both an expr (to act as a BoolLiteral) - // and a predicate (to act as a LiteralPredicate). - // Implementing this directly will lead to shift-reduce conflicts. - // We decided that a bool literal shall be literal predicate. - // This means we must list all combinations with bool literals in the ops below, - // transforming the literal predicate to a literal expr. 
- // We could have chosen the other way (bool literal as a literal expr), but - // this would have required more and uglier code, - // e.g., a special-case rule for dealing with "where true/false". - | expr:e1 binary_comparison_operator:op literal_predicate:l - {: - Expr e2 = (l.isNull()) ? new NullLiteral() : new BoolLiteral(l.getValue()); - RESULT = new BinaryPredicate(op, e1, e2); - :} - | literal_predicate:l binary_comparison_operator:op expr:e2 - {: - Expr e1 = (l.isNull()) ? new NullLiteral() : new BoolLiteral(l.getValue()); - RESULT = new BinaryPredicate(op, e1, e2); - :} - | literal_predicate:l1 binary_comparison_operator:op literal_predicate:l2 - {: - Expr e1 = (l1.isNull()) ? new NullLiteral() : new BoolLiteral(l1.getValue()); - Expr e2 = (l2.isNull()) ? new NullLiteral() : new BoolLiteral(l2.getValue()); - RESULT = new BinaryPredicate(op, e1, e2); - :} + expr:e1 EQUAL:op expr:e2 + {: RESULT = new BinaryPredicate(BinaryPredicate.Operator.EQ, e1, e2); :} + | expr:e1 NOT EQUAL:op expr:e2 + {: RESULT = new BinaryPredicate(BinaryPredicate.Operator.NE, e1, e2); :} + | expr:e1 LESSTHAN GREATERTHAN:op expr:e2 + {: RESULT = new BinaryPredicate(BinaryPredicate.Operator.NE, e1, e2); :} + | expr:e1 LESSTHAN EQUAL:op expr:e2 + {: RESULT = new BinaryPredicate(BinaryPredicate.Operator.LE, e1, e2); :} + | expr:e1 GREATERTHAN EQUAL:op expr:e2 + {: RESULT = new BinaryPredicate(BinaryPredicate.Operator.GE, e1, e2); :} + | expr:e1 LESSTHAN:op expr:e2 + {: RESULT = new BinaryPredicate(BinaryPredicate.Operator.LT, e1, e2); :} + | expr:e1 GREATERTHAN:op expr:e2 + {: RESULT = new BinaryPredicate(BinaryPredicate.Operator.GT, e1, e2); :} ; like_predicate ::= - expr:e1 KW_LIKE expr:e2 + value_expr:e1 KW_LIKE value_expr:e2 {: RESULT = new LikePredicate(LikePredicate.Operator.LIKE, e1, e2); :} - | expr:e1 KW_RLIKE expr:e2 + | value_expr:e1 KW_RLIKE value_expr:e2 {: RESULT = new LikePredicate(LikePredicate.Operator.RLIKE, e1, e2); :} - | expr:e1 KW_REGEXP expr:e2 + | value_expr:e1 
KW_REGEXP value_expr:e2 {: RESULT = new LikePredicate(LikePredicate.Operator.REGEXP, e1, e2); :} - | expr:e1 KW_NOT KW_LIKE expr:e2 + | value_expr:e1 KW_NOT KW_LIKE value_expr:e2 {: RESULT = new CompoundPredicate(CompoundPredicate.Operator.NOT, new LikePredicate(LikePredicate.Operator.LIKE, e1, e2), null); :} - | expr:e1 KW_NOT KW_RLIKE expr:e2 + | value_expr:e1 KW_NOT KW_RLIKE value_expr:e2 {: RESULT = new CompoundPredicate(CompoundPredicate.Operator.NOT, new LikePredicate(LikePredicate.Operator.RLIKE, e1, e2), null); :} - | expr:e1 KW_NOT KW_REGEXP expr:e2 + | value_expr:e1 KW_NOT KW_REGEXP value_expr:e2 {: RESULT = new CompoundPredicate(CompoundPredicate.Operator.NOT, new LikePredicate(LikePredicate.Operator.REGEXP, e1, e2), null); :} ; between_predicate ::= - expr:e1 KW_BETWEEN expr:e2 KW_AND expr:e3 + value_expr:e1 KW_BETWEEN value_expr:e2 KW_AND value_expr:e3 {: RESULT = new BetweenPredicate(e1, e2, e3, false); :} - | expr:e1 KW_NOT KW_BETWEEN expr:e2 KW_AND expr:e3 + | value_expr:e1 KW_NOT KW_BETWEEN value_expr:e2 KW_AND value_expr:e3 {: RESULT = new BetweenPredicate(e1, e2, e3, true); :} ; -compound_predicate ::= - predicate:p1 KW_AND predicate:p2 - {: RESULT = new CompoundPredicate(CompoundPredicate.Operator.AND, p1, p2); :} - | predicate:p1 KW_OR predicate:p2 - {: RESULT = new CompoundPredicate(CompoundPredicate.Operator.OR, p1, p2); :} - | KW_NOT predicate:p - {: RESULT = new CompoundPredicate(CompoundPredicate.Operator.NOT, p, null); :} - | NOT predicate:p - {: RESULT = new CompoundPredicate(CompoundPredicate.Operator.NOT, p, null); :} - ; - -// Using expr_or_predicate here results in an unresolvable shift/reduce conflict. -// Instead, we must list expr and predicate explicitly. +// Do not use expr in this production. Otherwise, +// the in 'NOT IN()' version will not parse. 
in_predicate ::= - expr:e KW_IN LPAREN func_arg_list:l RPAREN - {: RESULT = new InPredicate(e, l, false); :} - | predicate:p KW_IN LPAREN func_arg_list:l RPAREN - {: RESULT = new InPredicate(p, l, false); :} - | expr:e KW_NOT KW_IN LPAREN func_arg_list:l RPAREN + value_expr:e KW_IN LPAREN func_arg_list:l RPAREN + {: RESULT = new InPredicate(e, l, false); :} + | value_expr:e KW_NOT KW_IN LPAREN func_arg_list:l RPAREN + {: RESULT = new InPredicate(e, l, true); :} + | predicate:e KW_IN LPAREN func_arg_list:l RPAREN + {: RESULT = new InPredicate(e, l, false); :} + | predicate:e KW_NOT KW_IN LPAREN func_arg_list:l RPAREN {: RESULT = new InPredicate(e, l, true); :} - | predicate:p KW_NOT KW_IN LPAREN func_arg_list:l RPAREN - {: RESULT = new InPredicate(p, l, true); :} ; -literal_predicate ::= - KW_TRUE - {: RESULT = LiteralPredicate.True(); :} - | KW_FALSE - {: RESULT = LiteralPredicate.False(); :} - | KW_NULL - {: RESULT = LiteralPredicate.Null(); :} +compound_predicate ::= + expr:p1 KW_AND expr:p2 + {: RESULT = new CompoundPredicate(CompoundPredicate.Operator.AND, p1, p2); :} + | expr:p1 KW_OR expr:p2 + {: RESULT = new CompoundPredicate(CompoundPredicate.Operator.OR, p1, p2); :} + | KW_NOT expr:p + {: RESULT = new CompoundPredicate(CompoundPredicate.Operator.NOT, p, null); :} + | NOT expr:p + {: RESULT = new CompoundPredicate(CompoundPredicate.Operator.NOT, p, null); :} ; column_ref ::= @@ -1397,3 +1373,4 @@ primitive_type ::= | KW_STRING {: RESULT = PrimitiveType.STRING; :} ; + diff --git a/fe/src/main/java/com/cloudera/impala/analysis/AggregateExpr.java b/fe/src/main/java/com/cloudera/impala/analysis/AggregateExpr.java index 681d41c2c..3b488c751 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/AggregateExpr.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/AggregateExpr.java @@ -173,18 +173,18 @@ public class AggregateExpr extends Expr { Expr arg = (Expr) getChild(0); // SUM and AVG cannot be applied to non-numeric types - if (op == Operator.SUM && 
!arg.type.isNumericType()) { + if (op == Operator.SUM && !arg.type.isNumericType() && !arg.type.isNull()) { throw new AnalysisException( "SUM requires a numeric parameter: " + this.toSql()); } - if (op == Operator.AVG && - (!arg.type.isNumericType() && arg.type != PrimitiveType.TIMESTAMP)) { + if (op == Operator.AVG && !arg.type.isNumericType() + && arg.type != PrimitiveType.TIMESTAMP && !arg.type.isNull()) { throw new AnalysisException( "AVG requires a numeric or timestamp parameter: " + this.toSql()); } if ((op == Operator.MERGE_PC || op == Operator.MERGE_PCSA) - && !arg.type.isStringType()) { + && !arg.type.isStringType() && !arg.type.isNull()) { Preconditions.checkState(false, "MERGEPC(SA) expects string type input but gets " + arg.type.toString()); diff --git a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java b/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java index 30e3acc99..0897d6d5b 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java @@ -587,12 +587,6 @@ public class Analyzer { * lastCompatibleExpr is passed for error reporting purposes, * but note that lastCompatibleExpr may not yet have lastCompatibleType, * because it was not cast yet. - * - * @param lastCompatibleType - * @param expr - * @param lastExprIndex - * @return - * @throws AnalysisException */ public PrimitiveType getCompatibleType(PrimitiveType lastCompatibleType, Expr lastCompatibleExpr, Expr expr) @@ -617,9 +611,6 @@ public class Analyzer { * Calls analyze() on each of the exprs. * Throw an AnalysisException if the types are incompatible, * returns compatible type otherwise. 
- * - * @param exprs - * @throws AnalysisException */ public PrimitiveType castAllToCompatibleType(List exprs) throws AnalysisException { @@ -628,8 +619,8 @@ public class Analyzer { PrimitiveType compatibleType = null; for (int i = 0; i < exprs.size(); ++i) { exprs.get(i).analyze(this); - compatibleType = getCompatibleType(compatibleType, - lastCompatibleExpr, exprs.get(i)); + compatibleType = getCompatibleType(compatibleType, lastCompatibleExpr, + exprs.get(i)); } // Add implicit casts if necessary. for (int i = 0; i < exprs.size(); ++i) { diff --git a/fe/src/main/java/com/cloudera/impala/analysis/ArithmeticExpr.java b/fe/src/main/java/com/cloudera/impala/analysis/ArithmeticExpr.java index e15886c3e..fa609848d 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/ArithmeticExpr.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/ArithmeticExpr.java @@ -108,7 +108,7 @@ public class ArithmeticExpr extends Expr { super.analyze(analyzer); for (Expr child: children) { Expr operand = (Expr) child; - if (!operand.type.isNumericType()) { + if (!operand.type.isNumericType() && !operand.type.isNull()) { throw new AnalysisException("Arithmetic operation requires " + "numeric operands: " + toSql()); } @@ -120,10 +120,10 @@ public class ArithmeticExpr extends Expr { OpcodeRegistry.Signature match = OpcodeRegistry.instance().getFunctionInfo(op.functionOp, true, type); if (match == null) { - throw new AnalysisException("Bitwise operations only allowed on fixed-point types: " - + toSql()); + throw new AnalysisException("Bitwise operations only allowed on fixed-point " + + "types: " + toSql()); } - Preconditions.checkState(type == match.returnType); + Preconditions.checkState(type == match.returnType || type.isNull()); opcode = match.opcode; return; } @@ -136,7 +136,7 @@ public class ArithmeticExpr extends Expr { case ADD: case SUBTRACT: // numeric ops must be promoted to highest-resolution type - // (otherwise we can't guarantee that a b won't result in an 
overflow/underflow) + // (otherwise we can't guarantee that a b won't overflow/underflow) type = PrimitiveType.getAssignmentCompatibleType(t1, t2).getMaxResolutionType(); Preconditions.checkState(type.isValid()); break; @@ -156,8 +156,8 @@ public class ArithmeticExpr extends Expr { op.toString() + ": " + this.toSql()); } type = PrimitiveType.getAssignmentCompatibleType(t1, t2); - // the result is always an integer - Preconditions.checkState(type.isFixedPointType()); + // the result is always an integer or null + Preconditions.checkState(type.isFixedPointType() || type.isNull()); break; default: // the programmer forgot to deal with a case diff --git a/fe/src/main/java/com/cloudera/impala/analysis/BoolLiteral.java b/fe/src/main/java/com/cloudera/impala/analysis/BoolLiteral.java index 33a3a8204..a4e8362f0 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/BoolLiteral.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/BoolLiteral.java @@ -20,6 +20,7 @@ import com.cloudera.impala.thrift.TBoolLiteral; import com.cloudera.impala.thrift.TExprNode; import com.cloudera.impala.thrift.TExprNodeType; +// TODO: Decide between keeping only this class or only BoolLiteral and NullLiteral. public class BoolLiteral extends LiteralExpr { private final boolean value; diff --git a/fe/src/main/java/com/cloudera/impala/analysis/CaseExpr.java b/fe/src/main/java/com/cloudera/impala/analysis/CaseExpr.java index df3af2918..c3e397a11 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/CaseExpr.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/CaseExpr.java @@ -122,8 +122,7 @@ public class CaseExpr extends Expr { Expr whenExpr = children.get(i); if (hasCaseExpr) { // Determine maximum compatible type of the case expr, - // and all when expr seen so far. - // We will add casts to them at the very end. + // and all when exprs seen so far. We will add casts to them at the very end. 
whenType = analyzer.getCompatibleType(whenType, lastCompatibleWhenExpr, whenExpr); lastCompatibleWhenExpr = whenExpr; diff --git a/fe/src/main/java/com/cloudera/impala/analysis/CastExpr.java b/fe/src/main/java/com/cloudera/impala/analysis/CastExpr.java index 176c9dc9b..0ef36df51 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/CastExpr.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/CastExpr.java @@ -76,7 +76,7 @@ public class CastExpr extends Expr { // this cast may result in loss of precision, but the user requested it this.type = targetType; OpcodeRegistry.Signature match = OpcodeRegistry.instance().getFunctionInfo( - FunctionOperator.CAST, false, getChild(0).getType(), type); + FunctionOperator.CAST, childType.isNull(), getChild(0).getType(), type); if (match == null) { throw new AnalysisException("Invalid type cast of " + getChild(0).toSql() + " from " + childType + " to " + targetType); diff --git a/fe/src/main/java/com/cloudera/impala/analysis/CompoundPredicate.java b/fe/src/main/java/com/cloudera/impala/analysis/CompoundPredicate.java index ba67e30c3..1f6791b40 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/CompoundPredicate.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/CompoundPredicate.java @@ -53,15 +53,15 @@ public class CompoundPredicate extends Predicate { } private final Operator op; - public CompoundPredicate(Operator op, Predicate p1, Predicate p2) { + public CompoundPredicate(Operator op, Expr e1, Expr e2) { super(); this.op = op; - Preconditions.checkNotNull(p1); - children.add(p1); - Preconditions.checkArgument(op == Operator.NOT && p2 == null - || op != Operator.NOT && p2 != null); - if (p2 != null) { - children.add(p2); + Preconditions.checkNotNull(e1); + children.add(e1); + Preconditions.checkArgument(op == Operator.NOT && e2 == null + || op != Operator.NOT && e2 != null); + if (e2 != null) { + children.add(e2); } } @@ -97,6 +97,15 @@ public class CompoundPredicate extends Predicate { public void 
analyze(Analyzer analyzer) throws AnalysisException { super.analyze(analyzer); + // Check that children are predicates. + for (Expr e : children) { + if (!(e instanceof Predicate)) { + throw new AnalysisException(String.format("Operand '%s' part of predicate " + + "'%s' should return type 'BOOLEAN' but returns type '%s'.", + e.toSql(), toSql(), e.getType())); + } + } + if (getChild(0).selectivity == -1 || children.size() == 2 && getChild(1).selectivity == -1) { // give up if we're missing an input diff --git a/fe/src/main/java/com/cloudera/impala/analysis/Expr.java b/fe/src/main/java/com/cloudera/impala/analysis/Expr.java index cb05801a5..60124372b 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/Expr.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/Expr.java @@ -539,6 +539,11 @@ abstract public class Expr extends TreeNode implements ParseNode, Cloneabl public final Expr castTo(PrimitiveType targetType) throws AnalysisException { PrimitiveType type = PrimitiveType.getAssignmentCompatibleType(this.type, targetType); Preconditions.checkState(type.isValid(), "cast %s to %s", this.type, targetType); + // If the targetType is NULL_TYPE then ignore the cast because NULL_TYPE + // is compatible with all types and no cast is necessary. + if (targetType.isNull()) { + return this; + } // requested cast must be to assignment-compatible type // (which implies no loss of precision) Preconditions.checkArgument(type == targetType); diff --git a/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java index d93655389..715109a6d 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java @@ -322,7 +322,7 @@ public class InsertStmt extends ParseNodeBase { + exprType.toString() + "' in column '" + expr.toSql() + "'."); } // Loss of precision when inserting into the table. 
- if (compatibleType != colType) { + if (compatibleType != colType && !compatibleType.isNull()) { throw new AnalysisException("Inserting into target table '" + targetTableName.getTbl() + "' may result in loss of precision.\n" + "Would need to cast '" diff --git a/fe/src/main/java/com/cloudera/impala/analysis/LikePredicate.java b/fe/src/main/java/com/cloudera/impala/analysis/LikePredicate.java index adb72b5bb..40a51dd94 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/LikePredicate.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/LikePredicate.java @@ -84,16 +84,19 @@ public class LikePredicate extends Predicate { @Override public void analyze(Analyzer analyzer) throws AnalysisException { super.analyze(analyzer); - if (getChild(0).getType() != PrimitiveType.STRING) { + if (getChild(0).getType() != PrimitiveType.STRING + && !getChild(0).getType().isNull()) { throw new AnalysisException( - "left operand of " + op.toString() + " must be of type STRING: " + this.toSql()); + "left operand of " + op.toString() + " must be of type STRING: " + toSql()); } - if (getChild(1).getType() != PrimitiveType.STRING) { + if (getChild(1).getType() != PrimitiveType.STRING + && !getChild(1).getType().isNull()) { throw new AnalysisException( - "right operand of " + op.toString() + " must be of type STRING: " + this.toSql()); + "right operand of " + op.toString() + " must be of type STRING: " + toSql()); } - if (getChild(1).isLiteral() && (op == Operator.RLIKE || op == Operator.REGEXP)) { + if (!getChild(1).getType().isNull() && getChild(1).isLiteral() + && (op == Operator.RLIKE || op == Operator.REGEXP)) { // let's make sure the pattern works // TODO: this checks that it's a Java-supported regex, but the syntax supported // by the backend is Posix; add a call to the backend to check the re syntax diff --git a/fe/src/main/java/com/cloudera/impala/analysis/LiteralExpr.java b/fe/src/main/java/com/cloudera/impala/analysis/LiteralExpr.java index 9c7395bd3..b69dbed9c 100644 
--- a/fe/src/main/java/com/cloudera/impala/analysis/LiteralExpr.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/LiteralExpr.java @@ -25,6 +25,8 @@ public abstract class LiteralExpr extends Expr { throws AnalysisException { Preconditions.checkArgument(type != PrimitiveType.INVALID_TYPE); switch (type) { + case NULL_TYPE: + return new NullLiteral(); case BOOLEAN: return new BoolLiteral(value); case TINYINT: @@ -49,12 +51,13 @@ public abstract class LiteralExpr extends Expr { // Returns the string representation of the literal's value. Used when passing // literal values to the metastore rather than to Impala backends. This is similar to // the toSql() method, but does not perform any formatting of the string values. Neither - // method unescapes string values. + // method unescapes string values. public abstract String getStringValue(); // Swaps the sign of numeric literals. // Throws for non-numeric literals. public void swapSign() throws NotImplementedException { - throw new NotImplementedException("swapSign() only implemented for numeric literals"); + throw new NotImplementedException("swapSign() only implemented for numeric " + + "literals"); } } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/LiteralPredicate.java b/fe/src/main/java/com/cloudera/impala/analysis/LiteralPredicate.java index ef0fce47d..7f9d2f5ca 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/LiteralPredicate.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/LiteralPredicate.java @@ -14,6 +14,8 @@ package com.cloudera.impala.analysis; +import com.cloudera.impala.catalog.PrimitiveType; +import com.cloudera.impala.common.AnalysisException; import com.cloudera.impala.thrift.TExprNode; import com.cloudera.impala.thrift.TExprNodeType; import com.cloudera.impala.thrift.TLiteralPredicate; @@ -41,6 +43,12 @@ public class LiteralPredicate extends Predicate { this.selectivity = (isNull || !val ?
0 : 1); } + @Override + public void analyze(Analyzer analyzer) throws AnalysisException { + super.analyze(analyzer); + type = (isNull) ? PrimitiveType.NULL_TYPE : PrimitiveType.BOOLEAN; + } + public boolean isNull() { return isNull; } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/NullLiteral.java b/fe/src/main/java/com/cloudera/impala/analysis/NullLiteral.java index 166c819a5..3e7edb0af 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/NullLiteral.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/NullLiteral.java @@ -18,10 +18,10 @@ import com.cloudera.impala.catalog.PrimitiveType; import com.cloudera.impala.thrift.TExprNode; import com.cloudera.impala.thrift.TExprNodeType; +// TODO: Decide between keeping only this class or only BoolLiteral and NullLiteral. public class NullLiteral extends LiteralExpr { public NullLiteral() { - // TODO: should NULL be a type? - type = PrimitiveType.BOOLEAN; + type = PrimitiveType.NULL_TYPE; } @Override diff --git a/fe/src/main/java/com/cloudera/impala/analysis/OpcodeRegistry.java b/fe/src/main/java/com/cloudera/impala/analysis/OpcodeRegistry.java index cf36bc4a8..7b11a1c83 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/OpcodeRegistry.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/OpcodeRegistry.java @@ -65,6 +65,8 @@ public class OpcodeRegistry { * varArgType must be a maximum-resolution type. * We use a separate map to be able to support multiple vararg signatures for the same * FunctionOperator. + * Contains a special entry mapping from Operator,NULL_TYPE to signatures for each + * Operator to correctly match vararg functions when all args are NULL. * Limitations: Since we do not consider the number of arguments, each FunctionOperator * is limited to having one vararg signature per maximum-resolution PrimitiveType. * For example, one can have two signatures func(float, int ...)
and func(string ...), @@ -210,8 +212,8 @@ public class OpcodeRegistry { // Take the last argument's type as the vararg type. Pair varArgsLookup = null; if (argTypes.length > 0) { - varArgsLookup = - Pair.create(op, argTypes[argTypes.length - 1].getMaxResolutionType()); + PrimitiveType varArgMatchType = getRightMostNonNullTypeOrNull(argTypes); + varArgsLookup = Pair.create(op, varArgMatchType.getMaxResolutionType()); } List signatures = null; if (operations.containsKey(lookup)) { @@ -235,6 +237,18 @@ public class OpcodeRegistry { return compatibleMatch; } + /** + * Returns right-most argType that is not NULL_TYPE, otherwise NULL_TYPE. + */ + private PrimitiveType getRightMostNonNullTypeOrNull(PrimitiveType[] argTypes) { + for (int i = argTypes.length - 1; i >= 0; --i) { + if (!argTypes[i].isNull()) { + return argTypes[i]; + } + } + return PrimitiveType.NULL_TYPE; + } + /** * Add a function with the specified opcode/signature to the registry. */ @@ -244,9 +258,12 @@ public class OpcodeRegistry { Pair lookup = Pair.create(op, args.length); // Take the last argument's type as the vararg type. Pair varArgsLookup = null; + // Special signature for vararg functions to handle matching when all args are NULL. + Pair varArgsNullLookup = null; Preconditions.checkArgument((varArgs) ? 
args.length > 0 : true); if (varArgs && args.length > 0) { varArgsLookup = Pair.create(op, args[args.length - 1].getMaxResolutionType()); + varArgsNullLookup = Pair.create(op, PrimitiveType.NULL_TYPE); } if (operations.containsKey(lookup)) { signatures = operations.get(lookup); @@ -256,6 +273,7 @@ public class OpcodeRegistry { signatures = new ArrayList(); if (varArgs) { varArgOperations.put(varArgsLookup, signatures); + varArgOperations.put(varArgsNullLookup, signatures); } else { operations.put(lookup, signatures); } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java b/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java index dd164c262..41baa88ec 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java @@ -14,7 +14,6 @@ package com.cloudera.impala.analysis; -import java.util.ArrayList; import java.util.List; import com.cloudera.impala.catalog.Table; diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TimestampArithmeticExpr.java b/fe/src/main/java/com/cloudera/impala/analysis/TimestampArithmeticExpr.java index 17dfb0d66..0ca16b9c2 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/TimestampArithmeticExpr.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/TimestampArithmeticExpr.java @@ -123,8 +123,9 @@ public class TimestampArithmeticExpr extends Expr { "' in timestamp arithmetic expression '" + toSql() + "'."); } - // The first child must return a timestamp. - if (getChild(0).getType() != PrimitiveType.TIMESTAMP) { + // The first child must return a timestamp or null. + if (getChild(0).getType() != PrimitiveType.TIMESTAMP + && !getChild(0).getType().isNull()) { throw new AnalysisException("Operand '" + getChild(0).toSql() + "' of timestamp arithmetic expression '" + toSql() + "' returns type '" + getChild(0).getType() + "'. 
Expected type 'TIMESTAMP'."); diff --git a/fe/src/main/java/com/cloudera/impala/catalog/PrimitiveType.java b/fe/src/main/java/com/cloudera/impala/catalog/PrimitiveType.java index 7ecd36fac..feccedab9 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/PrimitiveType.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/PrimitiveType.java @@ -23,6 +23,9 @@ import com.google.common.collect.Lists; public enum PrimitiveType { INVALID_TYPE("INVALID_TYPE", -1, TPrimitiveType.INVALID_TYPE), + // NULL_TYPE - used only in LiteralPredicate and NullLiteral to make NULLs compatible + // with all other types. + NULL_TYPE("NULL_TYPE", 1, TPrimitiveType.NULL_TYPE), BOOLEAN("BOOLEAN", 1, TPrimitiveType.BOOLEAN), TINYINT("TINYINT", 1, TPrimitiveType.TINYINT), SMALLINT("SMALLINT", 2, TPrimitiveType.SMALLINT), @@ -82,6 +85,8 @@ public enum PrimitiveType { // Timestamps get summed as DOUBLE for AVG. } else if (isFloatingPointType() || this == TIMESTAMP) { return DOUBLE; + } else if (isNull()) { + return NULL_TYPE; } else { return INVALID_TYPE; } @@ -95,6 +100,10 @@ public enum PrimitiveType { return this != INVALID_TYPE; } + public boolean isNull() { + return this == NULL_TYPE; + } + public boolean isDateType() { return (this == DATE || this == DATETIME || this == TIMESTAMP); } @@ -105,6 +114,8 @@ public enum PrimitiveType { public boolean isSupported() { switch (this) { + case DATE: + case DATETIME: case BINARY: case DECIMAL: return false; @@ -164,6 +175,20 @@ public enum PrimitiveType { static { compatibilityMatrix = new PrimitiveType[STRING.ordinal() + 1][STRING.ordinal() + 1]; + // NULL_TYPE is compatible with any type and results in the non-null type. 
+ compatibilityMatrix[NULL_TYPE.ordinal()][NULL_TYPE.ordinal()] = NULL_TYPE; + compatibilityMatrix[NULL_TYPE.ordinal()][BOOLEAN.ordinal()] = BOOLEAN; + compatibilityMatrix[NULL_TYPE.ordinal()][TINYINT.ordinal()] = TINYINT; + compatibilityMatrix[NULL_TYPE.ordinal()][SMALLINT.ordinal()] = SMALLINT; + compatibilityMatrix[NULL_TYPE.ordinal()][INT.ordinal()] = INT; + compatibilityMatrix[NULL_TYPE.ordinal()][BIGINT.ordinal()] = BIGINT; + compatibilityMatrix[NULL_TYPE.ordinal()][FLOAT.ordinal()] = FLOAT; + compatibilityMatrix[NULL_TYPE.ordinal()][DOUBLE.ordinal()] = DOUBLE; + compatibilityMatrix[NULL_TYPE.ordinal()][DATE.ordinal()] = DATE; + compatibilityMatrix[NULL_TYPE.ordinal()][DATETIME.ordinal()] = DATETIME; + compatibilityMatrix[NULL_TYPE.ordinal()][TIMESTAMP.ordinal()] = TIMESTAMP; + compatibilityMatrix[NULL_TYPE.ordinal()][STRING.ordinal()] = STRING; + compatibilityMatrix[BOOLEAN.ordinal()][BOOLEAN.ordinal()] = BOOLEAN; compatibilityMatrix[BOOLEAN.ordinal()][TINYINT.ordinal()] = TINYINT; compatibilityMatrix[BOOLEAN.ordinal()][SMALLINT.ordinal()] = SMALLINT; @@ -247,14 +272,16 @@ public enum PrimitiveType { * without loss of precision. Returns INVALID_TYPE if there is no such type * or if any of t1 and t2 is INVALID_TYPE. */ - public static PrimitiveType getAssignmentCompatibleType(PrimitiveType t1, PrimitiveType t2) { + public static PrimitiveType getAssignmentCompatibleType(PrimitiveType t1, + PrimitiveType t2) { if (!t1.isValid() || !t2.isValid()) { return INVALID_TYPE; } PrimitiveType smallerType = (t1.ordinal() < t2.ordinal() ? t1 : t2); PrimitiveType largerType = (t1.ordinal() > t2.ordinal() ? t1 : t2); - PrimitiveType result = compatibilityMatrix[smallerType.ordinal()][largerType.ordinal()]; + PrimitiveType result = + compatibilityMatrix[smallerType.ordinal()][largerType.ordinal()]; Preconditions.checkNotNull(result); return result; } @@ -264,7 +291,7 @@ public enum PrimitiveType { * t1 to t2 results in no loss of precision. 
*/ public static boolean isImplicitlyCastable(PrimitiveType t1, PrimitiveType t2) { - return getAssignmentCompatibleType(t1, t2) == t2; + return getAssignmentCompatibleType(t1, t2) == t2; } // Returns the highest resolution type @@ -376,6 +403,7 @@ public enum PrimitiveType { */ public int getJavaSQLType() { switch (this) { + case NULL_TYPE: return java.sql.Types.NULL; case BOOLEAN: return java.sql.Types.BOOLEAN; case TINYINT: return java.sql.Types.TINYINT; case SMALLINT: return java.sql.Types.SMALLINT; diff --git a/fe/src/main/jflex/sql-scanner.flex b/fe/src/main/jflex/sql-scanner.flex index 6777d1f83..d8eb52df5 100644 --- a/fe/src/main/jflex/sql-scanner.flex +++ b/fe/src/main/jflex/sql-scanner.flex @@ -195,7 +195,6 @@ import com.cloudera.impala.analysis.SqlParserSymbols; tokenIdMap.put(new Integer(SqlParserSymbols.error), "ERROR"); tokenIdMap.put(new Integer(SqlParserSymbols.BITXOR), "^"); tokenIdMap.put(new Integer(SqlParserSymbols.NUMERIC_OVERFLOW), "NUMERIC OVERFLOW"); - tokenIdMap.put(new Integer(SqlParserSymbols.BOOL_LITERAL), "BOOL LITERAL"); } public static boolean isKeyword(Integer tokenId) { diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java index c14d5a3b3..3fececf8e 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java @@ -142,8 +142,8 @@ public class AnalyzerTest { } catch (InternalException e) { fail("Internal exception:\n" + e.toString()); } - if(node instanceof SelectStmt) { - CheckSelectToThrift((SelectStmt)node); + if (node instanceof SelectStmt) { + CheckSelectToThrift((SelectStmt) node); } else if (node instanceof InsertStmt) { InsertStmt insertStmt = (InsertStmt) node; if (insertStmt.getQueryStmt() instanceof SelectStmt) { @@ -207,9 +207,11 @@ public class AnalyzerTest { private void checkBinaryExprs(Expr expr) { if (expr instanceof BinaryPredicate || (expr 
instanceof ArithmeticExpr - && ((ArithmeticExpr) expr).getOp() != ArithmeticExpr.Operator.BITNOT)) { + && ((ArithmeticExpr) expr).getOp() != ArithmeticExpr.Operator.BITNOT)) { Assert.assertEquals(expr.getChildren().size(), 2); - Assert.assertEquals(expr.getChild(0).getType(), expr.getChild(1).getType()); + // The types must be equal or one of them is NULL_TYPE. + Assert.assertTrue(expr.getChild(0).getType() == expr.getChild(1).getType() + || expr.getChild(0).getType().isNull() || expr.getChild(1).getType().isNull()); } for (Expr child: expr.getChildren()) { checkBinaryExprs(child); @@ -226,7 +228,7 @@ public class AnalyzerTest { private void TestSelectStar() throws AnalysisException { AnalyzesOk("select * from functional.AllTypes"); DescriptorTable descTbl = analyzer.getDescTbl(); - for (SlotDescriptor slotD: descTbl.getTupleDesc(new TupleId(0)).getSlots()) { + for (SlotDescriptor slotD : descTbl.getTupleDesc(new TupleId(0)).getSlots()) { slotD.setIsMaterialized(true); } descTbl.computeMemLayout(); @@ -246,9 +248,9 @@ public class AnalyzerTest { } private void TestNonNullable() throws AnalysisException { - // both slots are non-nullable bigints. The layout should look like: + // both slots are non-nullable bigints. The layout should look like: // (byte range : data) - // 0 - 7: count(int_col) + // 0 - 7: count(int_col) // 8 - 15: count(*) AnalyzesOk("select count(int_col), count(*) from functional.AllTypes"); DescriptorTable descTbl = analyzer.getDescTbl(); @@ -264,7 +266,7 @@ public class AnalyzerTest { } private void TestMixedNullable() throws AnalysisException { - // one slot is nullable, one is not. The layout should look like: + // one slot is nullable, one is not. 
The layout should look like: // (byte range : data) // 0 : 1 nullable-byte (only 1 bit used) // 1 - 7: padded bytes @@ -284,7 +286,7 @@ public class AnalyzerTest { } private void checkLayoutParams(SlotDescriptor d, int byteSize, int byteOffset, - int nullIndicatorByte, int nullIndicatorBit) { + int nullIndicatorByte, int nullIndicatorBit) { Assert.assertEquals(byteSize, d.getByteSize()); Assert.assertEquals(byteOffset, d.getByteOffset()); Assert.assertEquals(nullIndicatorByte, d.getNullIndicatorByte()); @@ -292,7 +294,7 @@ public class AnalyzerTest { } private void checkLayoutParams(String colAlias, int byteSize, int byteOffset, - int nullIndicatorByte, int nullIndicatorBit) { + int nullIndicatorByte, int nullIndicatorBit) { SlotDescriptor d = analyzer.getSlotDescriptor(colAlias); checkLayoutParams(d, byteSize, byteOffset, nullIndicatorByte, nullIndicatorBit); } @@ -316,7 +318,7 @@ public class AnalyzerTest { AnalysisError("select a.id from (select id y from functional.hbasealltypessmall) a", "unknown column 'id' (table alias 'a')"); AnalyzesOk("select * from (select * from functional.AllTypes) a where year = 2009"); - AnalyzesOk("select * from (select * from functional.alltypesagg) a right outer join"+ + AnalyzesOk("select * from (select * from functional.alltypesagg) a right outer join" + " (select * from functional.alltypessmall) b using (id, int_col) " + " where a.day >= 6 and b.month > 2 and a.tinyint_col = 15 and " + " b.string_col = '15' and a.tinyint_col + b.tinyint_col < 15"); @@ -332,7 +334,7 @@ public class AnalyzerTest { "duplicated inline view column alias: 'year' in inline view 'x'"); // subquery on the rhs of the join - AnalyzesOk("select x.float_col "+ + AnalyzesOk("select x.float_col " + " from functional.alltypessmall c join " + " (select a.smallint_col smallint_col, a.tinyint_col tinyint_col, " + " a.int_col int_col, b.float_col float_col" + @@ -343,7 +345,7 @@ public class AnalyzerTest { // aggregate test AnalyzesOk("select count(*) from (select 
count(id) from " + "functional.AllTypes group by id) a"); - AnalyzesOk("select count(a.x) from (select id+2 x "+ + AnalyzesOk("select count(a.x) from (select id+2 x " + "from functional.hbasealltypessmall) a"); AnalyzesOk("select * from (select id, zip " + " from (select * from functional.testtbl) x " + @@ -382,11 +384,11 @@ public class AnalyzerTest { AnalysisError("select * from " + "(select id, zip from functional.testtbl group by id having count(*) > 0) x", "select list expression not produced by aggregation output " + - "(missing from GROUP BY clause?)"); + "(missing from GROUP BY clause?)"); AnalysisError("select * from " + "(select id from functional.testtbl group by id having zip + count(*) > 0) x", "HAVING clause not produced by aggregation output " + - "(missing from GROUP BY clause?)"); + "(missing from GROUP BY clause?)"); AnalysisError("select * from " + "(select zip, count(*) from functional.testtbl group by 3) x", "GROUP BY: ordinal exceeds number of items in select list"); @@ -410,6 +412,9 @@ public class AnalyzerTest { AnalyzesOk("select c1, c2 from (select zip c1 , count(*) c2 " + " from (select * from functional.testtbl) x group by 1) x " + " order by 2, 1 limit 5"); + + // test NULLs + AnalyzesOk("select * from (select NULL) a"); } @Test @@ -420,15 +425,15 @@ public class AnalyzerTest { AnalyzesOk("select functional_seq.alltypes.* from functional_seq.alltypes"); // two tables w/ identical names from different dbs AnalyzesOk("select functional.alltypes.*, functional_seq.alltypes.* " + - "from functional.alltypes, functional_seq.alltypes"); + "from functional.alltypes, functional_seq.alltypes"); AnalyzesOk("select * from functional.alltypes, functional_seq.alltypes"); } @Test public void TestTimestampValueExprs() throws AnalysisException { - AnalyzesOk("select cast (0 as timestamp)"); - AnalyzesOk("select cast (0.1 as timestamp)"); - AnalyzesOk("select cast ('1970-10-10 10:00:00.123' as timestamp)"); + AnalyzesOk("select cast (0 as timestamp)"); + 
AnalyzesOk("select cast (0.1 as timestamp)"); + AnalyzesOk("select cast ('1970-10-10 10:00:00.123' as timestamp)"); } @Test @@ -436,19 +441,25 @@ public class AnalyzerTest { // Test predicates in where clause. AnalyzesOk("select * from functional.AllTypes where true"); AnalyzesOk("select * from functional.AllTypes where false"); + AnalyzesOk("select * from functional.AllTypes where NULL"); AnalyzesOk("select * from functional.AllTypes where bool_col = true"); AnalyzesOk("select * from functional.AllTypes where bool_col = false"); + AnalyzesOk("select * from functional.AllTypes where bool_col = NULL"); + AnalyzesOk("select * from functional.AllTypes where NULL = NULL"); + AnalyzesOk("select * from functional.AllTypes where NULL and NULL or NULL"); AnalyzesOk("select * from functional.AllTypes where true or false"); AnalyzesOk("select * from functional.AllTypes where true and false"); AnalyzesOk("select * from functional.AllTypes " + - "where true or false and bool_col = false"); + "where true or false and bool_col = false"); AnalyzesOk("select * from functional.AllTypes " + - "where true and false or bool_col = false"); + "where true and false or bool_col = false"); // Test predicates in select list. 
AnalyzesOk("select bool_col = true from functional.AllTypes"); AnalyzesOk("select bool_col = false from functional.AllTypes"); + AnalyzesOk("select bool_col = NULL from functional.AllTypes"); AnalyzesOk("select true or false and bool_col = false from functional.AllTypes"); AnalyzesOk("select true and false or bool_col = false from functional.AllTypes"); + AnalyzesOk("select NULL or NULL and NULL from functional.AllTypes"); } @Test @@ -485,7 +496,7 @@ public class AnalyzerTest { "Duplicate table alias"); // duplicate implicit alias AnalysisError("select int_col from functional.alltypes, " + - "functional.alltypes", "Duplicate table alias"); + "functional.alltypes", "Duplicate table alias"); // resolves dbs correctly AnalyzesOk("select zip from functional.testtbl"); @@ -509,9 +520,13 @@ public class AnalyzerTest { AnalyzesOk("select null"); AnalyzesOk("select null and null"); AnalyzesOk("select null or null"); + AnalyzesOk("select null is null"); + AnalyzesOk("select null is not null"); + AnalyzesOk("select int_col is not null from functional.alltypes"); } - @Test public void TestOnClause() throws AnalysisException { + @Test + public void TestOnClause() throws AnalysisException { AnalyzesOk( "select a.int_col from functional.alltypes a " + "join functional.alltypes b on (a.int_col = b.int_col)"); @@ -519,6 +534,9 @@ public class AnalyzerTest { "select a.int_col " + "from functional.alltypes a join functional.alltypes b on " + "(a.int_col = b.int_col and a.string_col = b.string_col)"); + AnalyzesOk( + "select a.int_col from functional.alltypes a " + + "join functional.alltypes b on (NULL)"); // ON or USING clause not required for inner join AnalyzesOk("select a.int_col from functional.alltypes a join functional.alltypes b"); // unknown column @@ -542,7 +560,7 @@ public class AnalyzerTest { "functional.alltypes b on (a.bool_col = b.string_col)", "operands are not comparable: a.bool_col = b.string_col"); AnalyzesOk( - "select a.int_col, b.int_col, c.int_col " + + 
"select a.int_col, b.int_col, c.int_col " + "from functional.alltypes a join functional.alltypes b on " + "(a.int_col = b.int_col and a.string_col = b.string_col)" + "join functional.alltypes c on " + @@ -585,23 +603,24 @@ public class AnalyzerTest { AnalyzesOk("select a.id from functional.alltypes a left semi join " + "functional.alltypes b on (a.id = b.id)"); AnalyzesOk("select a.id from functional.alltypes a left semi join " + - "functional.alltypes b using (id)"); + "functional.alltypes b using (id)"); AnalysisError("select a.id from functional.alltypes a " + - "left semi join functional.alltypes b", + "left semi join functional.alltypes b", "LEFT SEMI JOIN requires an ON or USING clause"); // TODO: enable when implemented // must not reference semi-joined alias outside of join clause - //AnalysisError( - //"select a.id, b.id from alltypes a left semi join alltypes b on (a.id = b.id)", - //"x"); + // AnalysisError( + // "select a.id, b.id from alltypes a left semi join alltypes b on (a.id = b.id)", + // "x"); } - @Test public void TestUsingClause() throws AnalysisException { + @Test + public void TestUsingClause() throws AnalysisException { AnalyzesOk("select a.int_col, b.int_col from functional.alltypes a join " + - "functional.alltypes b using (int_col)"); + "functional.alltypes b using (int_col)"); AnalyzesOk("select a.int_col, b.int_col from " + - "functional.alltypes a join functional.alltypes b " + - "using (int_col, string_col)"); + "functional.alltypes a join functional.alltypes b " + + "using (int_col, string_col)"); AnalyzesOk( "select a.int_col, b.int_col, c.int_col " + "from functional.alltypes a " + @@ -613,7 +632,7 @@ public class AnalyzerTest { "unknown column badcol for alias a"); AnalysisError( "select a.int_col from functional.alltypes a " + - "join functional.alltypes b using (int_col, badcol)", + "join functional.alltypes b using (int_col, badcol)", "unknown column badcol for alias a "); } @@ -641,6 +660,7 @@ public class AnalyzerTest { 
AnalyzesOk("select id from functional.testtbl where NULL OR NULL"); AnalyzesOk("select id from functional.testtbl where NULL AND NULL"); AnalyzesOk("select id from functional.testtbl where NOT NULL"); + AnalyzesOk("select id from functional.testtbl where NULL"); // bool literal predicate AnalyzesOk("select id from functional.testtbl where true"); AnalyzesOk("select id from functional.testtbl where false"); @@ -658,6 +678,8 @@ public class AnalyzerTest { public void TestAggregates() throws AnalysisException { AnalyzesOk("select count(*), min(id), max(id), sum(id), avg(id) " + "from functional.testtbl"); + AnalyzesOk("select count(NULL), min(NULL), max(NULL), sum(NULL), avg(NULL) " + + "from functional.testtbl"); AnalysisError("select id, zip from functional.testtbl where count(*) > 0", "aggregation function not allowed in WHERE clause"); @@ -796,6 +818,14 @@ public class AnalyzerTest { public void TestGroupBy() throws AnalysisException { AnalyzesOk("select zip, count(*) from functional.testtbl group by zip"); AnalyzesOk("select zip + count(*) from functional.testtbl group by zip"); + // grouping on constants is ok and doesn't require them to be in select list + AnalyzesOk("select count(*) from functional.testtbl group by 2*3+4"); + AnalyzesOk("select count(*) from functional.testtbl " + + "group by true, false, NULL"); + // ok for constants in select list not to be in group by list + AnalyzesOk("select true, NULL, 1*2+5 as a, zip, count(*) from functional.testtbl " + + "group by zip"); + // doesn't group by all non-agg select list items AnalysisError("select zip, count(*) from functional.testtbl", "select list expression not produced by aggregation output " + @@ -830,7 +860,7 @@ public class AnalyzerTest { AnalysisError("select zip id, id, count(*) from functional.testtbl group by id", "Column id in group by clause is ambiguous"); AnalysisError("select zip id, zip ID, count(*) from functional.testtbl group by id", - "Column id in group by clause is ambiguous"); + 
"Column id in group by clause is ambiguous"); // can't group by aggregate @@ -860,7 +890,8 @@ public class AnalyzerTest { "from functional.alltypes group by 1"); } - @Test public void TestAvgSubstitution() throws AnalysisException { + @Test + public void TestAvgSubstitution() throws AnalysisException { SelectStmt select = (SelectStmt) AnalyzesOk( "select avg(id) from functional.testtbl having count(id) > 0 order by avg(zip)"); ArrayList selectListExprs = select.getResultExprs(); @@ -878,10 +909,13 @@ public class AnalyzerTest { assertEquals(" / ", orderingExpr.toSql()); } - @Test public void TestOrderBy() throws AnalysisException { + @Test + public void TestOrderBy() throws AnalysisException { AnalyzesOk("select zip, id from functional.testtbl order by zip"); AnalyzesOk("select zip, id from functional.testtbl order by zip asc"); AnalyzesOk("select zip, id from functional.testtbl order by zip desc"); + AnalyzesOk("select zip, id from functional.testtbl " + + "order by true asc, false desc, NULL asc"); // resolves ordinals AnalyzesOk("select zip, id from functional.testtbl order by 1"); @@ -930,7 +964,7 @@ public class AnalyzerTest { @Test public void TestBinaryPredicates() throws AnalysisException { - // AnalyzesOk("select * from functional.alltypes where bool_col != true"); + AnalyzesOk("select * from functional.alltypes where bool_col != true"); AnalyzesOk("select * from functional.alltypes where tinyint_col <> 1"); AnalyzesOk("select * from functional.alltypes where smallint_col <= 23"); AnalyzesOk("select * from functional.alltypes where int_col > 15"); @@ -944,13 +978,23 @@ public class AnalyzerTest { AnalyzesOk("select * from functional.alltypes where bool_col = 0"); AnalyzesOk("select * from functional.alltypes where int_col = cast('0' as int)"); AnalyzesOk("select * from functional.alltypes where cast(string_col as int) = 15"); + // tests with NULL + AnalyzesOk("select * from functional.alltypes where bool_col != NULL"); + AnalyzesOk("select * from 
functional.alltypes where tinyint_col <> NULL"); + AnalyzesOk("select * from functional.alltypes where smallint_col <= NULL"); + AnalyzesOk("select * from functional.alltypes where int_col > NULL"); + AnalyzesOk("select * from functional.alltypes where bigint_col >= NULL"); + AnalyzesOk("select * from functional.alltypes where float_col < NULL"); + AnalyzesOk("select * from functional.alltypes where double_col > NULL"); + AnalyzesOk("select * from functional.alltypes where string_col = NULL"); + AnalyzesOk("select * from functional.alltypes where timestamp_col = NULL"); // invalid casts AnalysisError("select * from functional.alltypes where bool_col = '15'", "operands are not comparable: bool_col = '15'"); - //AnalysisError("select * from functional.alltypes where date_col = 15", - //"operands are not comparable: date_col = 15"); - //AnalysisError("select * from functional.alltypes where datetime_col = 1.0", - //"operands are not comparable: datetime_col = 1.0"); + // AnalysisError("select * from functional.alltypes where date_col = 15", + // "operands are not comparable: date_col = 15"); + // AnalysisError("select * from functional.alltypes where datetime_col = 1.0", + // "operands are not comparable: datetime_col = 1.0"); } @Test @@ -998,11 +1042,11 @@ public class AnalyzerTest { // We need to add 1 to MIN_VALUE because there are no negative integer literals. // The reason is that whether a minus belongs to an // arithmetic expr or a literal must be decided by the parser, not the lexer. 
- AnalyzesOk("select 1 | cast('" + Long.toString(Long.MIN_VALUE+1) + "' as bigint)"); + AnalyzesOk("select 1 | cast('" + Long.toString(Long.MIN_VALUE + 1) + "' as bigint)"); AnalyzesOk("select 1 | cast('" + Long.toString(Long.MAX_VALUE) + "' as bigint)"); // Cast to numeric never overflow AnalyzesOk("select * from functional.alltypes where tinyint_col = " + - "cast('" + Long.toString(Long.MIN_VALUE) + "1' as tinyint)"); + "cast('" + Long.toString(Long.MIN_VALUE) + "1' as tinyint)"); AnalyzesOk("select * from functional.alltypes where tinyint_col = " + "cast('" + Long.toString(Long.MAX_VALUE) + "1' as tinyint)"); AnalyzesOk("select * from functional.alltypes where tinyint_col = " + @@ -1017,6 +1061,27 @@ public class AnalyzerTest { "tinyint_col = cast('--1' as tinyint)"); } + /** + * Tests that cast(null to type) returns type for all types. + */ + @Test + public void TestNullCasts() throws AnalysisException { + for (PrimitiveType type: PrimitiveType.values()) { + // Cannot cast to INVALID_TYPE, NULL_TYPE or unsupported types. + if (!type.isValid() || type.isNull() || !type.isSupported()) { + continue; + } + checkExprType("select cast(null as " + type + ")", type); + } + } + + // Analyzes query and asserts that the first result expr returns the given type. + // Requires query to parse to a SelectStmt. + private void checkExprType(String query, PrimitiveType type) { + SelectStmt select = (SelectStmt) AnalyzesOk(query); + assertEquals(select.getResultExprs().get(0).getType(), type); + } + @Test public void TestLikePredicates() throws AnalysisException { AnalyzesOk("select * from functional.alltypes where string_col like 'test%'"); @@ -1032,6 +1097,16 @@ public class AnalyzerTest { "left operand of LIKE must be of type STRING"); AnalysisError("select * from functional.alltypes where string_col regexp 'test]['", "invalid regular expression in 'string_col REGEXP 'test][''"); + // Test NULLs. 
+ String[] likePreds = new String[] {"LIKE", "RLIKE", "REGEXP"}; + for (String likePred: likePreds) { + AnalyzesOk(String.format("select * from functional.alltypes " + + "where string_col %s NULL", likePred)); + AnalyzesOk(String.format("select * from functional.alltypes " + + "where NULL %s string_col", likePred)); + AnalyzesOk(String.format("select * from functional.alltypes " + + "where NULL %s NULL", likePred)); + } } @Test @@ -1041,18 +1116,30 @@ public class AnalyzerTest { AnalyzesOk("select * from functional.alltypes where " + "string_col = '5' or int_col = 5"); AnalyzesOk("select * from functional.alltypes where (string_col = '5' " + - "or int_col = 5) and string_col > '1'"); + "or int_col = 5) and string_col > '1'"); AnalyzesOk("select * from functional.alltypes where not string_col = '5'"); AnalyzesOk("select * from functional.alltypes where int_col = cast('5' as int)"); + // test NULLs + AnalyzesOk("select * from functional.alltypes where NULL and NULL"); + AnalyzesOk("select * from functional.alltypes where NULL or NULL"); + AnalyzesOk("select * from functional.alltypes where not NULL"); + // arithmetic exprs as operands to compound predicates should fail to analyze + AnalysisError("select * from functional.alltypes where 1 + 2 and false", + "Operand '1 + 2' part of predicate '1 + 2 AND FALSE' should return " + + "type 'BOOLEAN' but returns type 'BIGINT'."); + AnalysisError("select * from functional.alltypes where 1 + 2 or true", + "Operand '1 + 2' part of predicate '1 + 2 OR TRUE' should return " + + "type 'BOOLEAN' but returns type 'BIGINT'."); + AnalysisError("select * from functional.alltypes where not 1 + 2", + "Operand '1 + 2' part of predicate 'NOT 1 + 2' should return " + + "type 'BOOLEAN' but returns type 'BIGINT'."); } @Test public void TestIsNullPredicates() throws AnalysisException { AnalyzesOk("select * from functional.alltypes where int_col is null"); AnalyzesOk("select * from functional.alltypes where string_col is not null"); - // TODO: 
add null literals (i think this would require a null type, which is - // compatible with anything else) - // AnalyzesOk("select * from functional.alltypes where null is not null"); + AnalyzesOk("select * from functional.alltypes where null is not null"); } @Test @@ -1071,6 +1158,15 @@ public class AnalyzerTest { // Comparison expr requires implicit cast. AnalyzesOk("select * from functional.alltypes " + "where smallint_col between float_col and double_col"); + // Test NULLs. + AnalyzesOk("select * from functional.alltypes " + + "where NULL between float_col and double_col"); + AnalyzesOk("select * from functional.alltypes " + + "where smallint_col between NULL and double_col"); + AnalyzesOk("select * from functional.alltypes " + + "where smallint_col between float_col and NULL"); + AnalyzesOk("select * from functional.alltypes " + + "where NULL between NULL and NULL"); // Incompatible types. AnalysisError("select * from functional.alltypes " + "where string_col between bool_col and double_col", @@ -1101,6 +1197,12 @@ public class AnalyzerTest { // Comparison expr requires implicit cast. AnalyzesOk("select * from functional.alltypes where " + "int_col in (double_col, bigint_col)"); + // Test predicates. + AnalyzesOk("select * from functional.alltypes where " + + "!true in (false or true, true and false)"); + // Test NULLs. + AnalyzesOk("select * from functional.alltypes where " + + "NULL in (NULL, NULL)"); // Incompatible types. 
AnalysisError("select * from functional.alltypes where " + "string_col in (bool_col, double_col)", @@ -1110,6 +1212,10 @@ public class AnalyzerTest { "timestamp_col in (int_col, double_col)", "Incompatible return types 'TIMESTAMP' and 'INT' " + "of exprs 'timestamp_col' and 'int_col'."); + AnalysisError("select * from functional.alltypes where " + + "timestamp_col in (NULL, int_col)", + "Incompatible return types 'TIMESTAMP' and 'INT' " + + "of exprs 'timestamp_col' and 'int_col'."); } /** @@ -1118,34 +1224,40 @@ public class AnalyzerTest { */ @Test public void TestArithmeticTypeCasts() throws AnalysisException { - for (PrimitiveType type1 : PrimitiveType.getNumericTypes()) { - for (PrimitiveType type2 : PrimitiveType.getNumericTypes()) { + // test all numeric types and the null type + List numericTypes = + new ArrayList(PrimitiveType.getNumericTypes()); + numericTypes.add(PrimitiveType.NULL_TYPE); + + for (PrimitiveType type1 : numericTypes) { + for (PrimitiveType type2 : numericTypes) { PrimitiveType compatibleType = PrimitiveType.getAssignmentCompatibleType(type1, type2); PrimitiveType promotedType = compatibleType.getMaxResolutionType(); // +, -, * typeCastTest(type1, type2, false, ArithmeticExpr.Operator.ADD, null, - promotedType); + promotedType); typeCastTest(type1, type2, true, ArithmeticExpr.Operator.ADD, null, - promotedType); + promotedType); typeCastTest(type1, type2, false, ArithmeticExpr.Operator.SUBTRACT, null, - promotedType); + promotedType); typeCastTest(type1, type2, true, ArithmeticExpr.Operator.SUBTRACT, null, - promotedType); + promotedType); typeCastTest(type1, type2, false, ArithmeticExpr.Operator.MULTIPLY, null, - promotedType); + promotedType); typeCastTest(type1, type2, true, ArithmeticExpr.Operator.MULTIPLY, null, - promotedType); + promotedType); // / typeCastTest(type1, type2, false, ArithmeticExpr.Operator.DIVIDE, null, - PrimitiveType.DOUBLE); + PrimitiveType.DOUBLE); typeCastTest(type1, type2, true, ArithmeticExpr.Operator.DIVIDE, 
null, - PrimitiveType.DOUBLE); + PrimitiveType.DOUBLE); // % div, &, |, ^ only for fixed-point types - if (!type1.isFixedPointType() || !type2.isFixedPointType()) { + if ((!type1.isFixedPointType() && !type1.isNull()) + || (!type2.isFixedPointType() && !type2.isNull())) { continue; } typeCastTest(type1, type2, false, ArithmeticExpr.Operator.MOD, null, @@ -1153,40 +1265,48 @@ public class AnalyzerTest { typeCastTest(type1, type2, true, ArithmeticExpr.Operator.MOD, null, compatibleType); typeCastTest(type1, type2, false, ArithmeticExpr.Operator.INT_DIVIDE, null, - compatibleType); + compatibleType); typeCastTest(type1, type2, true, ArithmeticExpr.Operator.INT_DIVIDE, null, - compatibleType); + compatibleType); typeCastTest(type1, type2, false, ArithmeticExpr.Operator.BITAND, null, - compatibleType); + compatibleType); typeCastTest(type1, type2, true, ArithmeticExpr.Operator.BITAND, null, - compatibleType); + compatibleType); typeCastTest(type1, type2, false, ArithmeticExpr.Operator.BITOR, null, - compatibleType); + compatibleType); typeCastTest(type1, type2, true, ArithmeticExpr.Operator.BITOR, null, - compatibleType); + compatibleType); typeCastTest(type1, type2, false, ArithmeticExpr.Operator.BITXOR, null, - compatibleType); + compatibleType); typeCastTest(type1, type2, true, ArithmeticExpr.Operator.BITXOR, null, - compatibleType); + compatibleType); } } - for (PrimitiveType type : PrimitiveType.getFixedPointTypes()) { + List fixedPointTypes = new ArrayList( + PrimitiveType.getFixedPointTypes()); + fixedPointTypes.add(PrimitiveType.NULL_TYPE); + for (PrimitiveType type: fixedPointTypes) { typeCastTest(null, type, false, ArithmeticExpr.Operator.BITNOT, null, type); } } /** * Test of all type casts in comparisons following mysql's casting policy. 
+ * * @throws AnalysisException */ @Test public void TestComparisonTypeCasts() throws AnalysisException { + // test all numeric types and the null type + List types = + new ArrayList(PrimitiveType.getNumericTypes()); + types.add(PrimitiveType.NULL_TYPE); + // test on all comparison ops for (BinaryPredicate.Operator cmpOp : BinaryPredicate.Operator.values()) { - // test all numeric - for (PrimitiveType type1 : PrimitiveType.getNumericTypes()) { - for (PrimitiveType type2 : PrimitiveType.getNumericTypes()) { + for (PrimitiveType type1 : types) { + for (PrimitiveType type2 : types) { PrimitiveType compatibleType = PrimitiveType.getAssignmentCompatibleType(type1, type2); typeCastTest(type1, type2, false, null, cmpOp, compatibleType); @@ -1219,10 +1339,10 @@ public class AnalyzerTest { String queryStr = null; if (arithmeticMode) { queryStr = "select " + op1 + " " + arithmeticOp.toString() + " " + op2 + - " AS a from functional.alltypes"; + " AS a from functional.alltypes"; } else { queryStr = "select int_col from functional.alltypes " + - "where " + op1 + " " + cmpOp.toString() + " " + op2; + "where " + op1 + " " + cmpOp.toString() + " " + op2; } System.err.println(queryStr); SelectStmt select = (SelectStmt) AnalyzesOk(queryStr); @@ -1237,13 +1357,18 @@ public class AnalyzerTest { } else { // check the where clause expr = select.getWhereClause(); - assertEquals(PrimitiveType.BOOLEAN, expr.getType()); + if (!expr.getType().isNull()) { + assertEquals(PrimitiveType.BOOLEAN, expr.getType()); + } } checkCasts(expr); - assertEquals(opType, expr.getChild(0).getType()); + // The children's types must be NULL or equal to the requested opType. 
+ Assert.assertTrue(opType == expr.getChild(0).getType() + || opType.isNull() || expr.getChild(0).getType().isNull()); if (type1 != null) { - assertEquals(opType, expr.getChild(1).getType()); + Assert.assertTrue(opType == expr.getChild(1).getType() + || opType.isNull() || expr.getChild(1).getType().isNull()); } } @@ -1296,7 +1421,7 @@ public class AnalyzerTest { @Test public void TestTimestampArithmeticExpressions() { String[] valueTypeCols = - new String[] { "tinyint_col", "smallint_col", "int_col" }; + new String[] {"tinyint_col", "smallint_col", "int_col", "NULL"}; // Tests all time units. for (TimeUnit timeUnit : TimeUnit.values()) { @@ -1307,6 +1432,8 @@ public class AnalyzerTest { " from functional.alltypes"); AnalyzesOk("select timestamp_col - interval " + col + " " + timeUnit.toString() + " from functional.alltypes"); + AnalyzesOk("select NULL - interval " + col + " " + timeUnit.toString() + + " from functional.alltypes"); // Reversed interval and timestamp using addition. AnalyzesOk("select interval " + col + " " + timeUnit.toString() + " + timestamp_col from functional.alltypes"); @@ -1315,6 +1442,10 @@ public class AnalyzerTest { timeUnit.toString() + ") from functional.alltypes"); AnalyzesOk("select date_sub(timestamp_col, interval " + col + " " + timeUnit.toString() + ") from functional.alltypes"); + AnalyzesOk("select date_add(NULL, interval " + col + " " + + timeUnit.toString() + ") from functional.alltypes"); + AnalyzesOk("select date_sub(NULL, interval " + col + " " + + timeUnit.toString() + ") from functional.alltypes"); } } @@ -1393,15 +1524,15 @@ public class AnalyzerTest { "which is incompatible with expected type 'INT'."); // Cast from STRING to INT. AnalyzesOk("select date_add(timestamp_col, interval cast('10' as int) years) " + - " from functional.alltypes"); + " from functional.alltypes"); // Invalid time unit. Non-function-call like version. 
AnalysisError("select timestamp_col + interval 10 error from functional.alltypes", "Invalid time unit 'error' in timestamp arithmetic expression " + - "'timestamp_col + INTERVAL 10 error'."); + "'timestamp_col + INTERVAL 10 error'."); AnalysisError("select timestamp_col - interval 10 error from functional.alltypes", "Invalid time unit 'error' in timestamp arithmetic expression " + - "'timestamp_col - INTERVAL 10 error'."); + "'timestamp_col - INTERVAL 10 error'."); // Reversed interval and timestamp using addition. AnalysisError("select interval 10 error + timestamp_col from functional.alltypes", "Invalid time unit 'error' in timestamp arithmetic expression " + @@ -1443,6 +1574,43 @@ public class AnalyzerTest { AnalysisError("select coalesce()"); } + /** + * Tests that functions with NULL arguments get resolved properly, + * and that proper errors are reported when the non-null arguments + * cannot be cast to match a signature. + */ + @Test + public void TestNullFunctionArguments() { + // Test fixed arg functions using 'substring' as representative. + AnalyzesOk("select substring(NULL, 1, 2)"); + AnalyzesOk("select substring('a', NULL, 2)"); + AnalyzesOk("select substring('a', 1, NULL)"); + AnalyzesOk("select substring(NULL, NULL, NULL)"); + // Cannot cast non-null args to match a signature. + AnalysisError("select substring(1, NULL, NULL)", + "No matching function with those arguments: " + + "substring (TINYINT, NULL_TYPE, NULL_TYPE)"); + AnalysisError("select substring(NULL, 'a', NULL)", + "No matching function with those arguments: " + + "substring (NULL_TYPE, STRING, NULL_TYPE)"); + + // Test vararg functions with 'concat' as representative. + AnalyzesOk("select concat(NULL, 'a', 'b')"); + AnalyzesOk("select concat('a', NULL, 'b')"); + AnalyzesOk("select concat('a', 'b', NULL)"); + AnalyzesOk("select concat(NULL, NULL, NULL)"); + // Cannot cast non-null args to match a signature. 
+ AnalysisError("select concat(NULL, 1, 'b')", + "No matching function with those arguments: " + + "concat (NULL_TYPE, TINYINT, STRING)"); + AnalysisError("select concat('a', NULL, 1)", + "No matching function with those arguments: " + + "concat (STRING, NULL_TYPE, TINYINT)"); + AnalysisError("select concat(1, 'b', NULL)", + "No matching function with those arguments: " + + "concat (TINYINT, STRING, NULL_TYPE)"); + } + @Test public void TestCaseExpr() throws AnalysisException { // No case expr. @@ -1464,7 +1632,7 @@ public class AnalyzerTest { AnalysisError("select case when 20 > 10 then 20 when 1 > 2 then timestamp_col " + "when 4 < 5 then 2 else 15 end from functional.alltypes", "Incompatible return types 'TINYINT' and 'TIMESTAMP' " + - "of exprs '20' and 'timestamp_col'."); + "of exprs '20' and 'timestamp_col'."); // With case expr. AnalyzesOk("select case int_col when 20 then 30 else 15 end " + @@ -1490,7 +1658,7 @@ public class AnalyzerTest { AnalysisError("select case bigint_col when int_col then 30 " + "when double_col then timestamp_col else 15 end from functional.alltypes", "Incompatible return types 'TINYINT' and 'TIMESTAMP' " + - "of exprs '30' and 'timestamp_col'."); + "of exprs '30' and 'timestamp_col'."); // Test different type classes (all types are tested in BE tests). AnalyzesOk("select case when true then 1 end"); @@ -1498,6 +1666,12 @@ public class AnalyzerTest { AnalyzesOk("select case when true then 'abc' end"); AnalyzesOk("select case when true then cast('2011-01-01 09:01:01' " + "as timestamp) end"); + // Test NULLs. 
+ AnalyzesOk("select case NULL when 1 then 2 else 3 end"); + AnalyzesOk("select case 1 when NULL then 2 else 3 end"); + AnalyzesOk("select case 1 when 2 then NULL else 3 end"); + AnalyzesOk("select case 1 when 2 then 3 else NULL end"); + AnalyzesOk("select case NULL when NULL then NULL else NULL end"); } @Test @@ -1506,6 +1680,11 @@ public class AnalyzerTest { AnalyzesOk("select if(1 != 2, false, false)"); AnalyzesOk("select if(bool_col, false, true) from functional.alltypes"); AnalyzesOk("select if(bool_col, int_col, double_col) from functional.alltypes"); + // Test NULLs. + AnalyzesOk("select if(NULL, false, true) from functional.alltypes"); + AnalyzesOk("select if(bool_col, NULL, true) from functional.alltypes"); + AnalyzesOk("select if(bool_col, false, NULL) from functional.alltypes"); + AnalyzesOk("select if(NULL, NULL, NULL) from functional.alltypes"); // if() only accepts three arguments AnalysisError("select if(true, false, true, true)", @@ -1540,6 +1719,13 @@ public class AnalyzerTest { // Make sure table aliases aren't visible across union operands. AnalyzesOk("select a.smallint_col from functional.alltypes a " + "union select a.int_col from functional.alltypessmall a"); + // All columns compatible with NULL. + AnalyzesOk("select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, string_col, timestamp_col, year," + + "month from functional.alltypes union " + + "select NULL, NULL, NULL, NULL, NULL, NULL, " + + "NULL, NULL, NULL, NULL, NULL, NULL," + + "NULL from functional.alltypes"); // No from clause. Has literals and NULLs. Requires implicit casts. 
AnalyzesOk("select 1, 2, 3 " + @@ -1597,14 +1783,14 @@ public class AnalyzerTest { AnalysisError("select bool_col from functional.alltypes " + "union select string_col from functional.alltypes", "Incompatible return types 'BOOLEAN' and 'STRING' " + - "of exprs 'bool_col' and 'string_col'."); + "of exprs 'bool_col' and 'string_col'."); // Incompatible types, longer union chain. AnalysisError("select int_col, string_col from functional.alltypes " + "union select tinyint_col, bool_col from functional.alltypes " + "union select smallint_col, int_col from functional.alltypes " + "union select smallint_col, bool_col from functional.alltypes", "Incompatible return types 'STRING' and 'BOOLEAN' of " + - "exprs 'string_col' and 'bool_col'."); + "exprs 'string_col' and 'bool_col'."); // Invalid ordinal in order by. AnalysisError("(select int_col from functional.alltypes) " + "union (select int_col from functional.alltypessmall) order by 2", @@ -1751,7 +1937,7 @@ public class AnalyzerTest { AnalysisError("alter table functional.alltypes drop column year", "Cannot drop partition column: year"); - + // Tables should always have at least 1 column AnalysisError("alter table functional_seq_snap.bad_seq_snap drop column field", "Cannot drop column 'field' from functional_seq_snap.bad_seq_snap. 
" + @@ -1783,7 +1969,7 @@ public class AnalyzerTest { AnalysisError("alter table functional.alltypes change column int_col Tinyint_col int", "Column already exists: Tinyint_col"); - + // Table/Db does not exist AnalysisError("alter table db_does_not_exist.alltypes change c1 c2 int", "Unknown database: db_does_not_exist"); @@ -1799,9 +1985,9 @@ public class AnalyzerTest { "set location '/a/b'"); AnalyzesOk("alter table functional.alltypes PARTITION (month=11, year=2010) " + "set fileformat parquetfile"); - AnalyzesOk("alter table functional.stringpartitionkey PARTITION " + + AnalyzesOk("alter table functional.stringpartitionkey PARTITION " + "(string_col='partition1') set fileformat parquetfile"); - AnalyzesOk("alter table functional.stringpartitionkey PARTITION " + + AnalyzesOk("alter table functional.stringpartitionkey PARTITION " + "(string_col='PaRtiTion1') set location '/a/b/c'"); // Partition spec does not exist @@ -1820,10 +2006,10 @@ public class AnalyzerTest { AnalysisError("alter table functional.alltypesnopart PARTITION (month=1) " + "set location '/a/b/c'", "Table is not partitioned: functional.alltypesnopart"); - AnalysisError("alter table functional.stringpartitionkey PARTITION " + + AnalysisError("alter table functional.stringpartitionkey PARTITION " + "(string_col='partition2') set location '/a/b'", "No matching partition spec found: (string_col='partition2')"); - AnalysisError("alter table functional.stringpartitionkey PARTITION " + + AnalysisError("alter table functional.stringpartitionkey PARTITION " + "(string_col='partition2') set fileformat sequencefile", "No matching partition spec found: (string_col='partition2')"); @@ -1859,7 +2045,7 @@ public class AnalyzerTest { AnalysisError("alter table functional.alltypes rename to db_does_not_exist.new_table", "Unknown database: db_does_not_exist"); } - + @Test public void TestDrop() throws AnalysisException { AnalyzesOk("drop database functional"); @@ -1954,6 +2140,12 @@ public class AnalyzerTest { 
"select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, " + "string_col, timestamp_col, NULL, NULL from functional.alltypes"); + // Fully dynamic partitions with NULL partition keys and column values. + AnalyzesOk("insert " + qualifier + " table functional.alltypessmall " + + "partition (year, month)" + + "select NULL, NULL, NULL, NULL, NULL, NULL, " + + "NULL, NULL, NULL, NULL, NULL, NULL, " + + "NULL from functional.alltypes"); // Fully dynamic partitions. Order of corresponding select list items doesn't matter, // as long as they appear at the very end of the select list. AnalyzesOk("insert " + qualifier + " table functional.alltypessmall " + @@ -1967,12 +2159,24 @@ public class AnalyzerTest { "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, string_col, timestamp_col, month " + "from functional.alltypes"); + // Partially dynamic partitions with NULL static partition key value. + AnalyzesOk("insert " + qualifier + " table functional.alltypessmall " + + "partition (year=NULL, month)" + + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, string_col, timestamp_col, year from " + + "functional.alltypes"); // Partially dynamic partitions. AnalyzesOk("insert " + qualifier + " table functional.alltypessmall " + "partition (year, month=4)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, string_col, timestamp_col, year from " + "functional.alltypes"); + // Partially dynamic partitions with NULL static partition key value. 
+ AnalyzesOk("insert " + qualifier + " table functional.alltypessmall " + + "partition (year, month=NULL)" + + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, string_col, timestamp_col, year from " + + "functional.alltypes"); // Partially dynamic partitions with NULL literal as column. AnalyzesOk("insert " + qualifier + " table functional.alltypessmall " + "partition (year=2009, month)" + @@ -2004,8 +2208,8 @@ public class AnalyzerTest { "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", "No matching select list item found for dynamic partition 'year'.\n" + - "The select list items corresponding to dynamic partition keys " + - "must be at the end of the select list."); + "The select list items corresponding to dynamic partition keys " + + "must be at the end of the select list."); // No corresponding select list items of partially dynamic partitions. AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year=2009, month)" + @@ -2013,8 +2217,8 @@ public class AnalyzerTest { "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", "No matching select list item found for dynamic partition 'month'.\n" + - "The select list items corresponding to dynamic partition keys " + - "must be at the end of the select list."); + "The select list items corresponding to dynamic partition keys " + + "must be at the end of the select list."); // No corresponding select list items of partially dynamic partitions. 
AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year, month=4)" + @@ -2022,24 +2226,24 @@ public class AnalyzerTest { "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", "No matching select list item found for dynamic partition 'year'.\n" + - "The select list items corresponding to dynamic partition keys " + - "must be at the end of the select list."); + "The select list items corresponding to dynamic partition keys " + + "must be at the end of the select list."); // Select '*' includes partitioning columns, and hence, is not union compatible. AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year=2009, month=4)" + "select * from functional.alltypes", "Target table 'alltypessmall' and result of select statement are not union " + - "compatible.\n" + - "Target table expects 11 columns but the select statement returns 13."); + "compatible.\n" + + "Target table expects 11 columns but the select statement returns 13."); // Select '*' includes partitioning columns // but they don't appear at the end of the select list. AnalysisError("insert " + qualifier + " table functional.alltypessmall partition (year, month)" + "select * from functional.alltypes", "Target table 'alltypessmall' and result of select statement are not union " + - "compatible.\n" + - "Incompatible types 'INT' and 'STRING' in column " + - "'functional.alltypes.string_col'."); + "compatible.\n" + + "Incompatible types 'INT' and 'STRING' in column " + + "'functional.alltypes.string_col'."); } /** @@ -2058,6 +2262,11 @@ public class AnalyzerTest { "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, string_col, timestamp_col from " + "functional.alltypes"); + // All NULL column values. 
+ AnalyzesOk("insert " + qualifier + " table functional.alltypesnopart " + + "select NULL, NULL, NULL, NULL, NULL, NULL, " + + "NULL, NULL, NULL, NULL, NULL " + + "from functional.alltypes"); String hbaseQuery = "INSERT " + qualifier + " TABLE " + "functional.hbaseinsertalltypesagg select id, bigint_col, bool_col, " + @@ -2090,6 +2299,18 @@ public class AnalyzerTest { "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes"); // Static partition with NULL partition keys. + AnalyzesOk("insert " + qualifier + " table functional.alltypessmall " + + "partition (year=NULL, month=NULL)" + + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, string_col, timestamp_col " + + "from functional.alltypes"); + // Static partition with NULL column values. + AnalyzesOk("insert " + qualifier + " table functional.alltypessmall " + + "partition (year=NULL, month=NULL)" + + "select NULL, NULL, NULL, NULL, NULL, NULL, " + + "NULL, NULL, NULL, NULL, NULL " + + "from functional.alltypes"); + // Static partition with NULL partition keys. AnalyzesOk("insert " + qualifier + " table functional.alltypessmall " + "partition (year=NULL, month=NULL)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + @@ -2204,7 +2425,7 @@ public class AnalyzerTest { "Unsupported type 'BINARY' in 'bin_col'."); // Mixed supported/unsupported types. 
AnalysisError("select int_col, dec_col, str_col, bin_col " + - "from functional.unsupported_types", + "from functional.unsupported_types", "Unsupported type 'DECIMAL' in 'dec_col'."); } diff --git a/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java b/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java index 46006249c..582e5ea79 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java @@ -92,6 +92,7 @@ public class ParserTest { @Test public void TestSelect() { ParsesOk("select a from tbl"); ParsesOk("select a, b, c, d from tbl"); + ParsesOk("select true, false, NULL from tbl"); ParsesOk("select all a, b, c from tbl"); ParserError("a from tbl"); ParserError("select a b c from tbl"); @@ -167,6 +168,16 @@ public class ParserTest { "inner join src src3 using (d, e, f) " + "where src2.bla = src3.bla " + "order by src1.key, src1.value, src2.key, src2.value, src3.key, src3.value"); + // Test NULLs in on clause. 
+ ParsesOk("select * from src src1 " + + "left outer join src src2 on NULL " + + "right outer join src src3 on (NULL) " + + "full outer join src src3 on NULL " + + "left semi join src src3 on (NULL) " + + "join src src3 on NULL " + + "inner join src src3 on (NULL) " + + "where src2.bla = src3.bla " + + "order by src1.key, src1.value, src2.key, src2.value, src3.key, src3.value"); ParserError("select * from src src1 join src src2 using (1)"); ParserError("select * from src src1 join src src2 on ('a')"); ParserError("select * from src src1 " + @@ -176,13 +187,21 @@ public class ParserTest { @Test public void TestWhereClause() { ParsesOk("select a, b, count(c) from test where a > 15"); ParsesOk("select a, b, count(c) from test where true"); + ParsesOk("select a, b, count(c) from test where NULL"); ParserError("select a, b, count(c) where a > 15 from test"); ParserError("select a, b, count(c) from test where 15"); + ParserError("select a, b, count(c) from test where a + b"); + // Does not parse to a predicate although return value is boolean. + ParserError("select a, b, count(c) where case a when b then true else false"); + ParserError("select a, b, count(c) where if (a > b, true, false)"); + // SlotRef is not a predicate. 
+ ParserError("select a, b, count(c) where bool_col"); } @Test public void TestGroupBy() { ParsesOk("select a, b, count(c) from test group by 1, 2"); ParsesOk("select a, b, count(c) from test group by a, b"); + ParsesOk("select a, b, count(c) from test group by true, false, NULL"); // semantically wrong but parses fine ParsesOk("select a, b, count(c) from test group by 1, b"); ParserError("select a, b, count(c) from test group 1, 2"); @@ -194,15 +213,22 @@ public class ParserTest { "order by string_col, 15.7 * float_col, int_col + bigint_col"); ParsesOk("select int_col, string_col, bigint_col, count(*) from alltypes " + "order by string_col asc, 15.7 * float_col desc, int_col + bigint_col asc"); + ParsesOk("select int_col from alltypes order by true, false, NULL"); ParserError("select int_col, string_col, bigint_col, count(*) from alltypes " + "order by by string_col asc desc"); } @Test public void TestHaving() { ParsesOk("select a, b, count(c) from test group by a, b having count(*) > 5"); + ParsesOk("select a, b, count(c) from test group by a, b having NULL"); + ParsesOk("select a, b, count(c) from test group by a, b having true"); + ParsesOk("select a, b, count(c) from test group by a, b having false"); ParserError("select a, b, count(c) from test group by a, b having 5"); ParserError("select a, b, count(c) from test group by a, b having order by 5"); ParserError("select a, b, count(c) from test having count(*) > 5 group by a, b"); + // Does not parse to a predicate although return value is boolean. 
+ ParserError("select count(c) group by a having case a when b then true else false"); + ParserError("select count(c) group by if (a > b, true, false)"); } @Test public void TestLimit() { @@ -212,6 +238,9 @@ public class ParserTest { ParserError("select a, b, c from test inner join test2 using(a) limit 10 + 10"); ParserError("select a, b, c from test inner join test2 using(a) limit 10 " + "where a > 10"); + ParserError("select a, b, c from test inner join test2 using(a) limit true"); + ParserError("select a, b, c from test inner join test2 using(a) limit false"); + ParserError("select a, b, c from test inner join test2 using(a) limit NULL"); } @Test public void TestUnion() { @@ -314,7 +343,7 @@ public class ParserTest { // NULL literal predicate. ParsesOk("select a from t where NULL OR NULL"); ParsesOk("select a from t where NULL AND NULL"); - // NULL in select list currently becomes a literal predicate. + // NULL in select list becomes a literal predicate. ParsesOk("select NULL from t"); // bool literal predicate ParsesOk("select a from t where true"); @@ -399,15 +428,24 @@ public class ParserTest { ParserError("select &1 from t"); ParserError("select =1 from t"); - // NULL literal in binary predicate. - for (BinaryPredicate.Operator op : BinaryPredicate.Operator.values()) { - ParsesOk("select a from t where a " + op.toString() + " NULL"); - } - // bool literal in binary predicate. + // bool and NULL literals in binary predicate. for (BinaryPredicate.Operator op : BinaryPredicate.Operator.values()) { ParsesOk("select a from t where a " + op.toString() + " true"); ParsesOk("select a from t where a " + op.toString() + " false"); + ParsesOk("select a from t where a " + op.toString() + " NULL"); } + // bool and NULL literals in compound predicates with binary ops. 
+ for (CompoundPredicate.Operator op : CompoundPredicate.Operator.values()) { + if (op == CompoundPredicate.Operator.NOT) { + continue; + } + ParsesOk("select a from t where true " + op.toString() + + " false " + op.toString() + " NULL"); + // with negation + ParsesOk("select a from t where !true " + op.toString() + + " !false " + op.toString() + " !NULL"); + } + // test string literals with and without quotes in the literal ParsesOk("select 5, 'five', 5.0, i + 5 from t"); ParsesOk("select \"\\\"five\\\"\" from t\n"); @@ -446,15 +484,29 @@ public class ParserTest { // Single backslash is a scanner error. ScannerError("select \"\\\" from t"); - // NULL literal in arithmetic expr + // bool and NULL literal in arithmetic expr with binary ops. for (ArithmeticExpr.Operator op : ArithmeticExpr.Operator.values()) { + if (op == ArithmeticExpr.Operator.BITNOT) { + continue; + } + // NULL as operand parses ok in select list. + ParsesOk("select a " + op.toString() + " NULL"); + // Predicates are not allowed as operands to arithmetic exprs. + ParserError("select a " + op.toString() + " true"); + ParserError("select a " + op.toString() + " false"); + // Does not parse in where clause because an arithmetic expr is not a predicate. ParserError("select a from t where a " + op.toString() + " NULL"); - } - // bool literal in arithmetic expr - for (ArithmeticExpr.Operator op : ArithmeticExpr.Operator.values()) { ParserError("select a from t where a " + op.toString() + " true"); ParserError("select a from t where a " + op.toString() + " false"); } + // NULL as operand parses ok in select list. + ParsesOk("select ~NULL"); + // Predicates are not allowed as operands to arithmetic exprs. + ParserError("select ~true, ~false"); + // Does not parse in where clause because an arithmetic expr is not a predicate. 
+ ParserError("select a from t where ~true"); + ParserError("select a from t where ~false"); + ParserError("select a from t where ~NULL"); } // test string literal s with single and double quotes @@ -501,13 +553,18 @@ public class ParserTest { // Non-function call like versions. ParsesOk("select a + interval b " + timeUnit.toString()); ParsesOk("select a - interval b " + timeUnit.toString()); + ParsesOk("select NULL + interval NULL " + timeUnit.toString()); + ParsesOk("select NULL - interval NULL " + timeUnit.toString()); // Reversed interval and timestamp is ok for addition. ParsesOk("select interval b " + timeUnit.toString() + " + a"); + ParsesOk("select interval NULL " + timeUnit.toString() + " + NULL"); // Reversed interval and timestamp is an error for subtraction. ParserError("select interval b " + timeUnit.toString() + " - a"); // Function-call like versions. ParsesOk("select date_add(a, interval b " + timeUnit.toString() + ")"); ParsesOk("select date_sub(a, interval b " + timeUnit.toString() + ")"); + ParsesOk("select date_add(NULL, interval NULL " + timeUnit.toString() + ")"); + ParsesOk("select date_sub(NULL, interval NULL " + timeUnit.toString() + ")"); // Invalid function name for timestamp arithmetic expr should parse ok. ParsesOk("select error(a, interval b " + timeUnit.toString() + ")"); // Invalid time unit parses ok. @@ -540,6 +597,10 @@ public class ParserTest { ParsesOk("select case when a > 2 then x when false then false else true end from t"); ParsesOk("select case false when a > 2 then x when '6' then false else true end " + "from t"); + // Test NULLs; + ParsesOk("select case NULL when NULL then NULL when NULL then NULL else NULL end " + + "from t"); + ParsesOk("select case when NULL then NULL when NULL then NULL else NULL end from t"); // Missing end. ParserError("select case a when true then x when false then y else z from t"); // Missing else after first when. 
@@ -550,6 +611,7 @@ public class ParserTest { @Test public void TestCastExprs() { ParsesOk("select cast(a + 5.0 as string) from t"); + ParsesOk("select cast(NULL as string) from t"); ParserError("select cast(a + 5.0 as badtype) from t"); ParserError("select cast(a + 5.0, string) from t"); } @@ -557,6 +619,7 @@ public class ParserTest { @Test public void TestConditionalExprs() { ParsesOk("select if(TRUE, TRUE, FALSE) from t"); + ParsesOk("select if(NULL, NULL, NULL) from t"); ParsesOk("select c1, c2, if(TRUE, TRUE, FALSE) from t"); ParsesOk("select if(1 = 2, c1, c2) from t"); ParsesOk("select if(1 = 2, c1, c2)"); @@ -565,6 +628,8 @@ public class ParserTest { @Test public void TestAggregateExprs() { ParsesOk("select count(*), count(a), count(distinct a, b) from t"); + ParsesOk("select count(NULL), count(TRUE), count(FALSE), " + + "count(distinct TRUE, FALSE, NULL) from t"); ParserError("select count() from t"); ParsesOk("select count(all *) from t"); ParsesOk("select count(all 1) from t"); @@ -594,6 +659,9 @@ public class ParserTest { ParsesOk("select a, b, c from t where i like 'abc%'"); ParsesOk("select a, b, c from t where i rlike 'abc.*'"); ParsesOk("select a, b, c from t where i regexp 'abc.*'"); + ParsesOk("select a, b, c from t where NULL like NULL"); + ParsesOk("select a, b, c from t where NULL rlike NULL"); + ParsesOk("select a, b, c from t where NULL regexp NULL"); ParsesOk("select a, b, c from t where i is null"); ParsesOk("select a, b, c from t where i is not null"); ParsesOk("select a, b, c from t where i + 5 is not null"); @@ -617,11 +685,12 @@ public class ParserTest { notStr + "b = 6) " + andStr + " " + notStr + "c = 7"); // select a, b, c from t where (!(!a = 5)) ParsesOk("select a, b, c from t where (" + notStr + "(" + notStr + "a = 5))"); + // semantically incorrect negation, but parses ok + ParsesOk("select a, b, c from t where a = " + notStr + "5"); // unbalanced parentheses ParserError("select a, b, c from t where (a = 5 " + orStr + " b = 6) " + 
andStr + " c = 7)"); ParserError("select a, b, c from t where ((a = 5 " + orStr + " b = 6) " + andStr + " c = 7"); // incorrectly positioned negation (!) - ParserError("select a, b, c from t where a = " + notStr + "5"); ParserError("select a, b, c from t where a = 5 " + orStr + " " + notStr); ParserError("select a, b, c from t where " + notStr + "(a = 5) " + orStr + " " + notStr); } @@ -712,6 +781,9 @@ public class ParserTest { // Static partition with two NULL partitioning keys. ParsesOk("insert " + qualifier + " t partition (pk1=NULL, pk2=NULL) " + "select a from src where b > 5"); + // Static partition with boolean partitioning keys. + ParsesOk("insert " + qualifier + " t partition (pk1=false, pk2=true) " + + "select a from src where b > 5"); } @Test public void TestInsert() { @@ -1147,8 +1219,8 @@ public class ParserTest { "select c, b, c where a = 5\n" + " ^\n" + "Encountered: WHERE\n" + - "Expected: AS, BETWEEN, DIV, FROM, IS, IN, LIKE, LIMIT, NOT, ORDER, " + - "REGEXP, RLIKE, UNION, COMMA, IDENTIFIER\n"); + "Expected: AND, AS, BETWEEN, DIV, FROM, IS, IN, LIKE, LIMIT, NOT, OR, " + + "ORDER, REGEXP, RLIKE, UNION, COMMA, IDENTIFIER\n"); // missing table list ParserError("select c, b, c from where a = 5", diff --git a/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java b/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java index 838c4fed1..c1faa6eed 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java @@ -49,11 +49,11 @@ public class ToSqlTest { public void selectListTest() { testToSql("select 1234, 1234.0, 1234.0 + 1, 1234.0 + 1.0, 1 + 1, \"abc\" " + "from functional.alltypes", - "SELECT 1234, 1234.0, 1234.0 + 1.0, 1234.0 + 1.0, 1 + 1, 'abc' " + + "SELECT 1234, 1234.0, 1234.0 + 1.0, 1234.0 + 1.0, 1 + 1, 'abc' " + "FROM functional.alltypes"); - testToSql("select null, 1234 < 5678, 1234.0 < 5678.0, 1234 < null " + + testToSql("select null, 1234 < 5678, 1234.0 
< 5678.0, 1234 < null " + "from functional.alltypes", - "SELECT NULL, 1234 < 5678, 1234.0 < 5678.0, 1234 < NULL " + + "SELECT NULL, 1234 < 5678, 1234.0 < 5678.0, 1234 < NULL " + "FROM functional.alltypes"); testToSql("select int_col + int_col, " + "tinyint_col + int_col, " + @@ -75,24 +75,24 @@ public class ToSqlTest { // Test the toSql() output of the where clause. @Test public void whereTest() { - testToSql("select id from functional.alltypes " + + testToSql("select id from functional.alltypes " + "where tinyint_col < 40 OR int_col = 4 AND float_col > 1.4", - "SELECT id FROM functional.alltypes " + + "SELECT id FROM functional.alltypes " + "WHERE tinyint_col < 40 OR int_col = 4 AND float_col > 1.4"); testToSql("select id from functional.alltypes where string_col = \"abc\"", "SELECT id FROM functional.alltypes WHERE string_col = 'abc'"); testToSql("select id from functional.alltypes where string_col = 'abc'", "SELECT id FROM functional.alltypes WHERE string_col = 'abc'"); - testToSql("select id from functional.alltypes " + + testToSql("select id from functional.alltypes " + "where 5 between smallint_col and int_col", "SELECT id FROM functional.alltypes WHERE 5 BETWEEN smallint_col AND int_col"); - testToSql("select id from functional.alltypes " + + testToSql("select id from functional.alltypes " + "where 5 not between smallint_col and int_col", - "SELECT id FROM functional.alltypes " + + "SELECT id FROM functional.alltypes " + "WHERE 5 NOT BETWEEN smallint_col AND int_col"); testToSql("select id from functional.alltypes where 5 in (smallint_col, int_col)", "SELECT id FROM functional.alltypes WHERE 5 IN (smallint_col, int_col)"); - testToSql("select id from functional.alltypes " + + testToSql("select id from functional.alltypes " + "where 5 not in (smallint_col, int_col)", "SELECT id FROM functional.alltypes WHERE 5 NOT IN (smallint_col, int_col)"); } @@ -100,21 +100,21 @@ public class ToSqlTest { // Test the toSql() output of aggregate and group by expressions. 
@Test public void aggregationTest() { - testToSql("select COUNT(*), count(id), COUNT(id), SUM(id), AVG(id) " + + testToSql("select COUNT(*), count(id), COUNT(id), SUM(id), AVG(id) " + "from functional.alltypes group by tinyint_col", - "SELECT COUNT(*), COUNT(id), COUNT(id), SUM(id), AVG(id) " + + "SELECT COUNT(*), COUNT(id), COUNT(id), SUM(id), AVG(id) " + "FROM functional.alltypes GROUP BY tinyint_col"); testToSql("select avg(float_col / id) from functional.alltypes group by tinyint_col", "SELECT AVG(float_col / id) " + "FROM functional.alltypes GROUP BY tinyint_col"); - testToSql("select avg(double_col) from functional.alltypes " + + testToSql("select avg(double_col) from functional.alltypes " + "group by int_col, tinyint_col, bigint_col", - "SELECT AVG(double_col) FROM functional.alltypes " + + "SELECT AVG(double_col) FROM functional.alltypes " + "GROUP BY int_col, tinyint_col, bigint_col"); // Group by with having clause - testToSql("select avg(id) from functional.alltypes " + + testToSql("select avg(id) from functional.alltypes " + "group by tinyint_col having count(tinyint_col) > 10", - "SELECT AVG(id) FROM functional.alltypes " + + "SELECT AVG(id) FROM functional.alltypes " + "GROUP BY tinyint_col HAVING COUNT(tinyint_col) > 10"); testToSql("select sum(id) from functional.alltypes group by tinyint_col " + "having avg(tinyint_col) > 10 AND count(tinyint_col) > 5", @@ -138,13 +138,13 @@ public class ToSqlTest { // Test the toSql() output of queries with all clauses. 
@Test public void allTest() { - testToSql("select bigint_col, avg(double_col), sum(tinyint_col) " + + testToSql("select bigint_col, avg(double_col), sum(tinyint_col) " + "from functional.alltypes " + "where double_col > 2.5 AND string_col != \"abc\"" + "group by bigint_col, int_col " + "having count(int_col) > 10 OR sum(bigint_col) > 20 " + "order by 2 DESC, 3 ASC", - "SELECT bigint_col, AVG(double_col), SUM(tinyint_col) " + + "SELECT bigint_col, AVG(double_col), SUM(tinyint_col) " + "FROM functional.alltypes " + "WHERE double_col > 2.5 AND string_col != 'abc' " + "GROUP BY bigint_col, int_col " + @@ -169,12 +169,12 @@ public class ToSqlTest { // With 'order by' and 'limit' on union, and also on last select. testToSql("(select bool_col, int_col from functional.alltypes) " + "union all (select bool_col, int_col from functional.alltypessmall) " + - "union all (select bool_col, bigint_col " + + "union all (select bool_col, bigint_col " + "from functional.alltypes order by 1 limit 1) " + "order by int_col, bool_col limit 10", "SELECT bool_col, int_col FROM functional.alltypes " + "UNION ALL SELECT bool_col, int_col FROM functional.alltypessmall " + - "UNION ALL SELECT bool_col, bigint_col " + + "UNION ALL SELECT bool_col, bigint_col " + "FROM functional.alltypes ORDER BY 1 ASC LIMIT 1 " + "ORDER BY int_col ASC, bool_col ASC LIMIT 10"); // With 'order by' and 'limit' on union but not on last select. @@ -203,18 +203,18 @@ public class ToSqlTest { // Insert into unpartitioned table without partition clause. 
testToSql("insert into table functional.alltypesnopart " + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + - "float_col, double_col, date_string_col, string_col, timestamp_col " + + "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", - "INSERT INTO TABLE functional.alltypesnopart " + + "INSERT INTO TABLE functional.alltypesnopart " + "SELECT id, bool_col, tinyint_col, " + "smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, " + "string_col, timestamp_col FROM functional.alltypes"); // Insert into overwrite unpartitioned table without partition clause. testToSql("insert overwrite table functional.alltypesnopart " + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + - "float_col, double_col, date_string_col, string_col, timestamp_col " + + "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", - "INSERT OVERWRITE TABLE functional.alltypesnopart " + + "INSERT OVERWRITE TABLE functional.alltypesnopart " + "SELECT id, bool_col, tinyint_col, " + "smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, " + "string_col, timestamp_col FROM functional.alltypes"); @@ -222,34 +222,34 @@ public class ToSqlTest { testToSql("insert into table functional.alltypessmall " + "partition (year=2009, month=4)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + - "float_col, double_col, date_string_col, string_col, timestamp_col " + + "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", - "INSERT INTO TABLE functional.alltypessmall " + + "INSERT INTO TABLE functional.alltypessmall " + "PARTITION (year=2009, month=4) SELECT id, " + "bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, " + - "double_col, date_string_col, string_col, timestamp_col " + + "double_col, date_string_col, string_col, timestamp_col " + "FROM 
functional.alltypes"); // Fully dynamic partitions. testToSql("insert into table functional.alltypessmall " + "partition (year, month)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + - "float_col, double_col, date_string_col, string_col, timestamp_col, year, " + + "float_col, double_col, date_string_col, string_col, timestamp_col, year, " + "month from functional.alltypes", - "INSERT INTO TABLE functional.alltypessmall " + + "INSERT INTO TABLE functional.alltypessmall " + "PARTITION (year, month) SELECT id, bool_col, " + "tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, " + - "date_string_col, string_col, timestamp_col, year, month " + + "date_string_col, string_col, timestamp_col, year, month " + "FROM functional.alltypes"); // Partially dynamic partitions. testToSql("insert into table functional.alltypessmall " + "partition (year=2009, month)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + - "float_col, double_col, date_string_col, string_col, timestamp_col, month " + + "float_col, double_col, date_string_col, string_col, timestamp_col, month " + "from functional.alltypes", - "INSERT INTO TABLE functional.alltypessmall " + + "INSERT INTO TABLE functional.alltypessmall " + "PARTITION (year=2009, month) SELECT id, " + "bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, " + - "double_col, date_string_col, string_col, timestamp_col, month " + + "double_col, date_string_col, string_col, timestamp_col, month " + "FROM functional.alltypes"); } } diff --git a/testdata/workloads/functional-query/queries/QueryTest/aggregation.test b/testdata/workloads/functional-query/queries/QueryTest/aggregation.test index dc26896d5..e9035b9a5 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/aggregation.test +++ b/testdata/workloads/functional-query/queries/QueryTest/aggregation.test @@ -829,3 +829,25 @@ timestamp, bigint 2010-01-01 01:03:19.530000000,1 2010-01-01 
01:04:20.160000000,1 ==== +---- QUERY +# Test NULLs in aggregate functions +select count(NULL), min(NULL), max(NULL), sum(NULL), avg(NULL) from alltypesagg +---- TYPES +bigint, NULL, NULL, NULL, double +---- RESULTS +0,NULL,NULL,NULL,NULL +==== +---- QUERY +# Test ignored distinct in MIN and MAX with NULLs +---- TYPES +NULL, NULL +---- RESULTS +NULL,NULL +---- QUERY +# TODO: Fix count(distinct null) to return 0 instead of 1 +select count(distinct NULL) from alltypesagg +---- TYPES +bigint +---- RESULTS +1 +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test index 676d11f7e..c1c3827cb 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test @@ -694,6 +694,63 @@ boolean true ==== ---- QUERY +# IN predicate with NULLs and other types +select NULL in ('a', NULL, 'b') +---- TYPES +boolean +---- RESULTS +NULL +==== +---- QUERY +select NULL not in ('a', NULL, 'b') +---- TYPES +boolean +---- RESULTS +NULL +==== +---- QUERY +select NULL not in (1.0, NULL, 2.0) +---- TYPES +boolean +---- RESULTS +NULL +==== +---- QUERY +select NULL in (1.0, NULL, 2.0) +---- TYPES +boolean +---- RESULTS +NULL +==== +---- QUERY +select NULL in (true, NULL, false) +---- TYPES +boolean +---- RESULTS +NULL +==== +---- QUERY +select NULL not in (true, NULL, false) +---- TYPES +boolean +---- RESULTS +NULL +==== +---- QUERY +select true in (NULL, false) +---- TYPES +boolean +---- RESULTS +NULL +==== +---- QUERY +select true not in (NULL, false) +---- TYPES +boolean +---- RESULTS +NULL +==== +---- QUERY select count(*) from alltypesagg where true in (bool_col, tinyint_col) ---- TYPES diff --git a/testdata/workloads/functional-query/queries/QueryTest/insert_null.test b/testdata/workloads/functional-query/queries/QueryTest/insert_null.test index 6c468aea9..17d955841 100644 --- 
a/testdata/workloads/functional-query/queries/QueryTest/insert_null.test +++ b/testdata/workloads/functional-query/queries/QueryTest/insert_null.test @@ -15,16 +15,42 @@ RELOAD nullinsert ---- TYPES string, string, string, string, int ---- RESULTS -'NULL','','NULL','\N',NULL +'NULL','','NULL','NULL',NULL ==== ---- QUERY -select * from alt_nullinsert +select * from nullinsert_alt ---- SETUP -RELOAD alt_nullinsert +RELOAD nullinsert_alt ---- TYPES string ---- RESULTS -'\N,,NULL,\\N,\N' +'\N,,NULL,\N,\N' +==== +---- QUERY +# Test inserting NULLs for all types +insert overwrite table alltypesnopart_insert +select NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL +from alltypessmall limit 10 +---- RESULTS +: 10 +==== +---- QUERY +select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, +float_col, double_col, date_string_col, string_col, timestamp_col +from alltypesnopart_insert +---- TYPES +int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp +---- RESULTS +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL +NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL','NULL',NULL ==== ---- QUERY # Test NULL partition keys using static partition insert. Both partitions keys are NULL. @@ -71,7 +97,7 @@ year=__HIVE_DEFAULT_PARTITION__/month=10/: 25 ==== ---- QUERY # Verify contents of alltypesinsert. 
-select cout(*) from alltypesinsert where year is null and month=10 +select count(*) from alltypesinsert where year is null and month=10 ---- TYPES bigint ---- RESULTS diff --git a/testdata/workloads/functional-query/queries/QueryTest/union.test b/testdata/workloads/functional-query/queries/QueryTest/union.test index fffc144f8..ab9d09e8b 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/union.test +++ b/testdata/workloads/functional-query/queries/QueryTest/union.test @@ -706,7 +706,7 @@ select 2, 'b', NULL, 20.0f union all select 3, 'c', NULL, 30.0f ---- TYPES -tinyint, string, boolean, float +tinyint, string, null, float ---- RESULTS: VERIFY_IS_EQUAL_SORTED 1,'a',NULL,10.0 2,'b',NULL,20.0 @@ -720,7 +720,7 @@ select 2, 'b', NULL, 20.0f union distinct select 1, 'a', NULL, 10.0f ---- TYPES -tinyint, string, boolean, float +tinyint, string, null, float ---- RESULTS: VERIFY_IS_EQUAL_SORTED 1,'a',NULL,10.0 2,'b',NULL,20.0 diff --git a/tests/query_test/test_insert.py b/tests/query_test/test_insert.py index b56ab0a01..ed7457f40 100644 --- a/tests/query_test/test_insert.py +++ b/tests/query_test/test_insert.py @@ -30,7 +30,6 @@ class TestInsertQueries(ImpalaTestSuite): self.run_test_case('QueryTest/insert_overwrite', vector) @pytest.mark.execute_serially - @pytest.mark.xfail(run=False, reason="IMPALA-82") def test_insert_null(self, vector): self.run_test_case('QueryTest/insert_null', vector)