From 1a3d7ffd4fd392b3ed831dfc7a3bfcfdb8cb8bbd Mon Sep 17 00:00:00 2001 From: Jim Apple Date: Wed, 26 Aug 2015 20:18:26 -0700 Subject: [PATCH] IMPALA-2147: Support IS [NOT] DISTINCT FROM and "<=>" predicates Enforces that the planner treats IS NOT DISTINCT FROM as eligible for hash joins, but does not find the minimum spanning tree of equivalences for use in optimizing query plans; this is left as future work. Change-Id: I62c5300b1fbd764796116f95efe36573eed4c8d0 Reviewed-on: http://gerrit.cloudera.org:8080/710 Reviewed-by: Jim Apple Tested-by: Internal Jenkins --- be/src/exec/aggregation-node.cc | 4 +- be/src/exec/hash-join-node.cc | 12 +- be/src/exec/hash-join-node.h | 4 + be/src/exec/hash-table-test.cc | 18 +- be/src/exec/hash-table.cc | 17 +- be/src/exec/hash-table.h | 15 +- be/src/exec/hash-table.inline.h | 2 +- be/src/exec/old-hash-table-test.cc | 12 +- be/src/exec/old-hash-table.cc | 15 +- be/src/exec/old-hash-table.h | 16 +- be/src/exec/old-hash-table.inline.h | 2 +- be/src/exec/partitioned-aggregation-node.cc | 5 +- be/src/exec/partitioned-hash-join-node.cc | 14 +- be/src/exec/partitioned-hash-join-node.h | 4 + be/src/exprs/decimal-operators.cc | 73 +++--- be/src/exprs/decimal-operators.h | 4 + be/src/exprs/expr-test.cc | 68 ++++- be/src/exprs/operators.cc | 66 ++++- be/src/exprs/operators.h | 42 ++++ common/function-registry/impala_functions.py | 24 ++ common/thrift/ExternalDataSource.thrift | 2 +- common/thrift/PlanNodes.thrift | 2 + fe/src/main/cup/sql-parser.y | 8 +- .../cloudera/impala/analysis/Analyzer.java | 3 +- .../impala/analysis/BinaryPredicate.java | 13 +- .../impala/analysis/StmtRewriter.java | 13 +- .../cloudera/impala/planner/HashJoinNode.java | 6 +- .../cloudera/impala/planner/HdfsScanNode.java | 31 ++- .../impala/planner/SingleNodePlanner.java | 21 +- .../impala/analysis/AnalyzeExprsTest.java | 110 +++++--- .../cloudera/impala/analysis/ParserTest.java | 3 +- .../PlannerTest/data-source-tables.test | 14 ++ .../queries/PlannerTest/hdfs.test | 237 ++++++++++++++++++ .../queries/PlannerTest/join-order.test | 79 ++++++ .../queries/PlannerTest/joins.test | 128 ++++++++++ .../queries/PlannerTest/subquery-rewrite.test | 149 +++++++++++ .../queries/QueryTest/data-source-tables.test | 66 ++++- .../queries/QueryTest/exprs.test | 58 +++++ .../queries/QueryTest/joins.test | 148 +++++++++++ tests/query_test/test_join_queries.py | 2 +- 40 files changed, 1365 insertions(+), 145 deletions(-) diff --git a/be/src/exec/aggregation-node.cc b/be/src/exec/aggregation-node.cc index d19acd0bf..5871ac24a 100644 --- a/be/src/exec/aggregation-node.cc +++ b/be/src/exec/aggregation-node.cc @@ -144,8 +144,8 @@ Status AggregationNode::Prepare(RuntimeState* state) { } // TODO: how many buckets? - hash_tbl_.reset(new OldHashTable(state, build_expr_ctxs_, probe_expr_ctxs_, 1, - true, true, id(), mem_tracker(), true)); + hash_tbl_.reset(new OldHashTable(state, build_expr_ctxs_, probe_expr_ctxs_, 1, true, + std::vector(build_expr_ctxs_.size(), true), id(), mem_tracker(), true)); if (probe_expr_ctxs_.empty()) { // create single intermediate tuple now; we need to output something diff --git a/be/src/exec/hash-join-node.cc b/be/src/exec/hash-join-node.cc index 65d933097..7a1570470 100644 --- a/be/src/exec/hash-join-node.cc +++ b/be/src/exec/hash-join-node.cc @@ -14,6 +14,8 @@ #include "exec/hash-join-node.h" +#include +#include #include #include "codegen/llvm-codegen.h" @@ -40,6 +42,7 @@ const char* HashJoinNode::LLVM_CLASS_NAME = "class.impala::HashJoinNode"; HashJoinNode::HashJoinNode( ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : BlockingJoinNode("HashJoinNode", tnode.hash_join_node.join_op, pool, tnode, descs), + is_not_distinct_from_(), codegen_process_build_batch_fn_(NULL), process_build_batch_fn_(NULL), process_probe_batch_fn_(NULL) { @@ -70,6 +73,7 @@ Status HashJoinNode::Init(const TPlanNode& tnode) { probe_expr_ctxs_.push_back(ctx); RETURN_IF_ERROR(Expr::CreateExprTree(pool_, eq_join_conjuncts[i].right, &ctx)); build_expr_ctxs_.push_back(ctx); + is_not_distinct_from_.push_back(eq_join_conjuncts[i].is_not_distinct_from); } RETURN_IF_ERROR( Expr::CreateExprTrees(pool_, tnode.hash_join_node.other_join_conjuncts, @@ -104,11 +108,13 @@ Status HashJoinNode::Prepare(RuntimeState* state) { AddExprCtxsToFree(other_join_conjunct_ctxs_); // TODO: default buckets - bool stores_nulls = - join_op_ == TJoinOp::RIGHT_OUTER_JOIN || join_op_ == TJoinOp::FULL_OUTER_JOIN; + const bool stores_nulls = join_op_ == TJoinOp::RIGHT_OUTER_JOIN || + join_op_ == TJoinOp::FULL_OUTER_JOIN || + std::accumulate(is_not_distinct_from_.begin(), is_not_distinct_from_.end(), false, + std::logical_or()); hash_tbl_.reset(new OldHashTable(state, build_expr_ctxs_, probe_expr_ctxs_, child(1)->row_desc().tuple_descriptors().size(), stores_nulls, - false, state->fragment_hash_seed(), mem_tracker())); + is_not_distinct_from_, state->fragment_hash_seed(), mem_tracker())); bool build_codegen_enabled = false; bool probe_codegen_enabled = false; diff --git a/be/src/exec/hash-join-node.h b/be/src/exec/hash-join-node.h index ec947ed9f..b2cd77983 100644 --- a/be/src/exec/hash-join-node.h +++ b/be/src/exec/hash-join-node.h @@ -72,6 +72,10 @@ class HashJoinNode : public BlockingJoinNode { std::vector probe_expr_ctxs_; std::vector build_expr_ctxs_; + /// is_not_distinct_from_[i] is true if and only if the ith equi-join predicate is IS + /// NOT DISTINCT FROM, rather than equality. + std::vector is_not_distinct_from_; + /// non-equi-join conjuncts from the JOIN clause std::vector other_join_conjunct_ctxs_; diff --git a/be/src/exec/hash-table-test.cc b/be/src/exec/hash-table-test.cc index 012711199..7e332ae0d 100644 --- a/be/src/exec/hash-table-test.cc +++ b/be/src/exec/hash-table-test.cc @@ -237,7 +237,8 @@ class HashTableTest : public testing::Test { // Create the hash table and insert the build rows scoped_ptr hash_table; ASSERT_TRUE(CreateHashTable(quadratic, initial_num_buckets, &hash_table)); - HashTableCtx ht_ctx(build_expr_ctxs_, probe_expr_ctxs_, false, false, 1, 0, 1); + HashTableCtx ht_ctx(build_expr_ctxs_, probe_expr_ctxs_, false, + std::vector(build_expr_ctxs_.size(), false), 1, 0, 1); uint32_t hash = 0; bool success = hash_table->CheckAndResize(5, &ht_ctx); @@ -287,7 +288,8 @@ class HashTableTest : public testing::Test { ASSERT_TRUE(CreateHashTable(quadratic, initial_size, &hash_table)); int total_rows = rows_to_insert + additional_rows; - HashTableCtx ht_ctx(build_expr_ctxs_, probe_expr_ctxs_, false, false, 1, 0, 1); + HashTableCtx ht_ctx(build_expr_ctxs_, probe_expr_ctxs_, false, + std::vector(build_expr_ctxs_.size(), false), 1, 0, 1); // Add 1 row with val 1, 2 with val 2, etc. vector build_rows; @@ -340,7 +342,8 @@ class HashTableTest : public testing::Test { MemTracker tracker(100 * 1024 * 1024); scoped_ptr hash_table; ASSERT_TRUE(CreateHashTable(quadratic, num_to_add, &hash_table)); - HashTableCtx ht_ctx(build_expr_ctxs_, probe_expr_ctxs_, false, false, 1, 0, 1); + HashTableCtx ht_ctx(build_expr_ctxs_, probe_expr_ctxs_, false, + std::vector(build_expr_ctxs_.size(), false), 1, 0, 1); // Inserts num_to_add + (num_to_add^2) + (num_to_add^4) + ... + (num_to_add^20) // entries. When num_to_add == 4, then the total number of inserts is 4194300. @@ -389,7 +392,8 @@ class HashTableTest : public testing::Test { void InsertFullTest(bool quadratic, int table_size) { scoped_ptr hash_table; ASSERT_TRUE(CreateHashTable(quadratic, table_size, &hash_table)); - HashTableCtx ht_ctx(build_expr_ctxs_, probe_expr_ctxs_, false, false, 1, 0, 1); + HashTableCtx ht_ctx(build_expr_ctxs_, probe_expr_ctxs_, false, + std::vector(build_expr_ctxs_.size(), false), 1, 0, 1); EXPECT_EQ(hash_table->EmptyBuckets(), table_size); // Insert and probe table_size different tuples. All of them are expected to be @@ -454,7 +458,8 @@ class HashTableTest : public testing::Test { scoped_ptr hash_table; ASSERT_FALSE(CreateHashTable(quadratic, table_size, &hash_table, block_size, max_num_blocks, reserved_blocks)); - HashTableCtx ht_ctx(build_expr_ctxs_, probe_expr_ctxs_, false, false, 1, 0, 1); + HashTableCtx ht_ctx(build_expr_ctxs_, probe_expr_ctxs_, false, + std::vector(build_expr_ctxs_.size(), false), 1, 0, 1); HashTable::Iterator iter = hash_table->Begin(&ht_ctx); EXPECT_TRUE(iter.AtEnd()); @@ -533,7 +538,8 @@ TEST_F(HashTableTest, QuadraticInsertFullTest) { // Test that hashing empty string updates hash value. TEST_F(HashTableTest, HashEmpty) { - HashTableCtx ht_ctx(build_expr_ctxs_, probe_expr_ctxs_, false, false, 1, 2, 1); + HashTableCtx ht_ctx(build_expr_ctxs_, probe_expr_ctxs_, false, + std::vector(build_expr_ctxs_.size(), false), 1, 2, 1); uint32_t seed = 9999; ht_ctx.set_level(0); EXPECT_NE(seed, ht_ctx.Hash(NULL, 0, seed)); diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc index b56099c92..c1493b7a0 100644 --- a/be/src/exec/hash-table.cc +++ b/be/src/exec/hash-table.cc @@ -14,6 +14,9 @@ #include "exec/hash-table.inline.h" +#include +#include + #include "codegen/codegen-anyval.h" #include "codegen/llvm-codegen.h" #include "exprs/expr.h" @@ -77,17 +80,21 @@ static int64_t NULL_VALUE[] = { HashUtil::FNV_SEED, HashUtil::FNV_SEED, static const int64_t INITIAL_DATA_PAGE_SIZES[] = { 64 * 1024, 512 * 1024 }; static const int NUM_SMALL_DATA_PAGES = sizeof(INITIAL_DATA_PAGE_SIZES) / sizeof(int64_t); -HashTableCtx::HashTableCtx(const vector& build_expr_ctxs, - const vector& probe_expr_ctxs, bool stores_nulls, bool finds_nulls, - int32_t initial_seed, int max_levels, int num_build_tuples) +HashTableCtx::HashTableCtx(const std::vector& build_expr_ctxs, + const std::vector& probe_expr_ctxs, bool stores_nulls, + const std::vector& finds_nulls, int32_t initial_seed, + int max_levels, int num_build_tuples) : build_expr_ctxs_(build_expr_ctxs), probe_expr_ctxs_(probe_expr_ctxs), stores_nulls_(stores_nulls), finds_nulls_(finds_nulls), + finds_some_nulls_(std::accumulate( + finds_nulls_.begin(), finds_nulls_.end(), false, std::logical_or())), level_(0), row_(reinterpret_cast(malloc(sizeof(Tuple*) * num_build_tuples))) { // Compute the layout and buffer size to store the evaluated expr results DCHECK_EQ(build_expr_ctxs_.size(), probe_expr_ctxs_.size()); + DCHECK_EQ(build_expr_ctxs_.size(), finds_nulls_.size()); DCHECK(!build_expr_ctxs_.empty()); results_buffer_size_ = Expr::ComputeResultsLayout(build_expr_ctxs_, &expr_values_buffer_offsets_, &var_result_begin_); @@ -170,7 +177,7 @@ bool HashTableCtx::Equals(TupleRow* build_row) { for (int i = 0; i < build_expr_ctxs_.size(); ++i) { void* val = build_expr_ctxs_[i]->GetValue(build_row); if (val == NULL) { - if (!stores_nulls_) return false; + if (!(stores_nulls_ && finds_nulls_[i])) return false; if (!expr_value_null_bits_[i]) return false; continue; } else { @@ -830,7 +837,7 @@ Function* HashTableCtx::CodegenEquals(RuntimeState* state) { // the case where the hash table does not store nulls, this is always false. Value* probe_is_null = codegen->false_value(); uint8_t* null_byte_loc = &expr_value_null_bits_[i]; - if (stores_nulls_) { + if (stores_nulls_ && finds_nulls_[i]) { Value* llvm_null_byte_loc = codegen->CastPtrToLlvmPtr(codegen->ptr_type(), null_byte_loc); Value* null_byte = builder.CreateLoad(llvm_null_byte_loc); diff --git a/be/src/exec/hash-table.h b/be/src/exec/hash-table.h index 595f30068..f27b709d8 100644 --- a/be/src/exec/hash-table.h +++ b/be/src/exec/hash-table.h @@ -108,14 +108,18 @@ class HashTableCtx { /// - build_exprs are the exprs that should be used to evaluate rows during Insert(). /// - probe_exprs are used during Find() /// - stores_nulls: if false, TupleRows with nulls are ignored during Insert - /// - finds_nulls: if false, Find() returns End() for TupleRows with nulls - /// even if stores_nulls is true + /// - finds_nulls: if finds_nulls[i] is false, Find() returns End() for TupleRows with + /// nulls in position i even if stores_nulls is true. /// - initial_seed: Initial seed value to use when computing hashes for rows with /// level 0. Other levels have their seeds derived from this seed. /// - The max levels we will hash with. + /// TODO: stores_nulls is too coarse: for a hash table in which some columns are joined + /// with '<=>' and others with '=', stores_nulls could distinguish between columns + /// in which nulls are stored and columns in which they are not, which could save + /// space by not storing some rows we know will never match. HashTableCtx(const std::vector& build_expr_ctxs, const std::vector& probe_expr_ctxs, bool stores_nulls, - bool finds_nulls, int32_t initial_seed, int max_levels, + const std::vector& finds_nulls, int32_t initial_seed, int max_levels, int num_build_tuples); /// Call to cleanup any resources. @@ -235,7 +239,10 @@ class HashTableCtx { /// TODO: these constants are an ideal candidate to be removed with codegen. /// TODO: ..or with template-ization const bool stores_nulls_; - const bool finds_nulls_; + const std::vector finds_nulls_; + + /// finds_some_nulls_ is just the logical OR of finds_nulls_. + const bool finds_some_nulls_; /// The current level this context is working on. Each level needs to use a /// different seed. diff --git a/be/src/exec/hash-table.inline.h b/be/src/exec/hash-table.inline.h index dd6ead902..27e72f2d2 100644 --- a/be/src/exec/hash-table.inline.h +++ b/be/src/exec/hash-table.inline.h @@ -29,7 +29,7 @@ inline bool HashTableCtx::EvalAndHashBuild(TupleRow* row, uint32_t* hash) { inline bool HashTableCtx::EvalAndHashProbe(TupleRow* row, uint32_t* hash) { bool has_null = EvalProbeRow(row); - if ((!stores_nulls_ || !finds_nulls_) && has_null) return false; + if (has_null && !(stores_nulls_ && finds_some_nulls_)) return false; *hash = HashCurrentRow(); return true; } diff --git a/be/src/exec/old-hash-table-test.cc b/be/src/exec/old-hash-table-test.cc index 499ccabc6..a46f7b015 100644 --- a/be/src/exec/old-hash-table-test.cc +++ b/be/src/exec/old-hash-table-test.cc @@ -198,8 +198,8 @@ TEST_F(OldHashTableTest, BasicTest) { // Create the hash table and insert the build rows MemTracker tracker; - OldHashTable hash_table(NULL, build_expr_ctxs_, probe_expr_ctxs_, - 1, false, false, 0, &tracker); + OldHashTable hash_table(NULL, build_expr_ctxs_, probe_expr_ctxs_, 1, false, + std::vector(build_expr_ctxs_.size(), false), 0, &tracker); for (int i = 0; i < 5; ++i) { hash_table.Insert(build_rows[i]); } @@ -240,8 +240,8 @@ TEST_F(OldHashTableTest, BasicTest) { // This tests makes sure we can scan ranges of buckets TEST_F(OldHashTableTest, ScanTest) { MemTracker tracker; - OldHashTable hash_table(NULL, build_expr_ctxs_, probe_expr_ctxs_, - 1, false, false, 0, &tracker); + OldHashTable hash_table(NULL, build_expr_ctxs_, probe_expr_ctxs_, 1, false, + std::vector(build_expr_ctxs_.size(), false), 0, &tracker); // Add 1 row with val 1, 2 with val 2, etc vector build_rows; ProbeTestData probe_rows[15]; @@ -286,8 +286,8 @@ TEST_F(OldHashTableTest, GrowTableTest) { int num_to_add = 4; int expected_size = 0; MemTracker tracker(100 * 1024 * 1024); - OldHashTable hash_table(NULL, build_expr_ctxs_, probe_expr_ctxs_, - 1, false, false, 0, &tracker, false, num_to_add); + OldHashTable hash_table(NULL, build_expr_ctxs_, probe_expr_ctxs_, 1, false, + std::vector(build_expr_ctxs_.size(), false), 0, &tracker, false, num_to_add); EXPECT_FALSE(hash_table.mem_limit_exceeded()); EXPECT_TRUE(!tracker.LimitExceeded()); diff --git a/be/src/exec/old-hash-table.cc b/be/src/exec/old-hash-table.cc index 5bab1e429..ebec5e736 100644 --- a/be/src/exec/old-hash-table.cc +++ b/be/src/exec/old-hash-table.cc @@ -14,6 +14,9 @@ #include "exec/old-hash-table.inline.h" +#include +#include + #include "codegen/codegen-anyval.h" #include "codegen/llvm-codegen.h" #include "exprs/expr.h" @@ -53,14 +56,16 @@ static int64_t NULL_VALUE[] = { HashUtil::FNV_SEED, HashUtil::FNV_SEED, OldHashTable::OldHashTable(RuntimeState* state, const vector& build_expr_ctxs, const vector& probe_expr_ctxs, int num_build_tuples, bool stores_nulls, - bool finds_nulls, int32_t initial_seed, MemTracker* mem_tracker, bool stores_tuples, - int64_t num_buckets) + const std::vector& finds_nulls, int32_t initial_seed, MemTracker* mem_tracker, + bool stores_tuples, int64_t num_buckets) : state_(state), build_expr_ctxs_(build_expr_ctxs), probe_expr_ctxs_(probe_expr_ctxs), num_build_tuples_(num_build_tuples), stores_nulls_(stores_nulls), finds_nulls_(finds_nulls), + finds_some_nulls_(std::accumulate( + finds_nulls_.begin(), finds_nulls_.end(), false, std::logical_or())), stores_tuples_(stores_tuples), initial_seed_(initial_seed), num_filled_buckets_(0), @@ -73,7 +78,7 @@ OldHashTable::OldHashTable(RuntimeState* state, const vector& buil mem_limit_exceeded_(false) { DCHECK(mem_tracker != NULL); DCHECK_EQ(build_expr_ctxs_.size(), probe_expr_ctxs_.size()); - + DCHECK_EQ(build_expr_ctxs_.size(), finds_nulls_.size()); DCHECK_EQ((num_buckets & (num_buckets-1)), 0) << "num_buckets must be a power of 2"; buckets_.resize(num_buckets); num_buckets_ = num_buckets; @@ -508,7 +513,7 @@ bool OldHashTable::Equals(TupleRow* build_row) { for (int i = 0; i < build_expr_ctxs_.size(); ++i) { void* val = build_expr_ctxs_[i]->GetValue(build_row); if (val == NULL) { - if (!stores_nulls_) return false; + if (!(stores_nulls_ && finds_nulls_[i])) return false; if (!expr_value_null_bits_[i]) return false; continue; } else { @@ -630,7 +635,7 @@ Function* OldHashTable::CodegenEquals(RuntimeState* state) { // the case where the hash table does not store nulls, this is always false. Value* probe_is_null = codegen->false_value(); uint8_t* null_byte_loc = &expr_value_null_bits_[i]; - if (stores_nulls_) { + if (stores_nulls_ && finds_nulls_[i]) { Value* llvm_null_byte_loc = codegen->CastPtrToLlvmPtr(codegen->ptr_type(), null_byte_loc); Value* null_byte = builder.CreateLoad(llvm_null_byte_loc); diff --git a/be/src/exec/old-hash-table.h b/be/src/exec/old-hash-table.h index d2612c58c..0bf4da99a 100644 --- a/be/src/exec/old-hash-table.h +++ b/be/src/exec/old-hash-table.h @@ -93,17 +93,21 @@ class OldHashTable { /// - probe_exprs are used during Find() /// - num_build_tuples: number of Tuples in the build tuple row /// - stores_nulls: if false, TupleRows with nulls are ignored during Insert - /// - finds_nulls: if false, Find() returns End() for TupleRows with nulls - /// even if stores_nulls is true + /// - finds_nulls: if finds_nulls[i] is false, Find() returns End() for TupleRows with + /// nulls in position i even if stores_nulls is true. /// - num_buckets: number of buckets that the hash table should be initialized to /// - mem_tracker: if non-empty, all memory allocations for nodes and for buckets are /// tracked; the tracker must be valid until the d'tor is called /// - initial_seed: Initial seed value to use when computing hashes for rows /// - stores_tuples: If true, the hash table stores tuples, otherwise it stores tuple /// rows. + /// TODO: stores_nulls is too coarse: for a hash table in which some columns are joined + /// with '<=>' and others with '=', stores_nulls could distinguish between columns + /// in which nulls are stored and columns in which they are not, which could save + /// space by not storing some rows we know will never match. OldHashTable(RuntimeState* state, const std::vector& build_expr_ctxs, const std::vector& probe_expr_ctxs, int num_build_tuples, - bool stores_nulls, bool finds_nulls, int32_t initial_seed, + bool stores_nulls, const std::vector& finds_nulls, int32_t initial_seed, MemTracker* mem_tracker, bool stores_tuples = false, int64_t num_buckets = 1024); /// Call to cleanup any resources. Must be called once. @@ -435,7 +439,11 @@ class OldHashTable { /// different behavior. /// TODO: these constants are an ideal candidate to be removed with codegen. const bool stores_nulls_; - const bool finds_nulls_; + const std::vector finds_nulls_; + + /// finds_some_nulls_ is just the logical OR of finds_nulls_. + const bool finds_some_nulls_; + const bool stores_tuples_; const int32_t initial_seed_; diff --git a/be/src/exec/old-hash-table.inline.h b/be/src/exec/old-hash-table.inline.h index c4c361004..811dae2de 100644 --- a/be/src/exec/old-hash-table.inline.h +++ b/be/src/exec/old-hash-table.inline.h @@ -102,7 +102,7 @@ inline bool OldHashTable::EvalAndHashBuild(TupleRow* row, uint32_t* hash) { inline bool OldHashTable::EvalAndHashProbe(TupleRow* row, uint32_t* hash) { bool has_null = EvalProbeRow(row); - if ((!stores_nulls_ || !finds_nulls_) && has_null) return false; + if (has_null && !(stores_nulls_ && finds_some_nulls_)) return false; *hash = HashCurrentRow(); return true; } diff --git a/be/src/exec/partitioned-aggregation-node.cc b/be/src/exec/partitioned-aggregation-node.cc index ca749dcb4..b66753175 100644 --- a/be/src/exec/partitioned-aggregation-node.cc +++ b/be/src/exec/partitioned-aggregation-node.cc @@ -189,8 +189,9 @@ Status PartitionedAggregationNode::Prepare(RuntimeState* state) { RETURN_IF_ERROR(state_->GetQueryStatus()); singleton_output_tuple_returned_ = false; } else { - ht_ctx_.reset(new HashTableCtx(build_expr_ctxs_, probe_expr_ctxs_, true, true, - state->fragment_hash_seed(), MAX_PARTITION_DEPTH, 1)); + ht_ctx_.reset(new HashTableCtx(build_expr_ctxs_, probe_expr_ctxs_, true, + std::vector(build_expr_ctxs_.size(), true), state->fragment_hash_seed(), + MAX_PARTITION_DEPTH, 1)); RETURN_IF_ERROR(state_->block_mgr()->RegisterClient( MinRequiredBuffers(), true, mem_tracker(), state, &block_mgr_client_)); RETURN_IF_ERROR(CreateHashPartitions(0)); diff --git a/be/src/exec/partitioned-hash-join-node.cc b/be/src/exec/partitioned-hash-join-node.cc index 4e06ab373..e7a633235 100644 --- a/be/src/exec/partitioned-hash-join-node.cc +++ b/be/src/exec/partitioned-hash-join-node.cc @@ -14,6 +14,8 @@ #include "exec/partitioned-hash-join-node.inline.h" +#include +#include #include #include @@ -44,6 +46,7 @@ PartitionedHashJoinNode::PartitionedHashJoinNode( ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : BlockingJoinNode("PartitionedHashJoinNode", tnode.hash_join_node.join_op, pool, tnode, descs), + is_not_distinct_from_(), block_mgr_client_(NULL), partition_build_timer_(NULL), null_aware_eval_timer_(NULL), @@ -79,6 +82,7 @@ Status PartitionedHashJoinNode::Init(const TPlanNode& tnode) { probe_expr_ctxs_.push_back(ctx); RETURN_IF_ERROR(Expr::CreateExprTree(pool_, eq_join_conjuncts[i].right, &ctx)); build_expr_ctxs_.push_back(ctx); + is_not_distinct_from_.push_back(eq_join_conjuncts[i].is_not_distinct_from); } RETURN_IF_ERROR( Expr::CreateExprTrees(pool_, tnode.hash_join_node.other_join_conjuncts, @@ -130,10 +134,12 @@ Status PartitionedHashJoinNode::Prepare(RuntimeState* state) { RETURN_IF_ERROR(state->block_mgr()->RegisterClient( MinRequiredBuffers(), true, mem_tracker(), state, &block_mgr_client_)); - bool should_store_nulls = join_op_ == TJoinOp::RIGHT_OUTER_JOIN || - join_op_ == TJoinOp::RIGHT_ANTI_JOIN || join_op_ == TJoinOp::FULL_OUTER_JOIN; - ht_ctx_.reset(new HashTableCtx(build_expr_ctxs_, probe_expr_ctxs_, - should_store_nulls, false, state->fragment_hash_seed(), MAX_PARTITION_DEPTH, + const bool should_store_nulls = join_op_ == TJoinOp::RIGHT_OUTER_JOIN || + join_op_ == TJoinOp::RIGHT_ANTI_JOIN || join_op_ == TJoinOp::FULL_OUTER_JOIN || + std::accumulate(is_not_distinct_from_.begin(), is_not_distinct_from_.end(), false, + std::logical_or()); + ht_ctx_.reset(new HashTableCtx(build_expr_ctxs_, probe_expr_ctxs_, should_store_nulls, + is_not_distinct_from_, state->fragment_hash_seed(), MAX_PARTITION_DEPTH, child(1)->row_desc().tuple_descriptors().size())); if (join_op_ == TJoinOp::NULL_AWARE_LEFT_ANTI_JOIN) { diff --git a/be/src/exec/partitioned-hash-join-node.h b/be/src/exec/partitioned-hash-join-node.h index 78ff440a9..ac6884c0c 100644 --- a/be/src/exec/partitioned-hash-join-node.h +++ b/be/src/exec/partitioned-hash-join-node.h @@ -296,6 +296,10 @@ class PartitionedHashJoinNode : public BlockingJoinNode { std::vector probe_expr_ctxs_; std::vector build_expr_ctxs_; + /// is_not_distinct_from_[i] is true if and only if the ith equi-join predicate is IS + /// NOT DISTINCT FROM, rather than equality. + std::vector is_not_distinct_from_; + /// Non-equi-join conjuncts from the ON clause. std::vector other_join_conjunct_ctxs_; diff --git a/be/src/exprs/decimal-operators.cc b/be/src/exprs/decimal-operators.cc index ef2d169d0..16911c388 100644 --- a/be/src/exprs/decimal-operators.cc +++ b/be/src/exprs/decimal-operators.cc @@ -647,41 +647,53 @@ BooleanVal DecimalOperators::CastToBooleanVal( return DecimalVal::null(); \ } +#define DECIMAL_BINARY_OP_NONNULL(OP_FN, X, Y) \ + bool dummy = false; \ + const FunctionContext::TypeDesc& x_type = *context->GetArgType(0); \ + const FunctionContext::TypeDesc& y_type = *context->GetArgType(1); \ + int byte_size = ::max(ColumnType::GetDecimalByteSize(x_type.precision), \ + ColumnType::GetDecimalByteSize(y_type.precision)); \ + switch (byte_size) { \ + case 4: { \ + Decimal4Value x_val = GetDecimal4Value(X, x_type, &dummy); \ + Decimal4Value y_val = GetDecimal4Value(Y, y_type, &dummy); \ + bool result = x_val.OP_FN(x_type.scale, y_val, y_type.scale); \ + return BooleanVal(result); \ + } \ + case 8: { \ + Decimal8Value x_val = GetDecimal8Value(X, x_type, &dummy); \ + Decimal8Value y_val = GetDecimal8Value(Y, y_type, &dummy); \ + bool result = x_val.OP_FN(x_type.scale, y_val, y_type.scale); \ + return BooleanVal(result); \ + } \ + case 16: { \ + Decimal16Value x_val = GetDecimal16Value(X, x_type, &dummy); \ + Decimal16Value y_val = GetDecimal16Value(Y, y_type, &dummy); \ + bool result = x_val.OP_FN(x_type.scale, y_val, y_type.scale); \ + return BooleanVal(result); \ + } \ + default: \ + DCHECK(false); \ + break; \ + } \ + return BooleanVal::null(); + #define DECIMAL_BINARY_OP(FN_NAME, OP_FN) \ BooleanVal DecimalOperators::FN_NAME( \ FunctionContext* context, const DecimalVal& x, const DecimalVal& y) { \ if (x.is_null || y.is_null) return BooleanVal::null(); \ - bool dummy = false; \ - const FunctionContext::TypeDesc& x_type = *context->GetArgType(0); \ - const FunctionContext::TypeDesc& y_type = *context->GetArgType(1); \ - int byte_size = ::max(ColumnType::GetDecimalByteSize(x_type.precision), \ - ColumnType::GetDecimalByteSize(y_type.precision)); \ - switch (byte_size) { \ - case 4: { \ - Decimal4Value x_val = GetDecimal4Value(x, x_type, &dummy); \ - Decimal4Value y_val = GetDecimal4Value(y, y_type, &dummy); \ - bool result = x_val.OP_FN(x_type.scale, y_val, y_type.scale); \ - return BooleanVal(result); \ - } \ - case 8: { \ - Decimal8Value x_val = GetDecimal8Value(x, x_type, &dummy); \ - Decimal8Value y_val = GetDecimal8Value(y, y_type, &dummy); \ - bool result = x_val.OP_FN(x_type.scale, y_val, y_type.scale); \ - return BooleanVal(result); \ - } \ - case 16: { \ - Decimal16Value x_val = GetDecimal16Value(x, x_type, &dummy); \ - Decimal16Value y_val = GetDecimal16Value(y, y_type, &dummy); \ - bool result = x_val.OP_FN(x_type.scale, y_val, y_type.scale); \ - return BooleanVal(result); \ - } \ - default: \ - DCHECK(false); \ - break; \ - } \ - return BooleanVal::null(); \ + DECIMAL_BINARY_OP_NONNULL(OP_FN, x, y) \ } +#define NULLSAFE_DECIMAL_BINARY_OP(FN_NAME, OP_FN, IS_EQUAL) \ + BooleanVal DecimalOperators::FN_NAME( \ + FunctionContext* context, const DecimalVal& x, const DecimalVal& y) { \ + if (x.is_null) return BooleanVal(IS_EQUAL ? y.is_null : !y.is_null); \ + if (y.is_null) return BooleanVal(!IS_EQUAL); \ + DECIMAL_BINARY_OP_NONNULL(OP_FN, x, y) \ + } + + DECIMAL_ARITHMETIC_OP(Add_DecimalVal_DecimalVal, Add) DECIMAL_ARITHMETIC_OP(Subtract_DecimalVal_DecimalVal, Subtract) DECIMAL_ARITHMETIC_OP(Multiply_DecimalVal_DecimalVal, Multiply) @@ -694,5 +706,6 @@ DECIMAL_BINARY_OP(Ge_DecimalVal_DecimalVal, Ge) DECIMAL_BINARY_OP(Gt_DecimalVal_DecimalVal, Gt) DECIMAL_BINARY_OP(Le_DecimalVal_DecimalVal, Le) DECIMAL_BINARY_OP(Lt_DecimalVal_DecimalVal, Lt) - +NULLSAFE_DECIMAL_BINARY_OP(DistinctFrom_DecimalVal_DecimalVal, Ne, false) +NULLSAFE_DECIMAL_BINARY_OP(NotDistinct_DecimalVal_DecimalVal, Eq, true) } diff --git a/be/src/exprs/decimal-operators.h b/be/src/exprs/decimal-operators.h index f32590e1e..0d5568da3 100644 --- a/be/src/exprs/decimal-operators.h +++ b/be/src/exprs/decimal-operators.h @@ -75,6 +75,10 @@ class DecimalOperators { FunctionContext*, const DecimalVal&, const DecimalVal&); static BooleanVal Lt_DecimalVal_DecimalVal( FunctionContext*, const DecimalVal&, const DecimalVal&); + static BooleanVal DistinctFrom_DecimalVal_DecimalVal( + FunctionContext*, const DecimalVal&, const DecimalVal&); + static BooleanVal NotDistinct_DecimalVal_DecimalVal( + FunctionContext*, const DecimalVal&, const DecimalVal&); /// The rounding rule when converting decimals. These only apply going from a higher /// scale to a lower one. diff --git a/be/src/exprs/expr-test.cc b/be/src/exprs/expr-test.cc index 038be44be..f10b28aec 100644 --- a/be/src/exprs/expr-test.cc +++ b/be/src/exprs/expr-test.cc @@ -537,7 +537,31 @@ class ExprTest : public testing::Test { TestValue("1.23 < 1.234", TYPE_BOOLEAN, true); TestValue("1.23 > 1.234", TYPE_BOOLEAN, false); TestValue("1.23 = 1.230000000000000000000", TYPE_BOOLEAN, true); - TestValue("1.2300 != 1.230000000000000000001", TYPE_BOOLEAN, true); + + // Some values are too precise to be stored to full precision as doubles, but not too + // precise to be stored as decimals. + static const string not_too_precise = "1.25"; + // The closest double to 'too_precise' is 1.25 - the string as written cannot be + // preresented exactly as a double. + static const string too_precise = "1.250000000000000000001"; + TestValue( + "cast(" + not_too_precise + " as double) != cast(" + too_precise + " as double)", + TYPE_BOOLEAN, false); + TestValue(not_too_precise + " != " + too_precise, TYPE_BOOLEAN, true); + TestValue("cast(" + not_too_precise + " as double) IS DISTINCT FROM cast(" + + too_precise + " as double)", + TYPE_BOOLEAN, false); + TestValue(not_too_precise + " IS DISTINCT FROM " + too_precise, TYPE_BOOLEAN, true); + TestValue("cast(" + not_too_precise + " as double) IS NOT DISTINCT FROM cast(" + + too_precise + " as double)", + TYPE_BOOLEAN, true); + TestValue( + not_too_precise + " IS NOT DISTINCT FROM " + too_precise, TYPE_BOOLEAN, false); + TestValue( + "cast(" + not_too_precise + " as double) <=> cast(" + too_precise + " as double)", + TYPE_BOOLEAN, true); + TestValue(not_too_precise + " <=> " + too_precise, TYPE_BOOLEAN, false); + TestValue("cast(1 as decimal(38,0)) = cast(1 as decimal(38,37))", TYPE_BOOLEAN, true); TestValue("cast(1 as decimal(38,0)) = cast(0.1 as decimal(38,38))", TYPE_BOOLEAN, false); @@ -568,6 +592,47 @@ class ExprTest : public testing::Test { TestIsNull("NULL >= " + op, TYPE_BOOLEAN); } + // Test IS DISTINCT FROM operator and its variants + void TestDistinctFrom() { + static const string operators[] = {"<=>", "IS DISTINCT FROM", "IS NOT DISTINCT FROM"}; + static const string types[] = {"Boolean", "TinyInt", "SmallInt", "Int", "BigInt", + "Float", "Double", "String", "Timestamp", "Decimal"}; + static const string operands1[] = { + "true", "cast(1 as TinyInt)", "cast(1 as SmallInt)", "cast(1 as Int)", + "cast(1 as BigInt)", "cast(1 as Float)", "cast(1 as Double)", + "'this is a string'", "cast(1 as TimeStamp)", "cast(1 as Decimal)" + }; + static const string operands2[] = { + "false", "cast(2 as TinyInt)", "cast(2 as SmallInt)", "cast(2 as Int)", + "cast(2 as BigInt)", "cast(2 as Float)", "cast(2 as Double)", + "'this is ALSO a string'", "cast(2 as TimeStamp)", "cast(2 as Decimal)" + }; + for (int i = 0; i < sizeof(operators) / sizeof(string); ++i) { + // "IS DISTINCT FROM" and "<=>" are generalized equality, and + // this fact is recorded in is_equal. + const bool is_equal = operators[i] != "IS DISTINCT FROM"; + // Everything IS NOT DISTINCT FROM itself. + for (int j = 0; j < sizeof(types) / sizeof(string); ++j) { + const string operand = "cast(NULL as " + types[j] + ")"; + TestValue(operand + ' ' + operators[i] + ' ' + operand, TYPE_BOOLEAN, is_equal); + } + for (int j = 0; j < sizeof(operands1) / sizeof(string); ++j) { + TestValue(operands1[j] + ' ' + operators[i] + ' ' + operands1[j], TYPE_BOOLEAN, + is_equal); + } + // NULL IS DISTINCT FROM all non-null things. + for (int j = 0; j < sizeof(operands1) / sizeof(string); ++j) { + TestValue("NULL " + operators[i] + ' ' + operands1[j], TYPE_BOOLEAN, !is_equal); + TestValue(operands1[j] + ' ' + operators[i] + " NULL", TYPE_BOOLEAN, !is_equal); + } + // Non-null values can be DISTINCT. + for (int j = 0; j < sizeof(operands1) / sizeof(string); ++j) { + TestValue(operands1[j] + ' ' + operators[i] + ' ' + operands2[j], TYPE_BOOLEAN, + !is_equal); + } + } + } + // Test comparison operators with a left or right NULL operand on all types. void TestNullComparisons() { unordered_map::iterator def_iter; @@ -1181,6 +1246,7 @@ TEST_F(ExprTest, BinaryPredicates) { TestStringComparisons(); TestDecimalComparisons(); TestNullComparisons(); + TestDistinctFrom(); } // Test casting from all types to all other types diff --git a/be/src/exprs/operators.cc b/be/src/exprs/operators.cc index 1ceb8ebc4..42b3c7648 100644 --- a/be/src/exprs/operators.cc +++ b/be/src/exprs/operators.cc @@ -54,31 +54,64 @@ using strings::Substitute; return BigIntVal(fact); \ } +#define BINARY_PREDICATE_NUMERIC_NONNULL(OP, V1, V2) \ + return BooleanVal(V1.val OP V2.val) + +#define BINARY_PREDICATE_NONNUMERIC_NONNULL(TYPE, IMPALA_TYPE, OP, V1, V2) \ + IMPALA_TYPE iv1 = IMPALA_TYPE::From##TYPE(V1);\ + IMPALA_TYPE iv2 = IMPALA_TYPE::From##TYPE(V2);\ + return BooleanVal(iv1 OP iv2) + +#define BINARY_PREDICATE_CHAR_NONNULL(OP, V1, V2) \ + StringValue iv1 = StringValue::FromStringVal(V1);\ + StringValue iv2 = StringValue::FromStringVal(V2);\ + iv1.len = StringValue::UnpaddedCharLength(iv1.ptr, c->GetArgType(0)->len); \ + iv2.len = StringValue::UnpaddedCharLength(iv2.ptr, c->GetArgType(1)->len); \ + return BooleanVal(iv1 OP iv2) + #define BINARY_PREDICATE_NUMERIC_FN(NAME, TYPE, OP) \ BooleanVal Operators::NAME##_##TYPE##_##TYPE(\ FunctionContext* c, const TYPE& v1, const TYPE& v2) {\ if (v1.is_null || v2.is_null) return BooleanVal::null();\ - return BooleanVal(v1.val OP v2.val);\ + BINARY_PREDICATE_NUMERIC_NONNULL(OP, v1, v2);\ } #define BINARY_PREDICATE_NONNUMERIC_FN(NAME, TYPE, IMPALA_TYPE, OP) \ BooleanVal Operators::NAME##_##TYPE##_##TYPE(\ FunctionContext* c, const TYPE& v1, const TYPE& v2) {\ if (v1.is_null || v2.is_null) return BooleanVal::null();\ - IMPALA_TYPE iv1 = IMPALA_TYPE::From##TYPE(v1);\ - IMPALA_TYPE iv2 = IMPALA_TYPE::From##TYPE(v2);\ - return BooleanVal(iv1 OP iv2);\ + BINARY_PREDICATE_NONNUMERIC_NONNULL(TYPE, IMPALA_TYPE, OP, v1, v2);\ } #define BINARY_PREDICATE_CHAR(NAME, OP) \ BooleanVal Operators::NAME##_Char_Char(\ FunctionContext* c, const StringVal& v1, const StringVal& v2) {\ if (v1.is_null || v2.is_null) return BooleanVal::null();\ - StringValue iv1 = StringValue::FromStringVal(v1);\ - StringValue iv2 = StringValue::FromStringVal(v2);\ - iv1.len = StringValue::UnpaddedCharLength(iv1.ptr, c->GetArgType(0)->len); \ - iv2.len = StringValue::UnpaddedCharLength(iv2.ptr, c->GetArgType(1)->len); \ - return BooleanVal(iv1 OP iv2);\ + BINARY_PREDICATE_CHAR_NONNULL(OP, v1, v2);\ + } + +#define NULLSAFE_NUMERIC_DISTINCTION(NAME, TYPE, OP, IS_EQUAL) \ + BooleanVal Operators::NAME##_##TYPE##_##TYPE(\ + FunctionContext* c, const TYPE& v1, const TYPE& v2) {\ + if (v1.is_null) return BooleanVal(IS_EQUAL ? v2.is_null : !v2.is_null); \ + if (v2.is_null) return BooleanVal(!IS_EQUAL);\ + BINARY_PREDICATE_NUMERIC_NONNULL(OP, v1, v2);\ + } + +#define NULLSAFE_NONNUMERIC_DISTINCTION(NAME, TYPE, IMPALA_TYPE, OP, IS_EQUAL) \ + BooleanVal Operators::NAME##_##TYPE##_##TYPE(\ + FunctionContext* c, const TYPE& v1, const TYPE& v2) {\ + if (v1.is_null) return BooleanVal(IS_EQUAL ? v2.is_null : !v2.is_null); \ + if (v2.is_null) return BooleanVal(!IS_EQUAL);\ + BINARY_PREDICATE_NONNUMERIC_NONNULL(TYPE, IMPALA_TYPE, OP, v1, v2);\ + } + +#define NULLSAFE_CHAR_DISTINCTION(NAME, OP, IS_EQUAL) \ + BooleanVal Operators::NAME##_Char_Char(\ + FunctionContext* c, const StringVal& v1, const StringVal& v2) {\ + if (v1.is_null) return BooleanVal(IS_EQUAL ? v2.is_null : !v2.is_null); \ + if (v2.is_null) return BooleanVal(!IS_EQUAL);\ + BINARY_PREDICATE_CHAR_NONNULL(OP, v1, v2);\ } #define BINARY_OP_NUMERIC_TYPES(NAME, OP) \ @@ -113,6 +146,18 @@ using strings::Substitute; BINARY_PREDICATE_NONNUMERIC_FN(NAME, TimestampVal, TimestampValue, OP);\ BINARY_PREDICATE_CHAR(NAME, OP); +#define NULLSAFE_DISTINCTION(NAME, OP, IS_EQUAL) \ + NULLSAFE_NUMERIC_DISTINCTION(NAME, BooleanVal, OP, IS_EQUAL); \ + NULLSAFE_NUMERIC_DISTINCTION(NAME, TinyIntVal, OP, IS_EQUAL); \ + NULLSAFE_NUMERIC_DISTINCTION(NAME, SmallIntVal, OP, IS_EQUAL); \ + NULLSAFE_NUMERIC_DISTINCTION(NAME, IntVal, OP, IS_EQUAL); \ + NULLSAFE_NUMERIC_DISTINCTION(NAME, BigIntVal, OP, IS_EQUAL); \ + NULLSAFE_NUMERIC_DISTINCTION(NAME, FloatVal, OP, IS_EQUAL); \ + NULLSAFE_NUMERIC_DISTINCTION(NAME, DoubleVal, OP, IS_EQUAL); \ + NULLSAFE_NONNUMERIC_DISTINCTION(NAME, StringVal, StringValue, OP, IS_EQUAL);\ + NULLSAFE_NONNUMERIC_DISTINCTION(NAME, TimestampVal, TimestampValue, OP, IS_EQUAL);\ + NULLSAFE_CHAR_DISTINCTION(NAME, OP, IS_EQUAL); + namespace impala { BINARY_OP_NUMERIC_TYPES(Add, +); @@ -182,4 +227,7 @@ BINARY_PREDICATE_ALL_TYPES(Lt, <); BINARY_PREDICATE_ALL_TYPES(Ge, >=); BINARY_PREDICATE_ALL_TYPES(Le, <=); +NULLSAFE_DISTINCTION(DistinctFrom, !=, false); +NULLSAFE_DISTINCTION(NotDistinct, ==, true); + } // namespace impala diff --git a/be/src/exprs/operators.h b/be/src/exprs/operators.h index ee1fc152e..1d79cf138 100644 --- a/be/src/exprs/operators.h +++ b/be/src/exprs/operators.h @@ -151,6 +151,48 @@ class Operators { static BooleanVal Ne_TimestampVal_TimestampVal( FunctionContext*, const TimestampVal&, const TimestampVal&); + static BooleanVal DistinctFrom_BooleanVal_BooleanVal( + FunctionContext*, const BooleanVal&, const BooleanVal&); + static BooleanVal DistinctFrom_TinyIntVal_TinyIntVal( + FunctionContext*, const TinyIntVal&, const TinyIntVal&); + static BooleanVal DistinctFrom_SmallIntVal_SmallIntVal( + FunctionContext*, const SmallIntVal&, const SmallIntVal&); + static BooleanVal DistinctFrom_IntVal_IntVal( + FunctionContext*, const IntVal&, const IntVal&); + static BooleanVal DistinctFrom_BigIntVal_BigIntVal( + FunctionContext*, const BigIntVal&, const BigIntVal&); + static BooleanVal DistinctFrom_FloatVal_FloatVal( + FunctionContext*, const FloatVal&, const FloatVal&); + static BooleanVal DistinctFrom_DoubleVal_DoubleVal( + FunctionContext*, const DoubleVal&, const DoubleVal&); + static BooleanVal DistinctFrom_StringVal_StringVal( + FunctionContext*, const StringVal&, const StringVal&); + static BooleanVal DistinctFrom_Char_Char( + FunctionContext*, const StringVal&, const StringVal&); + static BooleanVal DistinctFrom_TimestampVal_TimestampVal( + FunctionContext*, const TimestampVal&, const TimestampVal&); + + static BooleanVal NotDistinct_BooleanVal_BooleanVal( + FunctionContext*, const BooleanVal&, const BooleanVal&); + static BooleanVal NotDistinct_TinyIntVal_TinyIntVal( + FunctionContext*, const TinyIntVal&, const TinyIntVal&); + static BooleanVal NotDistinct_SmallIntVal_SmallIntVal( + FunctionContext*, const SmallIntVal&, const SmallIntVal&); + static BooleanVal NotDistinct_IntVal_IntVal( + FunctionContext*, const IntVal&, const IntVal&); + static BooleanVal NotDistinct_BigIntVal_BigIntVal( + FunctionContext*, const BigIntVal&, const BigIntVal&); + static BooleanVal NotDistinct_FloatVal_FloatVal( + FunctionContext*, const FloatVal&, const FloatVal&); + static BooleanVal NotDistinct_DoubleVal_DoubleVal( + FunctionContext*, const DoubleVal&, const DoubleVal&); + static BooleanVal NotDistinct_StringVal_StringVal( + FunctionContext*, const StringVal&, const StringVal&); + static BooleanVal NotDistinct_Char_Char( + FunctionContext*, const StringVal&, const StringVal&); + static BooleanVal NotDistinct_TimestampVal_TimestampVal( + FunctionContext*, const TimestampVal&, const TimestampVal&); + static BooleanVal Gt_BooleanVal_BooleanVal( FunctionContext*, const BooleanVal&, const BooleanVal&); static BooleanVal Gt_TinyIntVal_TinyIntVal( diff --git a/common/function-registry/impala_functions.py b/common/function-registry/impala_functions.py index 0c5bebe4e..0d0988c4e 100644 --- a/common/function-registry/impala_functions.py +++ b/common/function-registry/impala_functions.py @@ -658,4 +658,28 @@ invisible_functions = [ '_ZN6impala18TimestampFunctions6AddSubILb0EN10impala_udf6IntValEN5boost9date_time15months_durationINS4_9gregorian21greg_durations_configEEELb0EEENS2_12TimestampValEPNS2_15FunctionContextERKSA_RKT0_'], [['months_sub_interval'], 'TIMESTAMP', ['TIMESTAMP', 'BIGINT'], '_ZN6impala18TimestampFunctions6AddSubILb0EN10impala_udf9BigIntValEN5boost9date_time15months_durationINS4_9gregorian21greg_durations_configEEELb0EEENS2_12TimestampValEPNS2_15FunctionContextERKSA_RKT0_'], + + [['distinctfrom'], 'BOOLEAN', ['BOOLEAN', 'BOOLEAN'], 'impala::Operators::DistinctFrom_BooleanVal_BooleanVal'], + [['distinctfrom'], 'BOOLEAN', ['TINYINT', 'TINYINT'], 'impala::Operators::DistinctFrom_TinyIntVal_TinyIntVal'], + [['distinctfrom'], 'BOOLEAN', ['SMALLINT', 'SMALLINT'], 'impala::Operators::DistinctFrom_SmallIntVal_SmallIntVal'], + [['distinctfrom'], 'BOOLEAN', ['INT', 'INT'], 'impala::Operators::DistinctFrom_IntVal_IntVal'], + [['distinctfrom'], 'BOOLEAN', ['BIGINT', 'BIGINT'], 'impala::Operators::DistinctFrom_BigIntVal_BigIntVal'], + [['distinctfrom'], 'BOOLEAN', ['FLOAT', 'FLOAT'], 'impala::Operators::DistinctFrom_FloatVal_FloatVal'], + [['distinctfrom'], 'BOOLEAN', ['DOUBLE', 'DOUBLE'], 'impala::Operators::DistinctFrom_DoubleVal_DoubleVal'], + [['distinctfrom'], 'BOOLEAN', ['STRING', 'STRING'], 'impala::Operators::DistinctFrom_StringVal_StringVal'], + [['distinctfrom'], 'BOOLEAN', ['TIMESTAMP', 'TIMESTAMP'], 'impala::Operators::DistinctFrom_TimestampVal_TimestampVal'], + [['distinctfrom'], 'BOOLEAN', ['CHAR', 'CHAR'], 'impala::Operators::DistinctFrom_Char_Char'], + [['distinctfrom'], 'BOOLEAN', ['DECIMAL', 'DECIMAL'], 'impala::DecimalOperators::DistinctFrom_DecimalVal_DecimalVal'], + + [['notdistinct'], 'BOOLEAN', ['BOOLEAN', 'BOOLEAN'], 'impala::Operators::NotDistinct_BooleanVal_BooleanVal'], + [['notdistinct'], 'BOOLEAN', ['TINYINT', 'TINYINT'], 'impala::Operators::NotDistinct_TinyIntVal_TinyIntVal'], + [['notdistinct'], 'BOOLEAN', ['SMALLINT', 'SMALLINT'], 'impala::Operators::NotDistinct_SmallIntVal_SmallIntVal'], + [['notdistinct'], 'BOOLEAN', ['INT', 'INT'], 'impala::Operators::NotDistinct_IntVal_IntVal'], + [['notdistinct'], 'BOOLEAN', ['BIGINT', 'BIGINT'], 'impala::Operators::NotDistinct_BigIntVal_BigIntVal'], + [['notdistinct'], 'BOOLEAN', ['FLOAT', 'FLOAT'], 'impala::Operators::NotDistinct_FloatVal_FloatVal'], + [['notdistinct'], 'BOOLEAN', ['DOUBLE', 'DOUBLE'], 'impala::Operators::NotDistinct_DoubleVal_DoubleVal'], + [['notdistinct'], 'BOOLEAN', ['STRING', 'STRING'], 'impala::Operators::NotDistinct_StringVal_StringVal'], + [['notdistinct'], 'BOOLEAN', ['TIMESTAMP', 'TIMESTAMP'], 'impala::Operators::NotDistinct_TimestampVal_TimestampVal'], + [['notdistinct'], 'BOOLEAN', ['CHAR', 'CHAR'], 'impala::Operators::NotDistinct_Char_Char'], + [['notdistinct'], 'BOOLEAN', ['DECIMAL', 'DECIMAL'], 'impala::DecimalOperators::NotDistinct_DecimalVal_DecimalVal'], ] diff --git a/common/thrift/ExternalDataSource.thrift b/common/thrift/ExternalDataSource.thrift index ee550e762..824184d61 100644 --- a/common/thrift/ExternalDataSource.thrift +++ b/common/thrift/ExternalDataSource.thrift @@ -50,7 +50,7 @@ struct TRowBatch { // Comparison operators used in predicates. enum TComparisonOp { - LT, LE, EQ, NE, GE, GT, + LT, LE, EQ, NE, GE, GT, DISTINCT_FROM, NOT_DISTINCT } // Binary predicates that can be pushed to the external data source and diff --git a/common/thrift/PlanNodes.thrift b/common/thrift/PlanNodes.thrift index 8ec6bdd07..f194a0278 100644 --- a/common/thrift/PlanNodes.thrift +++ b/common/thrift/PlanNodes.thrift @@ -162,6 +162,8 @@ struct TEqJoinCondition { 1: required Exprs.TExpr left; // right-hand side of " = " 2: required Exprs.TExpr right; + // true if and only if operator is "<=>", also known as "IS NOT DISTINCT FROM" + 3: required bool is_not_distinct_from; } enum TJoinOp { diff --git a/fe/src/main/cup/sql-parser.y b/fe/src/main/cup/sql-parser.y index 5aa59d8d2..fa51595fd 100644 --- a/fe/src/main/cup/sql-parser.y +++ b/fe/src/main/cup/sql-parser.y @@ -458,7 +458,7 @@ precedence left KW_AND; precedence right KW_NOT, NOT; precedence left KW_BETWEEN, KW_IN, KW_IS, KW_EXISTS; precedence left KW_LIKE, KW_RLIKE, KW_REGEXP; -precedence left EQUAL, NOTEQUAL, LESSTHAN, GREATERTHAN; +precedence left EQUAL, NOTEQUAL, LESSTHAN, GREATERTHAN, KW_FROM, KW_DISTINCT; precedence left ADD, SUBTRACT; precedence left STAR, DIVIDE, MOD, KW_DIV; precedence left BITAND, BITOR, BITXOR, BITNOT; @@ -2406,6 +2406,12 @@ comparison_predicate ::= {: RESULT = new BinaryPredicate(BinaryPredicate.Operator.LT, e1, e2); :} | expr:e1 GREATERTHAN expr:e2 {: RESULT = new BinaryPredicate(BinaryPredicate.Operator.GT, e1, e2); :} + | expr:e1 LESSTHAN EQUAL GREATERTHAN expr:e2 + {: RESULT = new BinaryPredicate(BinaryPredicate.Operator.NOT_DISTINCT, e1, e2); :} + | expr:e1 KW_IS KW_DISTINCT KW_FROM expr:e2 + {: RESULT = new BinaryPredicate(BinaryPredicate.Operator.DISTINCT_FROM, e1, e2); :} + | expr:e1 KW_IS KW_NOT KW_DISTINCT KW_FROM expr:e2 + {: RESULT = new BinaryPredicate(BinaryPredicate.Operator.NOT_DISTINCT, e1, e2); :} ; like_predicate ::= diff --git a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java b/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java index 933c9b7fe..9ea4dfd2e 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java @@ -1009,7 +1009,8 @@ public class Analyzer { // form = where at least one of the exprs is bound by // exactly one tuple id if (binaryPred.getOp() != BinaryPredicate.Operator.EQ && - binaryPred.getOp() != BinaryPredicate.Operator.NULL_MATCHING_EQ) { + binaryPred.getOp() != BinaryPredicate.Operator.NULL_MATCHING_EQ && + binaryPred.getOp() != BinaryPredicate.Operator.NOT_DISTINCT) { return; } // the binary predicate must refer to at least two tuples to be an eqJoinConjunct diff --git a/fe/src/main/java/com/cloudera/impala/analysis/BinaryPredicate.java b/fe/src/main/java/com/cloudera/impala/analysis/BinaryPredicate.java index b2bbc07a0..293faf410 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/BinaryPredicate.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/BinaryPredicate.java @@ -49,6 +49,8 @@ public class BinaryPredicate extends Predicate { GE(">=", "ge", TComparisonOp.GE), LT("<", "lt", TComparisonOp.LT), GT(">", "gt", TComparisonOp.GT), + DISTINCT_FROM("IS DISTINCT FROM", "distinctfrom", TComparisonOp.DISTINCT_FROM), + NOT_DISTINCT("IS NOT DISTINCT FROM", "notdistinct", TComparisonOp.NOT_DISTINCT), // Same as EQ, except it returns True if the rhs is NULL. There is no backend // function for this. The functionality is embedded in the hash-join // implementation. @@ -68,6 +70,7 @@ public class BinaryPredicate extends Predicate { public String toString() { return description_; } public String getName() { return name_; } public TComparisonOp getThriftOp() { return thriftOp_; } + public boolean isEquivalence() { return this == EQ || this == NOT_DISTINCT; } public Operator converse() { switch (this) { @@ -77,6 +80,8 @@ public class BinaryPredicate extends Predicate { case GE: return LE; case LT: return GT; case GT: return LT; + case DISTINCT_FROM: return DISTINCT_FROM; + case NOT_DISTINCT: return NOT_DISTINCT; case NULL_MATCHING_EQ: throw new IllegalStateException("Not implemented"); default: throw new IllegalStateException("Invalid operator"); @@ -207,7 +212,7 @@ public class BinaryPredicate extends Predicate { // determine selectivity // TODO: Compute selectivity for nested predicates Reference slotRefRef = new Reference(); - if (op_ == Operator.EQ + if ((op_ == Operator.EQ || op_ == Operator.NOT_DISTINCT) && isSingleColumnPredicate(slotRefRef, null) && slotRefRef.getRef().getNumDistinctValues() > 0) { Preconditions.checkState(slotRefRef.getRef() != null); @@ -293,6 +298,12 @@ public class BinaryPredicate extends Predicate { case GT: newOp = Operator.LE; break; + case DISTINCT_FROM: + newOp = Operator.NOT_DISTINCT; + break; + case NOT_DISTINCT: + newOp = Operator.DISTINCT_FROM; + break; case NULL_MATCHING_EQ: throw new IllegalStateException("Not implemented"); } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java b/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java index 67ac6908c..518442d78 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/StmtRewriter.java @@ -465,10 +465,9 @@ public class StmtRewriter { // Check if we have a valid ON clause for an equi-join. boolean hasEqJoinPred = false; for (Expr conjunct: onClausePredicate.getConjuncts()) { - if (!(conjunct instanceof BinaryPredicate) || - ((BinaryPredicate)conjunct).getOp() != BinaryPredicate.Operator.EQ) { - continue; - } + if (!(conjunct instanceof BinaryPredicate)) continue; + BinaryPredicate.Operator operator = ((BinaryPredicate) conjunct).getOp(); + if (!operator.isEquivalence()) continue; List lhsTupleIds = Lists.newArrayList(); conjunct.getChild(0).getIds(lhsTupleIds, null); if (lhsTupleIds.isEmpty()) continue; @@ -524,9 +523,9 @@ public class StmtRewriter { for (Expr conjunct: onClausePredicate.getConjuncts()) { if (conjunct.equals(joinConjunct)) { Preconditions.checkState(conjunct instanceof BinaryPredicate); - Preconditions.checkState(((BinaryPredicate)conjunct).getOp() == - BinaryPredicate.Operator.EQ); - ((BinaryPredicate)conjunct).setOp(BinaryPredicate.Operator.NULL_MATCHING_EQ); + BinaryPredicate binaryPredicate = (BinaryPredicate)conjunct; + Preconditions.checkState(binaryPredicate.getOp().isEquivalence()); + binaryPredicate.setOp(BinaryPredicate.Operator.NULL_MATCHING_EQ); break; } } diff --git a/fe/src/main/java/com/cloudera/impala/planner/HashJoinNode.java b/fe/src/main/java/com/cloudera/impala/planner/HashJoinNode.java index 8305d60ba..878c5dae0 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/HashJoinNode.java +++ b/fe/src/main/java/com/cloudera/impala/planner/HashJoinNode.java @@ -127,9 +127,11 @@ public class HashJoinNode extends JoinNode { msg.hash_join_node = new THashJoinNode(); msg.hash_join_node.join_op = joinOp_.toThrift(); for (Expr entry: eqJoinConjuncts_) { + BinaryPredicate bp = (BinaryPredicate)entry; TEqJoinCondition eqJoinCondition = - new TEqJoinCondition(entry.getChild(0).treeToThrift(), - entry.getChild(1).treeToThrift()); + new TEqJoinCondition(bp.getChild(0).treeToThrift(), + bp.getChild(1).treeToThrift(), + bp.getOp() == BinaryPredicate.Operator.NOT_DISTINCT); msg.hash_join_node.addToEq_join_conjuncts(eqJoinCondition); } for (Expr e: otherJoinConjuncts_) { diff --git a/fe/src/main/java/com/cloudera/impala/planner/HdfsScanNode.java b/fe/src/main/java/com/cloudera/impala/planner/HdfsScanNode.java index 2070531c6..29ac7c6bf 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/HdfsScanNode.java +++ b/fe/src/main/java/com/cloudera/impala/planner/HdfsScanNode.java @@ -399,7 +399,11 @@ public class HdfsScanNode extends ScanNode { Preconditions.checkNotNull(bindingExpr); Preconditions.checkState(bindingExpr.isLiteral()); LiteralExpr literal = (LiteralExpr)bindingExpr; - if (literal instanceof NullLiteral) return Sets.newHashSet(); + Operator op = bp.getOp(); + if ((literal instanceof NullLiteral) && (op != Operator.NOT_DISTINCT) + && (op != Operator.DISTINCT_FROM)) { + return Sets.newHashSet(); + } // Get the partition column position and retrieve the associated partition // value metadata. @@ -410,13 +414,36 @@ public class HdfsScanNode extends ScanNode { HashSet matchingIds = Sets.newHashSet(); // Compute the matching partition ids - Operator op = bp.getOp(); + if (op == Operator.NOT_DISTINCT) { + // Case: SlotRef <=> Literal + if (literal instanceof NullLiteral) { + HashSet ids = tbl_.getNullPartitionIds(partitionPos); + if (ids != null) matchingIds.addAll(ids); + return matchingIds; + } + // Punt to equality case: + op = Operator.EQ; + } if (op == Operator.EQ) { // Case: SlotRef = Literal HashSet ids = partitionValueMap.get(literal); if (ids != null) matchingIds.addAll(ids); return matchingIds; } + if (op == Operator.DISTINCT_FROM) { + // Case: SlotRef IS DISTINCT FROM Literal + if (literal instanceof NullLiteral) { + matchingIds.addAll(tbl_.getPartitionIds()); + HashSet nullIds = tbl_.getNullPartitionIds(partitionPos); + matchingIds.removeAll(nullIds); + return matchingIds; + } else { + matchingIds.addAll(tbl_.getPartitionIds()); + HashSet ids = partitionValueMap.get(literal); + if (ids != null) matchingIds.removeAll(ids); + return matchingIds; + } + } if (op == Operator.NE) { // Case: SlotRef != Literal matchingIds.addAll(tbl_.getPartitionIds()); diff --git a/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java b/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java index d9710aeca..43f449afa 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java +++ b/fe/src/main/java/com/cloudera/impala/planner/SingleNodePlanner.java @@ -994,7 +994,11 @@ public class SingleNodePlanner { Expr e = i.next(); if (!(e instanceof BinaryPredicate)) continue; BinaryPredicate comp = (BinaryPredicate) e; - if (comp.getOp() == BinaryPredicate.Operator.NE) continue; + if ((comp.getOp() == BinaryPredicate.Operator.NE) + || (comp.getOp() == BinaryPredicate.Operator.DISTINCT_FROM) + || (comp.getOp() == BinaryPredicate.Operator.NOT_DISTINCT)) { + continue; + } Expr slotBinding = comp.getSlotBinding(d.getId()); if (slotBinding == null || !slotBinding.isConstant() || !slotBinding.getType().equals(Type.STRING)) { @@ -1168,18 +1172,13 @@ public class SingleNodePlanner { Predicate isIdentityPredicate = new Predicate() { @Override public boolean apply(Expr expr) { - if (!(expr instanceof BinaryPredicate) - || ((BinaryPredicate) expr).getOp() != BinaryPredicate.Operator.EQ) { - return false; - } - if (!expr.isRegisteredPredicate() + return (expr instanceof BinaryPredicate) + && (((BinaryPredicate) expr).getOp().isEquivalence()) + && !expr.isRegisteredPredicate() && expr.getChild(0) instanceof SlotRef && expr.getChild(1) instanceof SlotRef - && (((SlotRef) expr.getChild(0)).getSlotId() == - ((SlotRef) expr.getChild(1)).getSlotId())) { - return true; - } - return false; + && (((SlotRef) expr.getChild(0)).getSlotId() + == ((SlotRef) expr.getChild(1)).getSlotId()); } }; Iterables.removeIf(viewPredicates, isIdentityPredicate); diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeExprsTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeExprsTest.java index 9b222f4c7..35229dfec 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeExprsTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeExprsTest.java @@ -20,6 +20,7 @@ import static org.junit.Assert.assertTrue; import java.math.BigInteger; import java.util.ArrayList; +import java.util.Arrays; import java.util.List; import org.junit.Assert; @@ -160,33 +161,83 @@ public class AnalyzeExprsTest extends AnalyzerTest { @Test public void TestBinaryPredicates() throws AnalysisException { - AnalyzesOk("select * from functional.alltypes where bool_col != true"); - AnalyzesOk("select * from functional.alltypes where tinyint_col <> 1"); - AnalyzesOk("select * from functional.alltypes where smallint_col <= 23"); - AnalyzesOk("select * from functional.alltypes where int_col > 15"); - AnalyzesOk("select * from functional.alltypes where bigint_col >= 17"); - AnalyzesOk("select * from functional.alltypes where float_col < 15.0"); - AnalyzesOk("select * from functional.alltypes where double_col > 7.7"); - // automatic type cast if compatible - AnalyzesOk("select * from functional.alltypes where 1 = 0"); - AnalyzesOk("select * from functional.alltypes where int_col = smallint_col"); - AnalyzesOk("select * from functional.alltypes where bigint_col = float_col"); - AnalyzesOk("select * from functional.alltypes where bool_col = 0"); - AnalyzesOk("select * from functional.alltypes where int_col = cast('0' as int)"); - AnalyzesOk("select * from functional.alltypes where cast(string_col as int) = 15"); - // tests with NULL - AnalyzesOk("select * from functional.alltypes where bool_col != NULL"); - AnalyzesOk("select * from functional.alltypes where tinyint_col <> NULL"); - AnalyzesOk("select * from functional.alltypes where smallint_col <= NULL"); - AnalyzesOk("select * from functional.alltypes where int_col > NULL"); - AnalyzesOk("select * from functional.alltypes where bigint_col >= NULL"); - AnalyzesOk("select * from functional.alltypes where float_col < NULL"); - AnalyzesOk("select * from functional.alltypes where double_col > NULL"); - AnalyzesOk("select * from functional.alltypes where string_col = NULL"); - AnalyzesOk("select * from functional.alltypes where timestamp_col = NULL"); + for (String operator: new String[]{"<=>", "IS DISTINCT FROM", + "IS NOT DISTINCT FROM", "<", ">", ">=", "<=", "!=", "=", "<>"}) { + // Operator can compare numeric values (literals, casts, and columns), even ones of + // different types. + ArrayList numericValues = + new ArrayList(Arrays.asList("0", "1", "1.1", "-7", "-7.7", "1.2e99", + "false", "1234567890123456789012345678901234567890", "tinyint_col", + "smallint_col", "int_col", "bigint_col", "float_col", "double_col")); + String numericTypes[] = new String[] { + "TINYINT", "SMALLINT", "INT", "BIGINT", "FLOAT", "DOUBLE", "DECIMAL"}; + for (String numericType : numericTypes) { + numericValues.add("cast(NULL as " + numericType + ")"); + } + for (String lhs : numericValues) { + for (String rhs : numericValues) { + AnalyzesOk("select * from functional.alltypes where " + lhs + " " + + operator + " " + rhs); + } + } - AnalyzesOk("select cast('hi' as CHAR(2)) = cast('hi' as CHAR(3))"); - AnalyzesOk("select cast('hi' as CHAR(2)) = 'hi'"); + // Operator can compare identical non-numeric types + for (String operand : + new String[] {"bool_col", "string_col", "timestamp_col", "NULL"}) { + AnalyzesOk("select * from functional.alltypes where " + operand + " " + + operator + " " + operand); + AnalyzesOk("select * from functional.alltypes where " + operand + " " + + operator + " NULL"); + AnalyzesOk( + "select * from functional.alltypes where NULL " + operator + " " + operand); + } + + // Operator can compare string column and literals + AnalyzesOk( + "select * from functional.alltypes where string_col " + operator + " 'hi'"); + // Operator can compare timestamp column and literals + AnalyzesOk("select * from functional.alltypes where timestamp_col " + + operator + " '1993-01-21 02:00:00'"); + // Operator can compare bool column and literals + AnalyzesOk("select * from functional.alltypes where bool_col " + operator + + " true"); + + // Decimal types of different precisions and scales are comparable + String decimalColumns[] = new String[]{"d1", "d2", "d3", "d4", "d5", "NULL"}; + for (String operand1 : decimalColumns) { + for (String operand2 : decimalColumns) { + AnalyzesOk("select * from functional.decimal_tbl where " + operand1 + " " + + operator + " " + operand2); + } + } + + // Chars of different length are comparable + for (int i = 1; i < 16; ++i) { + AnalyzesOk("select cast('hi' as char(" + i + ")) " + operator + + " 'hi'"); + AnalyzesOk("select cast('hi' as char(" + i + ")) " + operator + + " NULL"); + for (int j = 1; j < 16; ++j) { + AnalyzesOk("select cast('hi' as char(" + i + ")) " + operator + + " cast('hi' as char(" + j + "))"); + } + } + + // Binary operators do not operate on expression with incompatible types + for (String numeric_type: new String[]{"BOOLEAN", "TINYINT", "SMALLINT", "INT", + "BIGINT", "FLOAT", "DOUBLE", "DECIMAL(9,0)"}) { + for (String string_type: new String[]{"STRING", "TIMESTAMP"}) { + AnalysisError("select cast(NULL as " + numeric_type + ") " + + operator + " cast(NULL as " + string_type + ")", + "operands of type " + numeric_type + " and " + string_type + + " are not comparable:"); + AnalysisError("select cast(NULL as " + string_type + ") " + + operator + " cast(NULL as " + numeric_type + ")", + "operands of type " + string_type + " and " + numeric_type + + " are not comparable:"); + } + } + } // invalid casts AnalysisError("select * from functional.alltypes where bool_col = '15'", @@ -2028,13 +2079,6 @@ public class AnalyzeExprsTest extends AnalyzerTest { "'~' operation only allowed on integer types: ~d1"); AnalysisError("select d1! from functional.decimal_tbl", "'!' operation only allowed on integer types: d1!"); - - AnalyzesOk("select d3 = d4 from functional.decimal_tbl"); - AnalyzesOk("select d5 != d1 from functional.decimal_tbl"); - AnalyzesOk("select d2 > d2 from functional.decimal_tbl"); - AnalyzesOk("select d4 >= d1 from functional.decimal_tbl"); - AnalyzesOk("select d2 < d5 from functional.decimal_tbl"); - AnalyzesOk("select d2 <= d5 from functional.decimal_tbl"); } @Test diff --git a/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java b/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java index f86d8e3fc..7b9e0fc9e 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java @@ -2759,7 +2759,8 @@ public class ParserTest { public void TestSubqueries() { // Binary nested predicates String subquery = "(SELECT count(*) FROM bar)"; - String[] operators = {"=", "!=", "<>", ">", ">=", "<", "<="}; + String[] operators = {"=", "!=", "<>", ">", ">=", "<", "<=", "<=>", + "IS DISTINCT FROM", "IS NOT DISTINCT FROM"}; for (String op: operators) { ParsesOk(String.format("SELECT * FROM foo WHERE a %s %s", op, subquery)); ParsesOk(String.format("SELECT * FROM foo WHERE %s %s a", subquery, op)); diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test b/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test index e4f10b77c..c23a24ba4 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/data-source-tables.test @@ -59,3 +59,17 @@ where a.tinyint_col = a.smallint_col and a.int_col = a.bigint_col 00:SCAN DATA SOURCE [functional.alltypes_datasource a] predicates: a.tinyint_col = a.smallint_col, a.int_col = a.bigint_col, a.id = a.tinyint_col, a.id = a.int_col ==== +# Tests that <=>, IS DISTINCT FROM, and IS NOT DISTINCT FROM all can be offered to the +# data source. +select * from functional.alltypes_datasource +where id <=> 1 +and bool_col <=> true +and tinyint_col IS DISTINCT FROM 2 +and smallint_col IS DISTINCT FROM 3 +and int_col is not distinct from 4 +and bigint_col is not distinct from 5 +---- PLAN +00:SCAN DATA SOURCE [functional.alltypes_datasource] +data source predicates: id IS NOT DISTINCT FROM 1, tinyint_col IS DISTINCT FROM 2, int_col IS NOT DISTINCT FROM 4 +predicates: bool_col IS NOT DISTINCT FROM TRUE, smallint_col IS DISTINCT FROM 3, bigint_col IS NOT DISTINCT FROM 5 +==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test b/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test index 222707c96..fc841566f 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/hdfs.test @@ -113,6 +113,11 @@ select * from functional.alltypes where 2009 = year 00:SCAN HDFS [functional.alltypes] partitions=12/24 files=12 size=238.68KB ==== +select * from functional.alltypes where 2009 <=> year +---- PLAN +00:SCAN HDFS [functional.alltypes] + partitions=12/24 files=12 size=238.68KB +==== # compound predicate on the second partition key select * from functional.alltypes where !(month > 2) ---- PLAN @@ -125,6 +130,11 @@ select * from functional.alltypes where !(!(month=1)) 00:SCAN HDFS [functional.alltypes] partitions=2/24 files=2 size=40.32KB ==== +select * from functional.alltypes where !(!(month<=>1)) +---- PLAN +00:SCAN HDFS [functional.alltypes] + partitions=2/24 files=2 size=40.32KB +==== # predicates on both partition keys one of which is a compound predicate with NOT select * from functional.alltypes where year=2009 and !(month < 6) ---- PLAN @@ -143,30 +153,63 @@ select * from functional.alltypes where !(year = 2009 and month > 6) 00:SCAN HDFS [functional.alltypes] partitions=18/24 files=18 size=357.58KB ==== +select * from functional.alltypes where !(year <=> 2009 and month > 6) +---- PLAN +00:SCAN HDFS [functional.alltypes] + partitions=18/24 files=18 size=357.58KB +==== +select * from functional.alltypes where !(year <=> 2009) or !(month > 6) +---- PLAN +00:SCAN HDFS [functional.alltypes] + partitions=18/24 files=18 size=357.58KB +==== # compound predicate on a disjunct select * from functional.alltypes where !(month = 6 or month = 8) ---- PLAN 00:SCAN HDFS [functional.alltypes] partitions=20/24 files=20 size=398.31KB ==== +select * from functional.alltypes where !(month <=> 6 or month <=> 8) +---- PLAN +00:SCAN HDFS [functional.alltypes] + partitions=20/24 files=20 size=398.31KB +==== # not predicate with is null select * from functional.alltypes where not (year = 2009 or month is null) ---- PLAN 00:SCAN HDFS [functional.alltypes] partitions=12/24 files=12 size=239.77KB ==== +# not predicate with "<=> null" as a synonym of "is null" +select * from functional.alltypes where not (year = 2009 or month <=> null) +---- PLAN +00:SCAN HDFS [functional.alltypes] + partitions=12/24 files=12 size=239.77KB +==== # nested not predicates with is null select * from functional.alltypes where not (not (month is null)) ---- PLAN 00:SCAN HDFS [functional.alltypes] partitions=0/24 files=0 size=0B ==== +# nested not predicates with "<=> null" as a synonym of "is null" +select * from functional.alltypes where not (not (month <=> null)) +---- PLAN +00:SCAN HDFS [functional.alltypes] + partitions=0/24 files=0 size=0B +==== # nested not predicates with disjunct select * from functional.alltypes where not (not (month is null or year = 2009)) ---- PLAN 00:SCAN HDFS [functional.alltypes] partitions=12/24 files=12 size=238.68KB ==== +# nested not predicates with disjunct and "<=> null" as a synonym of "is null" +select * from functional.alltypes where not (not (month <=> null or year = 2009)) +---- PLAN +00:SCAN HDFS [functional.alltypes] + partitions=12/24 files=12 size=238.68KB +==== # predicate on second partition key select * from functional.alltypes where month=1 ---- PLAN @@ -195,6 +238,11 @@ select * from functional.alltypes where year=2009 and month in (1, 3, 5, 7) 00:SCAN HDFS [functional.alltypes] partitions=4/24 files=4 size=80.74KB ==== +select * from functional.alltypes where year<=>2009 and month in (1, 3, 5, 7) +---- PLAN +00:SCAN HDFS [functional.alltypes] + partitions=4/24 files=4 size=80.74KB +==== # adding a predicate that always evaluates to true should not change anything select * from functional.alltypes where year=2009 and month in (1, 3, 5, 7) and month is not null @@ -239,12 +287,25 @@ where year < 2010 and (month > 6 or month = 1 or month in (3, 4)) 00:SCAN HDFS [functional.alltypes] partitions=9/24 files=9 size=180.49KB ==== +# multiple predicates on second key +select * from functional.alltypes +where year < 2010 and (month > 6 or month <=> 1 or month in (3, 4)) +---- PLAN +00:SCAN HDFS [functional.alltypes] + partitions=9/24 files=9 size=180.49KB +==== # between predicate on second key select * from functional.alltypes where year = 2009 and month between 6 and 8 ---- PLAN 00:SCAN HDFS [functional.alltypes] partitions=3/24 files=3 size=60.43KB ==== +# between predicate on second key +select * from functional.alltypes where year <=> 2009 and month between 6 and 8 +---- PLAN +00:SCAN HDFS [functional.alltypes] + partitions=3/24 files=3 size=60.43KB +==== # between predicates on first and second keys select * from functional.alltypes where year between 2009 and 2009 and month between 6 and 8 @@ -270,6 +331,11 @@ select * from functional.alltypes where year - 1 = 2009 00:SCAN HDFS [functional.alltypes] partitions=12/24 files=12 size=239.77KB ==== +select * from functional.alltypes where year - 1 <=> 2009 +---- PLAN +00:SCAN HDFS [functional.alltypes] + partitions=12/24 files=12 size=239.77KB +==== # Predicates on a partition key with nulls (see IMPALA-887) # IS NULL predicate on a partition key with nulls select * from functional.alltypesagg where day is null @@ -277,36 +343,82 @@ select * from functional.alltypesagg where day is null 00:SCAN HDFS [functional.alltypesagg] partitions=1/11 files=1 size=71.05KB ==== +# <=> null predicate on a partition key with nulls +select * from functional.alltypesagg where day <=> null +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=1/11 files=1 size=71.05KB +==== # IS NOT NULL predicate on a partition key with nulls select * from functional.alltypesagg where day is not null ---- PLAN 00:SCAN HDFS [functional.alltypesagg] partitions=10/11 files=10 size=743.67KB ==== +# IS DISTINCT FROM NULL predicate on a partition key with nulls +select * from functional.alltypesagg where day is distinct from null +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=10/11 files=10 size=743.67KB +==== +select * from functional.alltypesagg where day = day +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=10/11 files=10 size=743.67KB +==== +select * from functional.alltypesagg where day <=> day +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=11/11 files=11 size=814.73KB +==== # partition key predicates which are in conjunctive normal form (case 1) select * from functional.alltypesagg where day is null and day = 10 ---- PLAN 00:SCAN HDFS [functional.alltypesagg] partitions=0/11 files=0 size=0B ==== +# partition key predicates which are in conjunctive normal form (case 1) +select * from functional.alltypesagg where day <=> null and day = 10 +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=0/11 files=0 size=0B +==== # partition key predicates which are in conjunctive normal form (case 2) select * from functional.alltypesagg where day is null and month = 1 ---- PLAN 00:SCAN HDFS [functional.alltypesagg] partitions=1/11 files=1 size=71.05KB ==== +# partition key predicates which are in conjunctive normal form (case 2) +select * from functional.alltypesagg where day <=> null and month = 1 +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=1/11 files=1 size=71.05KB +==== # partition key predicates which are in conjunctive normal form (case 3) select * from functional.alltypesagg where month = 1 and (day is null or day = 10) ---- PLAN 00:SCAN HDFS [functional.alltypesagg] partitions=2/11 files=2 size=145.53KB ==== +# partition key predicates which are in conjunctive normal form (case 3) +select * from functional.alltypesagg where month = 1 and (day <=> null or day = 10) +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=2/11 files=2 size=145.53KB +==== # partition key predicates which are in conjunctive normal form (case 4) select * from functional.alltypesagg where month = 1 and (day is null or year = 2010) ---- PLAN 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB ==== +# partition key predicates which are in conjunctive normal form (case 4) +select * from functional.alltypesagg where month = 1 and (day <=> null or year = 2010) +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=11/11 files=11 size=814.73KB +==== # partition key predicates which are in conjunctive normal form (case 5) select * from functional.alltypesagg where (year = 2010 or month = 1) and (day is not null or day = 10) @@ -314,24 +426,49 @@ where (year = 2010 or month = 1) and (day is not null or day = 10) 00:SCAN HDFS [functional.alltypesagg] partitions=10/11 files=10 size=743.67KB ==== +# partition key predicates which are in conjunctive normal form (case 5) +select * from functional.alltypesagg +where (year = 2010 or month = 1) and (day is distinct from null or day = 10) +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=10/11 files=10 size=743.67KB +==== # partition key predicates which are in disjunctive normal form (case 1) select * from functional.alltypesagg where day is null or month = 1 ---- PLAN 00:SCAN HDFS [functional.alltypesagg] partitions=11/11 files=11 size=814.73KB ==== +# partition key predicates which are in disjunctive normal form (case 1) +select * from functional.alltypesagg where day <=> null or month = 1 +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=11/11 files=11 size=814.73KB +==== # partition key predicates which are in disjunctive normal form (case 2) select * from functional.alltypesagg where day is null or day = 10 ---- PLAN 00:SCAN HDFS [functional.alltypesagg] partitions=2/11 files=2 size=145.53KB ==== +# partition key predicates which are in disjunctive normal form (case 2) +select * from functional.alltypesagg where day <=> null or day = 10 +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=2/11 files=2 size=145.53KB +==== # partition key predicates which are in disjunctive normal form (case 3) select * from functional.alltypesagg where day = 10 or (day is null and year = 2010) ---- PLAN 00:SCAN HDFS [functional.alltypesagg] partitions=2/11 files=2 size=145.53KB ==== +# partition key predicates which are in disjunctive normal form (case 3) +select * from functional.alltypesagg where day = 10 or (day <=> null and year = 2010) +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=2/11 files=2 size=145.53KB +==== # partition key predicates which are in disjunctive normal form (case 4) select * from functional.alltypesagg where (month = 1 and day = 1) or (day is null and year = 2010) @@ -339,18 +476,37 @@ where (month = 1 and day = 1) or (day is null and year = 2010) 00:SCAN HDFS [functional.alltypesagg] partitions=2/11 files=2 size=144.45KB ==== +# partition key predicates which are in disjunctive normal form (case 4) +select * from functional.alltypesagg +where (month = 1 and day = 1) or (day <=> null and year = 2010) +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=2/11 files=2 size=144.45KB +==== # partition key predicates with negation (case 1) select * from functional.alltypesagg where not (day is not null) ---- PLAN 00:SCAN HDFS [functional.alltypesagg] partitions=1/11 files=1 size=71.05KB ==== +# partition key predicates with negation (case 1) +select * from functional.alltypesagg where not (day is distinct from null) +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=1/11 files=1 size=71.05KB +==== # partition key predicates with negation (case 2) select * from functional.alltypesagg where not (not (day is null)) ---- PLAN 00:SCAN HDFS [functional.alltypesagg] partitions=1/11 files=1 size=71.05KB ==== +# partition key predicates with negation (case 2) +select * from functional.alltypesagg where not (not (day <=> null)) +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=1/11 files=1 size=71.05KB +==== # partition key predicates with negation (case 3) select * from functional.alltypesagg where not (day is not null and month = 1) ---- PLAN @@ -358,11 +514,23 @@ select * from functional.alltypesagg where not (day is not null and month = 1) partitions=1/11 files=1 size=71.05KB ==== # partition key predicates with negation (case 3) +select * from functional.alltypesagg where not (day is distinct from null and month = 1) +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=1/11 files=1 size=71.05KB +==== +# partition key predicates with negation (case 3) select * from functional.alltypesagg where not (day is not null or day < 9) ---- PLAN 00:SCAN HDFS [functional.alltypesagg] partitions=0/11 files=0 size=0B ==== +# partition key predicates with negation (case 3) +select * from functional.alltypesagg where not (day is distinct from null or day < 9) +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=0/11 files=0 size=0B +==== # partition key predicates with negation (case 4) select * from functional.alltypesagg where not (day is not null and (not (day < 9 and month = 1))) @@ -370,6 +538,13 @@ where not (day is not null and (not (day < 9 and month = 1))) 00:SCAN HDFS [functional.alltypesagg] partitions=9/11 files=9 size=665.77KB ==== +# partition key predicates with negation (case 4) +select * from functional.alltypesagg +where not (day is distinct from null and (not (day < 9 and month = 1))) +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=9/11 files=9 size=665.77KB +==== # partition key predicates with negation (case 5) select * from functional.alltypesagg where not (day is not null or (day = 1 and (not (month = 1 or year = 2010)))) @@ -377,6 +552,13 @@ where not (day is not null or (day = 1 and (not (month = 1 or year = 2010)))) 00:SCAN HDFS [functional.alltypesagg] partitions=1/11 files=1 size=71.05KB ==== +# partition key predicates with negation (case 5) +select * from functional.alltypesagg +where not (day is distinct from null or (day = 1 and (not (month = 1 or year = 2010)))) +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=1/11 files=1 size=71.05KB +==== # partition key predicates where some are evaluated by the index and others are evaluated in the BE select * from functional.alltypesagg where year + 1 = 2011 and month + 1 <= 3 and day is null @@ -384,6 +566,13 @@ where year + 1 = 2011 and month + 1 <= 3 and day is null 00:SCAN HDFS [functional.alltypesagg] partitions=1/11 files=1 size=71.05KB ==== +# partition key predicates where some are evaluated by the index and others are evaluated in the BE +select * from functional.alltypesagg +where year + 1 = 2011 and month + 1 <= 3 and day <=> null +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=1/11 files=1 size=71.05KB +==== # all supported predicates that can be evaluated using partition key index select * from functional.alltypesagg where day = 5 or (day >= 1 and day <= 2) or (day > 6 and day < 8) @@ -403,6 +592,25 @@ NODE 0: HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263 HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=__HIVE_DEFAULT_PARTITION__/000000_0 0:72759 ==== +# all supported predicates that can be evaluated using partition key index +select * from functional.alltypesagg +where day = 5 or (day >= 1 and day <= 2) or (day > 6 and day < 8) +or day <=> null or day in (4) or not(day is distinct from null) +or not (day not in (10)) or not (day != 8) +---- PLAN +00:SCAN HDFS [functional.alltypesagg] + partitions=8/11 files=8 size=591.30KB +---- SCANRANGELOCATIONS +NODE 0: + HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=1/100101.txt 0:75153 + HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=10/100110.txt 0:76263 + HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=2/100102.txt 0:76263 + HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=4/100104.txt 0:76263 + HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=5/100105.txt 0:76263 + HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=7/100107.txt 0:76263 + HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=8/100108.txt 0:76263 + HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypesagg/year=2010/month=1/day=__HIVE_DEFAULT_PARTITION__/000000_0 0:72759 +==== # Predicates on a partition key with no values (see CDH-20089) select * from functional.emptytable where f2 = 10 ---- PLAN @@ -471,24 +679,46 @@ select * from scale_db.num_partitions_1234_blocks_per_partition_1 where j = 1 00:SCAN HDFS [scale_db.num_partitions_1234_blocks_per_partition_1] partitions=1/1234 files=1 size=2B ==== +select * from scale_db.num_partitions_1234_blocks_per_partition_1 where j <=> 1 +---- PLAN +00:SCAN HDFS [scale_db.num_partitions_1234_blocks_per_partition_1] + partitions=1/1234 files=1 size=2B +==== # Test disjunctive predicate on a partition column select * from scale_db.num_partitions_1234_blocks_per_partition_1 where j = 1 or j = 2 ---- PLAN 00:SCAN HDFS [scale_db.num_partitions_1234_blocks_per_partition_1] partitions=2/1234 files=2 size=4B ==== +select * from scale_db.num_partitions_1234_blocks_per_partition_1 where j <=> 1 or j <=> 2 +---- PLAN +00:SCAN HDFS [scale_db.num_partitions_1234_blocks_per_partition_1] + partitions=2/1234 files=2 size=4B +==== # Test conjunctive predicate on a partition column select * from scale_db.num_partitions_1234_blocks_per_partition_1 where j = 1 and j = 2 ---- PLAN 00:SCAN HDFS [scale_db.num_partitions_1234_blocks_per_partition_1] partitions=0/1234 files=0 size=0B ==== +select * from scale_db.num_partitions_1234_blocks_per_partition_1 where j <=> 1 and j <=> 2 +---- PLAN +00:SCAN HDFS [scale_db.num_partitions_1234_blocks_per_partition_1] + partitions=0/1234 files=0 size=0B +==== # Partition pruning when a binary predicate contains a NullLiteral (IMPALA-1535) select * from functional.alltypestiny t1 where t1.year != null or t1.year = null ---- PLAN 00:SCAN HDFS [functional.alltypestiny t1] partitions=0/4 files=0 size=0B ==== +# Partition pruning when a binary predicate contains a NullLiteral and IS DISTINCT FROM +select * from functional.alltypestiny t1 +where t1.year IS DISTINCT FROM null or t1.year = null +---- PLAN +00:SCAN HDFS [functional.alltypestiny t1] + partitions=4/4 files=4 size=460B +==== # Partition pruning when a binary predicate contains a NullLiteral in an arithmetic # expression select * from functional.alltypesagg t1 where t1.year + null != t1.day @@ -496,6 +726,13 @@ select * from functional.alltypesagg t1 where t1.year + null != t1.day 00:SCAN HDFS [functional.alltypesagg t1] partitions=0/11 files=0 size=0B ==== +# Partition pruning when a binary predicate contains a NullLiteral in an arithmetic +# expression and IS DISTINCT FROM +select * from functional.alltypesagg t1 where t1.year + null IS DISTINCT FROM t1.day +---- PLAN +00:SCAN HDFS [functional.alltypesagg t1] + partitions=10/11 files=10 size=743.67KB +==== # Partition pruning when an IN predicate contains a NullLiteral # (a single partition is scanned) select * from functional.alltypesagg t1 where day in (10, null) diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test index 5f04c7512..c6d2a7403 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test @@ -1264,3 +1264,82 @@ on (t3.string_col = t1.string_col_1 and t3.date_string_col = t1.string_col_2) 01:SCAN HDFS [functional.alltypes t2] partitions=24/24 files=24 size=478.45KB ==== +# Test that filtering with "<=>" sets selectivity, just as "=" does. First, the +# base case: functional.alltypes.timestamp_col has more distinct vals than +# functional.alltypes.date_string_col. As a result, in a left semi join between +# functional.alltypes and itself, if one side of the join is filtered on timestamp_col and +# the other on date_string_col, the one filtered on timestamp_col is expected by the +# planner to be smaller and becomes the build side of the hash join. +select * from functional.alltypes a +left semi join +(select * from functional.alltypes +where timestamp_col = now()) b +on (a.id = b.id) +and a.date_string_col = '' +---- PLAN +02:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: a.id = functional.alltypes.id +| +|--01:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| predicates: timestamp_col = now() +| +00:SCAN HDFS [functional.alltypes a] + partitions=24/24 files=24 size=478.45KB + predicates: a.date_string_col = '' +==== +select * from functional.alltypes a +left semi join +(select * from functional.alltypes +where date_string_col = '') b +on (a.id = b.id) +and a.timestamp_col = now() +---- PLAN +02:HASH JOIN [RIGHT SEMI JOIN] +| hash predicates: functional.alltypes.id = a.id +| +|--00:SCAN HDFS [functional.alltypes a] +| partitions=24/24 files=24 size=478.45KB +| predicates: a.timestamp_col = now() +| +01:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB + predicates: date_string_col = '' +==== +# The same should hold true when the filtering is done with "<=>" rather than "=". +select * from functional.alltypes a +left semi join +(select * from functional.alltypes +where timestamp_col <=> now()) b +on (a.id = b.id) +and a.date_string_col <=> '' +---- PLAN +02:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: a.id = functional.alltypes.id +| +|--01:SCAN HDFS [functional.alltypes] +| partitions=24/24 files=24 size=478.45KB +| predicates: timestamp_col IS NOT DISTINCT FROM now() +| +00:SCAN HDFS [functional.alltypes a] + partitions=24/24 files=24 size=478.45KB + predicates: a.date_string_col IS NOT DISTINCT FROM '' +==== +select * from functional.alltypes a +left semi join +(select * from functional.alltypes +where date_string_col <=> '') b +on (a.id = b.id) +and a.timestamp_col <=> now() +---- PLAN +02:HASH JOIN [RIGHT SEMI JOIN] +| hash predicates: functional.alltypes.id = a.id +| +|--00:SCAN HDFS [functional.alltypes a] +| partitions=24/24 files=24 size=478.45KB +| predicates: a.timestamp_col IS NOT DISTINCT FROM now() +| +01:SCAN HDFS [functional.alltypes] + partitions=24/24 files=24 size=478.45KB + predicates: date_string_col IS NOT DISTINCT FROM '' +==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test index 68dfa6df5..a90636247 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test @@ -1906,3 +1906,131 @@ where t2.int_col in (t2.int_col, 10); 00:SCAN HDFS [functional.alltypestiny t1] partitions=4/4 files=4 size=460B ==== +# Test queries that appear earlier in this file, but substitute "<=>" or "IS DISTINCT +# FROM" for "=" in the join predicates. +select * +from functional.testtbl t1 join functional.testtbl t2 +where t1.id <=> t2.id and t1.zip = 94611 +---- PLAN +02:HASH JOIN [INNER JOIN] +| hash predicates: t1.id IS NOT DISTINCT FROM t2.id +| +|--01:SCAN HDFS [functional.testtbl t2] +| partitions=1/1 files=0 size=0B +| +00:SCAN HDFS [functional.testtbl t1] + partitions=1/1 files=0 size=0B + predicates: t1.zip = 94611 +==== +select * +from functional.testtbl t1 join functional.testtbl t2 +where t1.id is not distinct from t2.id and t1.zip = 94611 +---- PLAN +02:HASH JOIN [INNER JOIN] +| hash predicates: t1.id IS NOT DISTINCT FROM t2.id +| +|--01:SCAN HDFS [functional.testtbl t2] +| partitions=1/1 files=0 size=0B +| +00:SCAN HDFS [functional.testtbl t1] + partitions=1/1 files=0 size=0B + predicates: t1.zip = 94611 +==== +select * +from functional.testtbl t1 join functional.testtbl t2 +where (t1.id IS DISTINCT FROM t2.id) and t1.zip = 94611 +---- PLAN +02:NESTED LOOP JOIN [INNER JOIN] +| predicates: (t1.id IS DISTINCT FROM t2.id) +| +|--01:SCAN HDFS [functional.testtbl t2] +| partitions=1/1 files=0 size=0B +| +00:SCAN HDFS [functional.testtbl t1] + partitions=1/1 files=0 size=0B + predicates: t1.zip = 94611 +==== +# Test that "is not distinct from" plans the same as "=" in the same query above. +select t1.* +from (select * from functional.alltypestiny) t1 + join (select * from functional.alltypestiny) t2 on (t1.id is not distinct from t2.id) + join functional.alltypestiny t3 on (coalesce(t1.id, t2.id) is not distinct from t3.id) +---- PLAN +04:HASH JOIN [INNER JOIN] +| hash predicates: coalesce(functional.alltypestiny.id, functional.alltypestiny.id) IS NOT DISTINCT FROM t3.id +| +|--02:SCAN HDFS [functional.alltypestiny t3] +| partitions=4/4 files=4 size=460B +| +03:HASH JOIN [INNER JOIN] +| hash predicates: functional.alltypestiny.id IS NOT DISTINCT FROM functional.alltypestiny.id +| +|--01:SCAN HDFS [functional.alltypestiny] +| partitions=4/4 files=4 size=460B +| +00:SCAN HDFS [functional.alltypestiny] + partitions=4/4 files=4 size=460B +==== +select * +from functional.alltypesagg a +full outer join functional.alltypessmall b on (a.id <=> b.id and a.int_col = b.int_col) +right join functional.alltypesaggnonulls c on (a.id = c.id and b.string_col <=> c.string_col) +where a.day >= 6 +and b.month > 2 +and c.day < 3 +and a.tinyint_col = 15 +and b.string_col = '15' +and a.tinyint_col + b.tinyint_col < 15 +and a.float_col - c.double_col < 0 +and (b.double_col * c.tinyint_col > 1000 or c.tinyint_col < 1000) +---- PLAN +04:HASH JOIN [LEFT OUTER JOIN] +| hash predicates: c.id = a.id, c.string_col IS NOT DISTINCT FROM b.string_col +| other predicates: a.day >= 6, b.month > 2, a.tinyint_col = 15, b.string_col = '15', a.tinyint_col + b.tinyint_col < 15, a.float_col - c.double_col < 0, (b.double_col * c.tinyint_col > 1000 OR c.tinyint_col < 1000) +| +|--03:HASH JOIN [FULL OUTER JOIN] +| | hash predicates: a.id IS NOT DISTINCT FROM b.id, a.int_col = b.int_col +| | +| |--01:SCAN HDFS [functional.alltypessmall b] +| | partitions=2/4 files=2 size=3.17KB +| | predicates: b.string_col = '15' +| | +| 00:SCAN HDFS [functional.alltypesagg a] +| partitions=5/11 files=5 size=372.38KB +| predicates: a.tinyint_col = 15 +| +02:SCAN HDFS [functional.alltypesaggnonulls c] + partitions=2/10 files=2 size=148.10KB +==== +select t1.d, t2.d from functional.nulltable t1, functional.nulltable t2 +where not(t1.d IS DISTINCT FROM t2.d) +---- PLAN +02:NESTED LOOP JOIN [INNER JOIN] +| predicates: NOT (t1.d IS DISTINCT FROM t2.d) +| +|--01:SCAN HDFS [functional.nulltable t2] +| partitions=1/1 files=1 size=18B +| +00:SCAN HDFS [functional.nulltable t1] + partitions=1/1 files=1 size=18B +==== +select t1.d, t2.d +from functional.nulltable t1, functional.nulltable t2, functional.nulltable t3 +where t1.d IS DISTINCT FROM t2.d +and t3.a != t2.g +---- PLAN +04:NESTED LOOP JOIN [INNER JOIN] +| predicates: t3.a != t2.g +| +|--02:SCAN HDFS [functional.nulltable t3] +| partitions=1/1 files=1 size=18B +| +03:NESTED LOOP JOIN [INNER JOIN] +| predicates: t1.d IS DISTINCT FROM t2.d +| +|--01:SCAN HDFS [functional.nulltable t2] +| partitions=1/1 files=1 size=18B +| +00:SCAN HDFS [functional.nulltable t1] + partitions=1/1 files=1 size=18B +==== \ No newline at end of file diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test index 4e25511a8..167c9a259 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test @@ -1497,3 +1497,152 @@ and not exists partitions=11/11 files=11 size=814.73KB predicates: t3.id > 100 ==== +# Tests for <=> (aka IS NOT DISTINCT FROM) and IS DISTINCT FROM +select * from functional.alltypesagg t1 +where t1.id is not distinct from +(select min(id) from functional.alltypes t2 +where t1.int_col is not distinct from t2.int_col); +---- PLAN +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: t1.id IS NOT DISTINCT FROM min(id), t1.int_col IS NOT DISTINCT FROM t2.int_col +| +|--02:AGGREGATE [FINALIZE] +| | output: min(id) +| | group by: t2.int_col +| | +| 01:SCAN HDFS [functional.alltypes t2] +| partitions=24/24 files=24 size=478.45KB +| +00:SCAN HDFS [functional.alltypesagg t1] + partitions=11/11 files=11 size=814.73KB +==== +select * from functional.alltypesagg t1 +where t1.id is distinct from +(select min(id) from functional.alltypes t2 +where t1.int_col is not distinct from t2.int_col); +---- PLAN +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: t1.int_col IS NOT DISTINCT FROM t2.int_col +| other join predicates: t1.id IS DISTINCT FROM min(id) +| +|--02:AGGREGATE [FINALIZE] +| | output: min(id) +| | group by: t2.int_col +| | +| 01:SCAN HDFS [functional.alltypes t2] +| partitions=24/24 files=24 size=478.45KB +| +00:SCAN HDFS [functional.alltypesagg t1] + partitions=11/11 files=11 size=814.73KB +==== +select * from functional.alltypesagg t1 +where t1.id = +(select min(id) from functional.alltypes t2 +where t1.int_col is not distinct from t2.int_col); +---- PLAN +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: t1.id = min(id), t1.int_col IS NOT DISTINCT FROM t2.int_col +| +|--02:AGGREGATE [FINALIZE] +| | output: min(id) +| | group by: t2.int_col +| | +| 01:SCAN HDFS [functional.alltypes t2] +| partitions=24/24 files=24 size=478.45KB +| +00:SCAN HDFS [functional.alltypesagg t1] + partitions=11/11 files=11 size=814.73KB +==== +select * from functional.alltypesagg t1 +where t1.id != +(select min(id) from functional.alltypes t2 +where t1.int_col is not distinct from t2.int_col); +---- PLAN +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: t1.int_col IS NOT DISTINCT FROM t2.int_col +| other join predicates: t1.id != min(id) +| +|--02:AGGREGATE [FINALIZE] +| | output: min(id) +| | group by: t2.int_col +| | +| 01:SCAN HDFS [functional.alltypes t2] +| partitions=24/24 files=24 size=478.45KB +| +00:SCAN HDFS [functional.alltypesagg t1] + partitions=11/11 files=11 size=814.73KB +==== +select * from functional.alltypesagg t1 +where t1.id is not distinct from +(select min(id) from functional.alltypes t2 +where t1.int_col = t2.int_col); +---- PLAN +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: t1.id IS NOT DISTINCT FROM min(id), t1.int_col = t2.int_col +| +|--02:AGGREGATE [FINALIZE] +| | output: min(id) +| | group by: t2.int_col +| | +| 01:SCAN HDFS [functional.alltypes t2] +| partitions=24/24 files=24 size=478.45KB +| +00:SCAN HDFS [functional.alltypesagg t1] + partitions=11/11 files=11 size=814.73KB +==== +select * from functional.alltypesagg t1 +where t1.id is distinct from +(select min(id) from functional.alltypes t2 +where t1.int_col = t2.int_col); +---- PLAN +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: t1.int_col = t2.int_col +| other join predicates: t1.id IS DISTINCT FROM min(id) +| +|--02:AGGREGATE [FINALIZE] +| | output: min(id) +| | group by: t2.int_col +| | +| 01:SCAN HDFS [functional.alltypes t2] +| partitions=24/24 files=24 size=478.45KB +| +00:SCAN HDFS [functional.alltypesagg t1] + partitions=11/11 files=11 size=814.73KB +==== +select * from functional.alltypesagg t1 +where t1.id = +(select min(id) from functional.alltypes t2 +where t1.int_col = t2.int_col); +---- PLAN +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: t1.id = min(id), t1.int_col = t2.int_col +| +|--02:AGGREGATE [FINALIZE] +| | output: min(id) +| | group by: t2.int_col +| | +| 01:SCAN HDFS [functional.alltypes t2] +| partitions=24/24 files=24 size=478.45KB +| +00:SCAN HDFS [functional.alltypesagg t1] + partitions=11/11 files=11 size=814.73KB +==== +select * from functional.alltypesagg t1 +where t1.id != +(select min(id) from functional.alltypes t2 +where t1.int_col = t2.int_col); +---- PLAN +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: t1.int_col = t2.int_col +| other join predicates: t1.id != min(id) +| +|--02:AGGREGATE [FINALIZE] +| | output: min(id) +| | group by: t2.int_col +| | +| 01:SCAN HDFS [functional.alltypes t2] +| partitions=24/24 files=24 size=478.45KB +| +00:SCAN HDFS [functional.alltypesagg t1] + partitions=11/11 files=11 size=814.73KB +==== \ No newline at end of file diff --git a/testdata/workloads/functional-query/queries/QueryTest/data-source-tables.test b/testdata/workloads/functional-query/queries/QueryTest/data-source-tables.test index ff97ceb2c..c12968d6e 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/data-source-tables.test +++ b/testdata/workloads/functional-query/queries/QueryTest/data-source-tables.test @@ -63,4 +63,68 @@ where string_col = 'VALIDATE_PREDICATES##id LT 1 && id GT 1 && id LE 1 && id GE 'SUCCESS' ---- TYPES STRING -==== \ No newline at end of file +==== +---- QUERY +# Test that <=>, IS DISTINCT FROM, and IS NOT DISTINCT FROM all can be validated +select string_col from alltypes_datasource +where string_col = 'VALIDATE_PREDICATES##id NOT_DISTINCT 1 && id DISTINCT_FROM 1 && id NOT_DISTINCT 1' + and 1 <=> id and 1 IS DISTINCT FROM id and 1 IS NOT DISTINCT FROM id +---- RESULTS +'SUCCESS' +---- TYPES +STRING +==== +---- QUERY +# Test that <=>, IS DISTINCT FROM, and IS NOT DISTINCT FROM are evaluated just like their +# equality counterparts +select * from +(select count(*) from alltypes_datasource +where tinyint_col = 1 and smallint_col = 11) a +union all +(select count(*) from alltypes_datasource +where tinyint_col <=> 1 and smallint_col <=> 11) +---- RESULTS +50 +50 +---- TYPES +BIGINT +==== +---- QUERY +select * from +(select count(*) from alltypes_datasource +where smallint_col = 11 and tinyint_col = 1) a +union all +(select count(*) from alltypes_datasource +where smallint_col <=> 11 and tinyint_col <=> 1) +---- RESULTS +500 +500 +---- TYPES +BIGINT +==== +---- QUERY +select * from +(select count(*) from alltypes_datasource +where tinyint_col != 1 and smallint_col != 11) a +union all +(select count(*) from alltypes_datasource +where tinyint_col IS DISTINCT FROM 1 and smallint_col IS DISTINCT FROM 11) +---- RESULTS +4950 +4950 +---- TYPES +BIGINT +==== +---- QUERY +select * from +(select count(*) from alltypes_datasource +where smallint_col != 11 and tinyint_col != 1) a +union all +(select count(*) from alltypes_datasource +where smallint_col IS DISTINCT FROM 11 and tinyint_col IS DISTINCT FROM 1) +---- RESULTS +4096 +4096 +---- TYPES +BIGINT +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/exprs.test b/testdata/workloads/functional-query/queries/QueryTest/exprs.test index b5a5de5af..710eb650a 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/exprs.test +++ b/testdata/workloads/functional-query/queries/QueryTest/exprs.test @@ -2338,3 +2338,61 @@ select regexp_match_count(tmp.str, tmp.pattern, tmp.start_pos, tmp.params) from ('iPhone\niPad\niPod', '^I.*$', 1, 'imn')) as tmp ---- CATCH Illegal match parameter x +==== +---- QUERY +# IMPALA-2147: IS [NOT] DISTINCT FROM and "<=>" +select NULL <=> NULL +---- RESULTS +true +---- TYPES +BOOLEAN +==== +---- QUERY +select NULL <=> 1 +---- RESULTS +false +---- TYPES +BOOLEAN +==== +---- QUERY +select NULL <=> "foo" +---- RESULTS +false +---- TYPES +BOOLEAN +==== +---- QUERY +select NULL IS DISTINCT FROM NULL +---- RESULTS +false +---- TYPES +BOOLEAN +==== +---- QUERY +select NULL IS DISTINCT FROM 3.14 +---- RESULTS +true +---- TYPES +BOOLEAN +==== +---- QUERY +select cast(0 as bigint) IS DISTINCT FROM NULL +---- RESULTS +true +---- TYPES +BOOLEAN +==== +---- QUERY +select 2.78 IS DISTINCT FROM 3.14 +---- RESULTS +true +---- TYPES +BOOLEAN +==== +---- QUERY +select 2.78 IS NOT DISTINCT FROM 3.14 +---- RESULTS +false +---- TYPES +BOOLEAN +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/joins.test b/testdata/workloads/functional-query/queries/QueryTest/joins.test index c880996a5..e570d3587 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/joins.test +++ b/testdata/workloads/functional-query/queries/QueryTest/joins.test @@ -562,3 +562,151 @@ cross join functional.alltypes t2 where t1.id = 0 limit 1; ---- TYPES INT,BOOLEAN ==== +---- QUERY +# IMPALA-2147: IS [NOT] DISTINCT FROM and "<=>" +select count(*) > 0 +from alltypesagg as a, alltypesagg as b +where (a.tinyint_col IS DISTINCT FROM b.tinyint_col) +and a.tinyint_col is null +---- RESULTS +true +---- TYPES +BOOLEAN +==== +---- QUERY +select count(*) > 0 +from alltypesagg as a, alltypesagg as b +where (a.tinyint_col IS NOT DISTINCT FROM b.tinyint_col) +and a.tinyint_col is null +---- RESULTS +true +---- TYPES +BOOLEAN +==== +---- QUERY +select count(*) > 0 +from alltypesagg as a, alltypesagg as b +where (a.tinyint_col <=> b.tinyint_col) +and a.tinyint_col is null +---- RESULTS +true +---- TYPES +BOOLEAN +==== +---- QUERY +# left joins with <=> are different from left joins with = +select P.d, Q.d, Q.b, Q.b is null +from nulltable P left join nulltable Q +on P.d = Q.d +---- RESULTS +NULL,NULL,'NULL',true +---- TYPES +INT,INT,STRING,BOOLEAN +==== +---- QUERY +select P.d, Q.d, Q.b +from nulltable P left join nulltable Q +on P.d <=> Q.d +---- RESULTS +NULL,NULL,'' +---- TYPES +INT,INT,STRING +==== +---- QUERY +select count(*) +from nulltable P left anti join nulltable Q +on P.d = Q.d +---- RESULTS +1 +---- TYPES +BIGINT +==== +---- QUERY +select count(*) +from nulltable P left anti join nulltable Q +on P.d <=> Q.d +---- RESULTS +0 +---- TYPES +BIGINT +==== +---- QUERY +# Test that 'IS DISTINCT FROM' works in nested loop joins +select count(*) from nulltable t1, nulltable t2 where not(t1.d != t2.d) +---- RESULTS +0 +---- TYPES +BIGINT +==== +---- QUERY +select t1.d, t2.d from nulltable t1, nulltable t2 where not(t1.d IS DISTINCT FROM t2.d) +---- RESULTS +NULL,NULL +---- TYPES +INT,INT +==== +---- QUERY +select count(*) from nulltable t1, nulltable t2 +where t1.d != length(t2.a) +---- RESULTS +0 +---- TYPES +BIGINT +==== +---- QUERY +select t1.d, t2.a from nulltable t1, nulltable t2 +where t1.d IS DISTINCT FROM length(t2.a) +---- RESULTS +NULL,'a' +---- TYPES +INT,STRING +==== +---- QUERY +select t1.a, t1.b, t2.a, t2.b from +(values +(NULL a, NULL b), (NULL, 0), (NULL, 1), +(0, NULL), (0, 0), (0, 1), +(1, NULL), (1, 0), (1, 1)) as t1, +(values +(NULL a, NULL b), (NULL, 0), (NULL, 1), +(0, NULL), (0, 0), (0, 1), +(1, NULL), (1, 0), (1, 1)) as t2 +where t1.a <=> t2.a +and t1.b <=> t2.b +order by t1.a, t1.b, t2.a, t2.b +---- RESULTS +0,0,0,0 +0,1,0,1 +0,NULL,0,NULL +1,0,1,0 +1,1,1,1 +1,NULL,1,NULL +NULL,0,NULL,0 +NULL,1,NULL,1 +NULL,NULL,NULL,NULL +---- TYPES +TINYINT,TINYINT,TINYINT,TINYINT +==== +---- QUERY +select t1.a, t1.b, t2.a, t2.b from +(values +(NULL a, NULL b), (NULL, 0), (NULL, 1), +(0, NULL), (0, 0), (0, 1), +(1, NULL), (1, 0), (1, 1)) as t1, +(values +(NULL a, NULL b), (NULL, 0), (NULL, 1), +(0, NULL), (0, 0), (0, 1), +(1, NULL), (1, 0), (1, 1)) as t2 +where t1.a <=> t2.a +and t1.b = t2.b +order by t1.a, t1.b, t2.a, t2.b +---- RESULTS +0,0,0,0 +0,1,0,1 +1,0,1,0 +1,1,1,1 +NULL,0,NULL,0 +NULL,1,NULL,1 +---- TYPES +TINYINT,TINYINT,TINYINT,TINYINT +==== diff --git a/tests/query_test/test_join_queries.py b/tests/query_test/test_join_queries.py index 582343de0..f617ffdeb 100644 --- a/tests/query_test/test_join_queries.py +++ b/tests/query_test/test_join_queries.py @@ -30,7 +30,7 @@ class TestJoinQueries(ImpalaTestSuite): # Cut down on execution time when not running in exhaustive mode. cls.TestMatrix.add_constraint(lambda v: v.get_value('batch_size') != 1) - def test_joins(self, vector): + def test_basic_joins(self, vector): new_vector = copy(vector) new_vector.get_value('exec_option')['batch_size'] = vector.get_value('batch_size') self.run_test_case('QueryTest/joins', new_vector)