From 0827146a2bb0f7e08c2caee83bde7740936108bb Mon Sep 17 00:00:00 2001 From: Marcel Kornacker Date: Mon, 19 Sep 2011 07:45:01 -0700 Subject: [PATCH] adding outer joins plus new tests --- .gitignore | 1 + be/src/exec/exec-node.cc | 6 +- be/src/exec/exec-node.h | 3 + be/src/exec/hash-join-node.cc | 107 ++++- be/src/exec/hash-join-node.h | 22 + be/src/exec/hash-table.cc | 6 +- be/src/exec/hash-table.h | 3 +- be/src/runtime/row-batch.h | 4 + fe/.settings/org.eclipse.jdt.core.prefs | 2 +- fe/src/main/cup/sql-parser.y | 6 +- .../cloudera/impala/analysis/Analyzer.java | 51 +-- .../impala/analysis/JoinOperator.java | 28 +- .../cloudera/impala/analysis/Predicate.java | 11 + .../cloudera/impala/analysis/SelectStmt.java | 11 +- .../cloudera/impala/analysis/TableRef.java | 145 +++++-- .../cloudera/impala/planner/HashJoinNode.java | 64 ++- .../com/cloudera/impala/planner/PlanNode.java | 13 +- .../com/cloudera/impala/planner/Planner.java | 105 ++++- fe/src/main/thrift/PlanNodes.thrift | 17 +- .../impala/analysis/AnalyzerTest.java | 30 +- .../cloudera/impala/catalog/CatalogTest.java | 13 +- .../cloudera/impala/service/QueryTest.java | 1 + .../cloudera/impala/testutil/TestUtils.java | 12 +- fe/src/test/resources/PlannerTest/joins.test | 13 +- .../test/resources/QueryTest/outer-joins.test | 382 ++++++++++++++++++ testdata/DimTbl/data.csv | 10 + testdata/JoinTbl/data.csv | 19 + testdata/bin/create.sql | 15 + testdata/bin/load.sql | 2 + 29 files changed, 926 insertions(+), 176 deletions(-) create mode 100644 fe/src/test/resources/QueryTest/outer-joins.test create mode 100644 testdata/DimTbl/data.csv create mode 100644 testdata/JoinTbl/data.csv diff --git a/.gitignore b/.gitignore index 56f481fef..94d09affb 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ derby.log thirdparty cscope.files cscope.out +org.eclipse.jdt.core.prefs diff --git a/be/src/exec/exec-node.cc b/be/src/exec/exec-node.cc index 40891478a..4d09c820a 100644 --- a/be/src/exec/exec-node.cc +++ 
b/be/src/exec/exec-node.cc @@ -136,7 +136,11 @@ void ExecNode::PrepareConjuncts(RuntimeState* state) { } bool ExecNode::EvalConjuncts(TupleRow* row) { - for (vector::iterator i = conjuncts_.begin(); i != conjuncts_.end(); ++i) { + return EvalConjuncts(conjuncts_, row); +} + +bool ExecNode::EvalConjuncts(const vector& conjuncts, TupleRow* row) { + for (vector::const_iterator i = conjuncts.begin(); i != conjuncts.end(); ++i) { void* value = (*i)->GetValue(row); if (value == NULL || *reinterpret_cast(value) == false) return false; } diff --git a/be/src/exec/exec-node.h b/be/src/exec/exec-node.h index 1a5f737e3..0d0b69718 100644 --- a/be/src/exec/exec-node.h +++ b/be/src/exec/exec-node.h @@ -86,6 +86,9 @@ class ExecNode { void PrepareConjuncts(RuntimeState* state); // Evaluate conjuncts. Return true if all conjuncts return true, otherwise false. + bool EvalConjuncts(const std::vector& conjuncts, TupleRow* row); + + // Evaluate conjuncts_. Return true if all conjuncts return true, otherwise false. 
bool EvalConjuncts(TupleRow* row); }; diff --git a/be/src/exec/hash-join-node.cc b/be/src/exec/hash-join-node.cc index 45a46cb99..b176f316e 100644 --- a/be/src/exec/hash-join-node.cc +++ b/be/src/exec/hash-join-node.cc @@ -19,10 +19,19 @@ using namespace boost; HashJoinNode::HashJoinNode( ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) - : ExecNode(pool, tnode, descs) { + : ExecNode(pool, tnode, descs), + join_op_(tnode.hash_join_node.join_op) { // TODO: log errors in runtime state Status status = Init(pool, tnode); - DCHECK(status.ok()); + DCHECK(status.ok()) + << "HashJoinNode c'tor: Init() failed:\n" + << status.GetErrorMsg(); + + match_all_probe_ = + (join_op_ == TJoinOp::LEFT_OUTER_JOIN || join_op_ == TJoinOp::FULL_OUTER_JOIN); + match_one_build_ = (join_op_ == TJoinOp::LEFT_SEMI_JOIN); + match_all_build_ = + (join_op_ == TJoinOp::RIGHT_OUTER_JOIN || join_op_ == TJoinOp::FULL_OUTER_JOIN); } HashJoinNode::~HashJoinNode() { @@ -33,14 +42,18 @@ HashJoinNode::~HashJoinNode() { Status HashJoinNode::Init(ObjectPool* pool, const TPlanNode& tnode) { DCHECK(tnode.__isset.hash_join_node); - const vector& join_preds = tnode.hash_join_node.join_predicates; - for (int i = 0; i < join_preds.size(); ++i) { + const vector& eq_join_conjuncts = + tnode.hash_join_node.eq_join_conjuncts; + for (int i = 0; i < eq_join_conjuncts.size(); ++i) { Expr* expr; - RETURN_IF_ERROR(Expr::CreateExprTree(pool, join_preds[i].left, &expr)); + RETURN_IF_ERROR(Expr::CreateExprTree(pool, eq_join_conjuncts[i].left, &expr)); probe_exprs_.push_back(expr); - RETURN_IF_ERROR(Expr::CreateExprTree(pool, join_preds[i].right, &expr)); + RETURN_IF_ERROR(Expr::CreateExprTree(pool, eq_join_conjuncts[i].right, &expr)); build_exprs_.push_back(expr); } + RETURN_IF_ERROR( + Expr::CreateExprTrees(pool, tnode.hash_join_node.other_join_conjuncts, + &other_join_conjuncts_)); return Status::OK; } @@ -80,25 +93,59 @@ Status HashJoinNode::Open(RuntimeState* state) { 
RETURN_IF_ERROR(child(1)->Close(state)); RETURN_IF_ERROR(child(0)->Open(state)); - // prime probe batch + + // seed probe batch and current_probe_row_, etc. RETURN_IF_ERROR(child(0)->GetNext(state, probe_batch_.get())); probe_batch_pos_ = 0; + matched_probe_ = false; + current_probe_row_ = probe_batch_->GetRow(probe_batch_pos_++); + matched_probe_ = false; + hash_tbl_->Scan(current_probe_row_, &hash_tbl_iterator_); + return Status::OK; } +inline TupleRow* HashJoinNode::CreateOutputRow( + RowBatch* out_batch, TupleRow* probe_row, Tuple* build_tuple) { + DCHECK(!out_batch->IsFull()); + // copy probe row to output + int row_idx = out_batch->AddRow(); + TupleRow* out_row = out_batch->GetRow(row_idx); + if (probe_row != NULL) { + out_batch->CopyRow(probe_row, out_row); + } else { + out_batch->ClearRow(out_row); + } + out_row->SetTuple(build_tuple_idx_, build_tuple); + return out_row; +} + Status HashJoinNode::GetNext(RuntimeState* state, RowBatch* out_batch) { while (!eos_) { Tuple* tuple; - while (!out_batch->IsFull() && (tuple = hash_tbl_iterator_.GetNext()) != NULL) { - // copy probe row to output - int row_idx = out_batch->AddRow(); - TupleRow* out_row = out_batch->GetRow(row_idx); - out_batch->CopyRow(current_probe_row_, out_row); - out_row->SetTuple(build_tuple_idx_, tuple); - if (EvalConjuncts(out_row)) { + // create output rows as long as: + // * our output batch isn't full; + // * we haven't already created an output row for the probe row and are doing + // a semi-join; + // * there are more matching build rows + while (!out_batch->IsFull() + && !(match_one_build_ && matched_probe_) + && (tuple = hash_tbl_iterator_.GetNext()) != NULL) { + TupleRow* out_row = CreateOutputRow(out_batch, current_probe_row_, tuple); + if (!EvalConjuncts(other_join_conjuncts_, out_row)) continue; + // we have a match for the purpose of the (outer?) 
join as soon as we + // satisfy the JOIN clause conjuncts + matched_probe_ = true; + if (match_all_build_) { + // remember that we matched this build tuple + joined_build_tuples_.insert(tuple); + } + if (EvalConjuncts(conjuncts_, out_row)) { out_batch->CommitLastRow(); } + if (match_one_build_) break; } + if (out_batch->IsFull()) return Status::OK; if (probe_batch_pos_ == probe_batch_->num_rows()) { @@ -106,18 +153,40 @@ Status HashJoinNode::GetNext(RuntimeState* state, RowBatch* out_batch) { if (probe_batch_->num_rows() < probe_batch_->capacity()) { // this was the last probe batch eos_ = true; - return Status::OK; + if (match_all_build_) hash_tbl_->Scan(NULL, &hash_tbl_iterator_); + } else { + // pass on pools, out_batch might still need them + out_batch->AddMemPools(probe_batch_.get()); + RETURN_IF_ERROR(child(0)->GetNext(state, probe_batch_.get())); + probe_batch_pos_ = 0; } - // pass on pools, out_batch might still need them - out_batch->AddMemPools(probe_batch_.get()); - RETURN_IF_ERROR(child(0)->GetNext(state, probe_batch_.get())); - probe_batch_pos_ = 0; } + if (match_all_probe_ && !matched_probe_) { + TupleRow* out_row = CreateOutputRow(out_batch, current_probe_row_, NULL); + if (EvalConjuncts(conjuncts_, out_row)) { + out_batch->CommitLastRow(); + } + } + if (eos_) break; + // join remaining rows in probe_batch_ current_probe_row_ = probe_batch_->GetRow(probe_batch_pos_++); + matched_probe_ = false; hash_tbl_->Scan(current_probe_row_, &hash_tbl_iterator_); } + + if (match_all_build_) { + // output remaining unmatched build rows + Tuple* tuple; + while (!out_batch->IsFull() && (tuple = hash_tbl_iterator_.GetNext()) != NULL) { + if (joined_build_tuples_.find(tuple) != joined_build_tuples_.end()) continue; + TupleRow* out_row = CreateOutputRow(out_batch, NULL, tuple); + if (EvalConjuncts(conjuncts_, out_row)) { + out_batch->CommitLastRow(); + } + } + } return Status::OK; } diff --git a/be/src/exec/hash-join-node.h b/be/src/exec/hash-join-node.h index 
89e4bbb01..ca67dc013 100644 --- a/be/src/exec/hash-join-node.h +++ b/be/src/exec/hash-join-node.h @@ -4,10 +4,13 @@ #define IMPALA_EXEC_HASH_JOIN_NODE_H #include +#include #include "exec/exec-node.h" #include "exec/hash-table.h" +#include "gen-cpp/PlanNodes_types.h" // for TJoinOp + namespace impala { class MemPool; @@ -42,11 +45,26 @@ class HashJoinNode : public ExecNode { boost::scoped_ptr hash_tbl_; HashTable::Iterator hash_tbl_iterator_; + // for right outer joins, keep track of what's been joined + typedef boost::unordered_set BuildTupleSet; + BuildTupleSet joined_build_tuples_; + + TJoinOp::type join_op_; + // our equi-join predicates " = " are separated into // build_exprs_ (over child(1)) and probe_exprs_ (over child(0)) std::vector probe_exprs_; std::vector build_exprs_; + // non-equi-join conjuncts from the JOIN clause + std::vector other_join_conjuncts_; + + // derived from join_op_ + bool match_all_probe_; // output all tuples coming from the probe input + bool match_one_build_; // match at most one build tuple to each probe tuple + bool match_all_build_; // output all tuples coming from the build input + + bool matched_probe_; // if true, we have matched the current probe tuple bool eos_; // if true, nothing left to return in GetNext() int build_tuple_idx_; // w/in our output row std::vector build_pools_; // everything handed to us by the scan of child(1) @@ -56,6 +74,10 @@ class HashJoinNode : public ExecNode { // set up build_- and probe_exprs_ Status Init(ObjectPool* pool, const TPlanNode& tnode); + + // Write combined row, consisting of probe_row and build_tuple, to out_batch + // and return row. 
+ TupleRow* CreateOutputRow(RowBatch* out_batch, TupleRow* probe_row, Tuple* build_tuple); }; } diff --git a/be/src/exec/hash-table.cc b/be/src/exec/hash-table.cc index 197c24849..aa955fac1 100644 --- a/be/src/exec/hash-table.cc +++ b/be/src/exec/hash-table.cc @@ -133,7 +133,11 @@ void HashTable::Insert(Tuple* t) { void HashTable::Scan(TupleRow* probe_row, Iterator* it) { current_probe_row_ = probe_row; current_build_row_ = NULL; - it->Reset(hash_tbl_->equal_range(NULL)); + if (probe_row != NULL) { + it->Reset(hash_tbl_->equal_range(NULL)); + } else { + it->Reset(make_pair(hash_tbl_->begin(), hash_tbl_->end())); + } } void HashTable::DebugString(int indentation_level, std::stringstream* out) const { diff --git a/be/src/exec/hash-table.h b/be/src/exec/hash-table.h index 7a82c9385..725c33847 100644 --- a/be/src/exec/hash-table.h +++ b/be/src/exec/hash-table.h @@ -117,7 +117,8 @@ class HashTable { }; // Starts as a scan of tuples based on values of probe_exprs in the context - // of probe_row. Returns the scan through 'it'. + // of probe_row. Scans entire table if probe_row is NULL. + // Returns the scan through 'it'. 
void Scan(TupleRow* probe_row, Iterator* it); }; diff --git a/be/src/runtime/row-batch.h b/be/src/runtime/row-batch.h index 474e3f28d..dd21ce59b 100644 --- a/be/src/runtime/row-batch.h +++ b/be/src/runtime/row-batch.h @@ -106,6 +106,10 @@ class RowBatch { memcpy(dest, src, num_tuples_per_row_ * sizeof(Tuple*)); } + void ClearRow(TupleRow* row) { + memset(row, 0, num_tuples_per_row_ * sizeof(Tuple*)); + } + int num_rows() const { return num_rows_; } int capacity() const { return capacity_; } diff --git a/fe/.settings/org.eclipse.jdt.core.prefs b/fe/.settings/org.eclipse.jdt.core.prefs index 70f0be2da..785518dfd 100644 --- a/fe/.settings/org.eclipse.jdt.core.prefs +++ b/fe/.settings/org.eclipse.jdt.core.prefs @@ -1,4 +1,4 @@ -#Mon Aug 29 13:48:51 PDT 2011 +#Thu Sep 22 17:32:34 PDT 2011 eclipse.preferences.version=1 org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6 org.eclipse.jdt.core.compiler.compliance=1.6 diff --git a/fe/src/main/cup/sql-parser.y b/fe/src/main/cup/sql-parser.y index fb11113c5..fdb5b5092 100644 --- a/fe/src/main/cup/sql-parser.y +++ b/fe/src/main/cup/sql-parser.y @@ -290,14 +290,14 @@ table_ref_list ::= :} | table_ref_list:list join_operator:op table_ref:t {: - t.setJoinOperator((JoinOperator) op); + t.setJoinOp((JoinOperator) op); list.add(t); RESULT = list; :} | table_ref_list:list join_operator:op table_ref:t KW_ON predicate:p {: - t.setJoinOperator((JoinOperator) op); + t.setJoinOp((JoinOperator) op); t.setOnClause(p); list.add(t); RESULT = list; @@ -305,7 +305,7 @@ table_ref_list ::= | table_ref_list:list join_operator:op table_ref:t KW_USING LPAREN ident_list:colNames RPAREN {: - t.setJoinOperator((JoinOperator) op); + t.setJoinOp((JoinOperator) op); t.setUsingClause(colNames); list.add(t); RESULT = list; diff --git a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java b/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java index edf6cdf00..f722f6579 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java 
+++ b/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java @@ -170,9 +170,8 @@ public class Analyzer { /** * Register all conjuncts that make up the predicate. - * @param p */ - public void registerPredicate(Predicate p) { + public void registerConjuncts(Predicate p) { List conjuncts = p.getConjuncts(); for (Predicate conjunct: conjuncts) { registerConjunct(conjunct); @@ -182,8 +181,6 @@ public class Analyzer { /** * Register individual conjunct with all tuple and slot ids it references * and with the global conjunct list. - * - * @param p */ private void registerConjunct(Predicate p) { conjuncts.add(p); @@ -238,6 +235,7 @@ public class Analyzer { } else { eqJoinPredicates.get(lhsTupleIds.get(0)).add(p); } + binaryPred.setIsEqJoinConjunct(true); } } } @@ -258,47 +256,6 @@ public class Analyzer { return result; } - /** - * Return all registered conjuncts that are equi-join predicates - * in which one side is fully bound by lhsIds and the other by rhsId. - * Returns the conjuncts in 'joinConjuncts' and also in their disassembled - * form in 'joinPredicates' (in which " = " is returned as - * Pair(, )). 
- */ - public void getEqJoinPredicates( - List lhsIds, TupleId rhsId, - List > joinPredicates, - List joinConjuncts) { - joinPredicates.clear(); - joinConjuncts.clear(); - List candidates = eqJoinPredicates.get(rhsId); - if (candidates == null) return; - for (Predicate p: candidates) { - Expr rhsExpr = null; - if (p.getChild(0).isBound(rhsId.asList())) { - rhsExpr = p.getChild(0); - } else { - Preconditions.checkState(p.getChild(1).isBound(rhsId.asList())); - rhsExpr = p.getChild(1); - } - - Expr lhsExpr = null; - if (p.getChild(0).isBound(lhsIds)) { - lhsExpr = p.getChild(0); - } else if (p.getChild(1).isBound(lhsIds)) { - lhsExpr = p.getChild(1); - } else { - // not an equi-join condition between lhsIds and rhsId - continue; - } - - Preconditions.checkState(lhsExpr != rhsExpr); - joinConjuncts.add(p); - Pair entry = Pair.create(lhsExpr, rhsExpr); - joinPredicates.add(entry); - } - } - /** * Return slot descriptor corresponding to column referenced in the context of * tupleDesc, or null if no such reference exists. 
@@ -323,4 +280,8 @@ public class Analyzer { public Set getAliases() { return aliasMap.keySet(); } + + public List getEqJoinPredicates(TupleId id) { + return eqJoinPredicates.get(id); + } } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/JoinOperator.java b/fe/src/main/java/com/cloudera/impala/analysis/JoinOperator.java index 5833a7c03..2524f5e83 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/JoinOperator.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/JoinOperator.java @@ -2,21 +2,37 @@ package com.cloudera.impala.analysis; +import com.cloudera.impala.thrift.TJoinOp; + public enum JoinOperator { - INNER_JOIN("INNER JOIN"), - LEFT_OUTER_JOIN("LEFT OUTER JOIN"), - LEFT_SEMI_JOIN("LEFT SEMI JOIN"), - RIGHT_OUTER_JOIN("RIGHT OUTER JOIN"), - FULL_OUTER_JOIN("FULL OUTER JOIN"); + INNER_JOIN("INNER JOIN", TJoinOp.INNER_JOIN), + LEFT_OUTER_JOIN("LEFT OUTER JOIN", TJoinOp.LEFT_OUTER_JOIN), + LEFT_SEMI_JOIN("LEFT SEMI JOIN", TJoinOp.LEFT_SEMI_JOIN), + RIGHT_OUTER_JOIN("RIGHT OUTER JOIN", TJoinOp.RIGHT_OUTER_JOIN), + FULL_OUTER_JOIN("FULL OUTER JOIN", TJoinOp.FULL_OUTER_JOIN); private final String description; - private JoinOperator(String description) { + private final TJoinOp thriftJoinOp; + + private JoinOperator(String description, TJoinOp thriftJoinOp) { this.description = description; + this.thriftJoinOp = thriftJoinOp; } + @Override public String toString() { return description; } + + public TJoinOp toThrift() { + return thriftJoinOp; + } + + public boolean isOuterJoin() { + return this == LEFT_OUTER_JOIN + || this == RIGHT_OUTER_JOIN + || this == FULL_OUTER_JOIN; + } } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/Predicate.java b/fe/src/main/java/com/cloudera/impala/analysis/Predicate.java index 513d8fafa..f8c55362e 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/Predicate.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/Predicate.java @@ -9,8 +9,19 @@ import com.cloudera.impala.common.AnalysisException; 
import com.google.common.collect.Lists; public abstract class Predicate extends Expr { + protected boolean isEqJoinConjunct; + public Predicate() { super(); + this.isEqJoinConjunct = false; + } + + public boolean isEqJoinConjunct() { + return isEqJoinConjunct; + } + + public void setIsEqJoinConjunct(boolean v) { + isEqJoinConjunct = v; } @Override diff --git a/fe/src/main/java/com/cloudera/impala/analysis/SelectStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/SelectStmt.java index 7cc89db72..516afb232 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/SelectStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/SelectStmt.java @@ -142,12 +142,8 @@ public class SelectStmt extends ParseNodeBase { // start out with table refs to establish aliases TableRef leftTblRef = null; // the one to the left of tblRef for (TableRef tblRef: tableRefs) { - tblRef.setDesc(analyzer.registerTableRef(tblRef)); - tblRef.expandUsingClause(leftTblRef, analyzer.getCatalog()); - if (tblRef.getOnClause() != null) { - tblRef.getOnClause().analyze(analyzer); - analyzer.registerPredicate(tblRef.getOnClause()); - } + tblRef.setLeftTblRef(leftTblRef); + tblRef.analyze(analyzer); leftTblRef = tblRef; } @@ -182,7 +178,7 @@ public class SelectStmt extends ParseNodeBase { throw new AnalysisException( "aggregation function not allowed in WHERE clause"); } - analyzer.registerPredicate(whereClause); + analyzer.registerConjuncts(whereClause); } if (orderByElements != null) { analyzeOrderByClause(analyzer); @@ -443,7 +439,6 @@ public class SelectStmt extends ParseNodeBase { strBuilder.append(" FROM "); for (int i = 0; i < tableRefs.size(); ++i) { strBuilder.append(tableRefs.get(i).toSql()); - strBuilder.append((i+1 != tableRefs.size()) ? 
", " : ""); } // Where clause if (whereClause != null) { diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java b/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java index 1254b0899..e273988a6 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/TableRef.java @@ -9,8 +9,9 @@ import com.cloudera.impala.catalog.Table; import com.cloudera.impala.common.AnalysisException; import com.google.common.base.Joiner; import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; -public class TableRef { +public class TableRef extends ParseNodeBase { private final TableName name; private final String alias; @@ -18,8 +19,17 @@ public class TableRef { private Predicate onClause; private List usingColNames; + // the ref to the left of us, if we're part of a JOIN clause + private TableRef leftTblRef; + private TupleDescriptor desc; // analysis output + // conjuncts from the JOIN clause: + // 1. equi-join predicates + private List eqJoinConjuncts; + // 2. the rest + private List otherJoinConjuncts; + public TableRef(TableName name, String alias) { super(); Preconditions.checkArgument(!name.toString().isEmpty()); @@ -29,7 +39,8 @@ public class TableRef { } public JoinOperator getJoinOp() { - return joinOp; + // if it's not explicitly set, we're doing an inner join + return (joinOp == null ? 
JoinOperator.INNER_JOIN : joinOp); } public Predicate getOnClause() { @@ -44,10 +55,6 @@ public class TableRef { return desc.getId(); } - public void setDesc(TupleDescriptor desc) { - this.desc = desc; - } - public TableName getName() { return name; } @@ -60,7 +67,7 @@ public class TableRef { return desc.getTable(); } - public void setJoinOperator(JoinOperator op) { + public void setJoinOp(JoinOperator op) { this.joinOp = op; } @@ -72,54 +79,112 @@ public class TableRef { this.usingColNames = colNames; } - public void expandUsingClause(TableRef leftTblRef, Catalog catalog) - throws AnalysisException { - if (usingColNames == null) { - return; - } - Preconditions.checkState(desc != null); - Preconditions.checkState(onClause == null); - for (String colName: usingColNames) { - // check whether colName exists both for our table and the one - // to the left of us - if (leftTblRef.getDesc().getTable().getColumn(colName) == null) { - throw new AnalysisException( - "unknown column " + colName + " for alias " - + leftTblRef.getAlias() + " (in \"" + this.toSql() + "\")"); - } - if (desc.getTable().getColumn(colName) == null) { - throw new AnalysisException( - "unknown column " + colName + " for alias " - + getAlias() + " (in \"" + this.toSql() + "\")"); - } + public void setLeftTblRef(TableRef leftTblRef) { + this.leftTblRef = leftTblRef; + } - // create predicate ".colName = .colName" - BinaryPredicate eqPred = - new BinaryPredicate(BinaryPredicate.Operator.EQ, - new SlotRef(leftTblRef.getAliasAsName(), colName), - new SlotRef(getAliasAsName(), colName)); - if (onClause == null) { - onClause = eqPred; - } else { - onClause = - new CompoundPredicate(CompoundPredicate.Operator.AND, onClause, eqPred); + public List getEqJoinConjuncts() { + return eqJoinConjuncts; + } + + public List getOtherJoinConjuncts() { + return otherJoinConjuncts; + } + + /** + * Register this table ref and its ON conjuncts. + * A USING clause, if present, is first turned into an equivalent ON clause.
+ */ + @Override + public void analyze(Analyzer analyzer) throws AnalysisException { + desc = analyzer.registerTableRef(this); + Preconditions.checkState(desc != null); + + if (usingColNames != null) { + // Turn USING clause into equivalent ON clause. + Preconditions.checkState(onClause == null); + for (String colName: usingColNames) { + // check whether colName exists both for our table and the one + // to the left of us + if (leftTblRef.getDesc().getTable().getColumn(colName) == null) { + throw new AnalysisException( + "unknown column " + colName + " for alias " + + leftTblRef.getAlias() + " (in \"" + this.toSql() + "\")"); + } + if (desc.getTable().getColumn(colName) == null) { + throw new AnalysisException( + "unknown column " + colName + " for alias " + + getAlias() + " (in \"" + this.toSql() + "\")"); + } + + // create predicate ".colName = .colName" + BinaryPredicate eqPred = + new BinaryPredicate(BinaryPredicate.Operator.EQ, + new SlotRef(leftTblRef.getAliasAsName(), colName), + new SlotRef(getAliasAsName(), colName)); + if (onClause == null) { + onClause = eqPred; + } else { + onClause = + new CompoundPredicate(CompoundPredicate.Operator.AND, onClause, eqPred); + } } } + + if (onClause != null) { + onClause.analyze(analyzer); + // need to register conjuncts before being able to call isEqJoinConjunct() + analyzer.registerConjuncts(onClause); + eqJoinConjuncts = Lists.newArrayList(); + otherJoinConjuncts = Lists.newArrayList(); + for (Predicate p: onClause.getConjuncts()) { + if (p.isEqJoinConjunct()) { + eqJoinConjuncts.add(p); + } else { + otherJoinConjuncts.add(p); + } + } + } else if (getJoinOp().isOuterJoin() || getJoinOp() == JoinOperator.LEFT_SEMI_JOIN) { + throw new AnalysisException(joinOpToSql() + " requires an ON or USING clause."); + } } + private String joinOpToSql() { + Preconditions.checkState(joinOp != null); + switch (joinOp) { + case INNER_JOIN: + return "INNER JOIN"; + case LEFT_OUTER_JOIN: + return "LEFT OUTER JOIN"; + case 
LEFT_SEMI_JOIN: + return "LEFT SEMI JOIN"; + case RIGHT_OUTER_JOIN: + return "RIGHT OUTER JOIN"; + case FULL_OUTER_JOIN: + return "FULL OUTER JOIN"; + default: + return "bad join op: " + joinOp.toString(); + } + } + + + @Override public String toSql() { if (joinOp == null) { - return name.toString() + (alias != null ? " " + alias : ""); + // prepend "," if we're part of a sequence of table refs w/o an + // explicit JOIN clause + return (leftTblRef != null ? ", " : "") + + name.toString() + (alias != null ? " " + alias : ""); } - StringBuilder output = new StringBuilder(joinOp.toString() + " "); + StringBuilder output = new StringBuilder(joinOpToSql() + " "); output.append(name.toString()).append(" "); if (alias != null) { output.append(alias).append(" "); } if (usingColNames != null) { output.append("USING (").append(Joiner.on(", ").join(usingColNames)).append(")"); - } else { + } else if (onClause != null) { output.append("ON (").append(onClause.toSql()).append(")"); } return output.toString(); diff --git a/fe/src/main/java/com/cloudera/impala/planner/HashJoinNode.java b/fe/src/main/java/com/cloudera/impala/planner/HashJoinNode.java index 64a3d925a..3d5d5bb76 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/HashJoinNode.java +++ b/fe/src/main/java/com/cloudera/impala/planner/HashJoinNode.java @@ -5,6 +5,9 @@ package com.cloudera.impala.planner; import java.util.List; import com.cloudera.impala.analysis.Expr; +import com.cloudera.impala.analysis.JoinOperator; +import com.cloudera.impala.analysis.Predicate; +import com.cloudera.impala.analysis.SlotId; import com.cloudera.impala.common.Pair; import com.cloudera.impala.thrift.TEqJoinCondition; import com.cloudera.impala.thrift.THashJoinNode; @@ -20,16 +23,24 @@ import com.google.common.base.Preconditions; * */ public class HashJoinNode extends PlanNode { - // predicates of the form " = ", recorded as Pair(, ) - private final List > joinPredicates; + private final JoinOperator joinOp; + // conjuncts of the 
form " = ", recorded as Pair(, ) + private final List > eqJoinConjuncts; - public HashJoinNode(PlanNode outer, PlanNode inner, - List > joinPredicates) { + // join conjuncts from the JOIN clause that aren't equi-join predicates + private final List otherJoinConjuncts; + + public HashJoinNode( + PlanNode outer, PlanNode inner, JoinOperator joinOp, + List > eqJoinConjuncts, + List otherJoinConjuncts) { super(); tupleIds.addAll(outer.getTupleIds()); Preconditions.checkState(inner.getTupleIds().size() == 1); tupleIds.add(inner.getTupleIds().get(0)); - this.joinPredicates = joinPredicates; + this.joinOp = joinOp; + this.eqJoinConjuncts = eqJoinConjuncts; + this.otherJoinConjuncts = otherJoinConjuncts; children.add(outer); children.add(inner); } @@ -37,27 +48,45 @@ public class HashJoinNode extends PlanNode { @Override protected String debugString() { return Objects.toStringHelper(this) - .add("joinPreds", joinPredicatesDebugString()) + .add("eqJoinConjuncts", eqJoinConjunctsDebugString()) .addValue(super.debugString()) .toString(); } - private String joinPredicatesDebugString() { + private String eqJoinConjunctsDebugString() { Objects.ToStringHelper helper = Objects.toStringHelper(this); - for (Pair entry: joinPredicates) { + for (Pair entry: eqJoinConjuncts) { helper.add("lhs" , entry.first).add("rhs", entry.second); } return helper.toString(); } + @Override + public void getMaterializedIds(List ids) { + super.getMaterializedIds(ids); + // we also need to materialize everything referenced by eqJoinConjuncts + // and otherJoinConjuncts + for (Pair p: eqJoinConjuncts) { + p.first.getIds(null, ids); + p.second.getIds(null, ids); + } + for (Predicate p: otherJoinConjuncts) { + p.getIds(null, ids); + } + } + @Override protected void toThrift(TPlanNode msg) { msg.node_type = TPlanNodeType.HASH_JOIN_NODE; msg.hash_join_node = new THashJoinNode(); - for (Pair entry: joinPredicates) { + msg.hash_join_node.join_op = joinOp.toThrift(); + for (Pair entry: eqJoinConjuncts) { 
TEqJoinCondition eqJoinCondition = new TEqJoinCondition(entry.first.treeToThrift(), entry.second.treeToThrift()); - msg.hash_join_node.addToJoin_predicates(eqJoinCondition); + msg.hash_join_node.addToEq_join_conjuncts(eqJoinCondition); + } + for (Predicate p: otherJoinConjuncts) { + msg.hash_join_node.addToOther_join_conjuncts(p.treeToThrift()); } } @@ -65,17 +94,22 @@ public class HashJoinNode extends PlanNode { protected String getExplainString(String prefix) { StringBuilder output = new StringBuilder(); output.append(prefix + "HASH JOIN\n"); - output.append(prefix + "HASH PREDICATES:"); - for (Pair entry: joinPredicates) { + output.append(prefix + " JOIN OP: " + joinOp.toString() + "\n"); + output.append(prefix + " HASH PREDICATES:"); + for (Pair entry: eqJoinConjuncts) { output.append( "\n" + prefix + " " + entry.first.toSql() + " = " + entry.second.toSql()); } + if (!otherJoinConjuncts.isEmpty()) { + output.append("\n" + prefix + " OTHER JOIN PREDICATES: "); + output.append(getExplainString(otherJoinConjuncts)); + } if (!conjuncts.isEmpty()) { - output.append("\n" + prefix + "OTHER PREDICATES: "); + output.append("\n" + prefix + " OTHER PREDICATES: "); output.append(getExplainString(conjuncts)); } - output.append("\n" + getChild(0).getExplainString(prefix + " ")); - output.append("\n" + getChild(1).getExplainString(prefix + " ")); + output.append("\n" + getChild(0).getExplainString(prefix + " ")); + output.append("\n" + getChild(1).getExplainString(prefix + " ")); return output.toString(); } } diff --git a/fe/src/main/java/com/cloudera/impala/planner/PlanNode.java b/fe/src/main/java/com/cloudera/impala/planner/PlanNode.java index e12e9100d..2dcd53ec1 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/PlanNode.java +++ b/fe/src/main/java/com/cloudera/impala/planner/PlanNode.java @@ -8,6 +8,7 @@ import java.util.List; import com.cloudera.impala.analysis.Analyzer; import com.cloudera.impala.analysis.Expr; import com.cloudera.impala.analysis.Predicate; 
+import com.cloudera.impala.analysis.SlotId; import com.cloudera.impala.analysis.TupleId; import com.cloudera.impala.common.InternalException; import com.cloudera.impala.common.TreeNode; @@ -29,7 +30,7 @@ import com.google.common.collect.Lists; * * conjuncts: Each node has a list of conjuncts that can be executed in the context of * this node, ie, they only reference tuples materialized by this node or one of - * its children (= are bound by tupleIds). + * its children (= are bound by tupleIds). */ abstract public class PlanNode extends TreeNode { protected long limit; // max. # of rows to be returned; 0: no limit @@ -128,6 +129,16 @@ abstract public class PlanNode extends TreeNode { } } + /** + * Appends ids of slots that need to be materialized for this node. + * By default, only slots referenced by conjuncts need to be materialized + * (the rationale being that only conjuncts need to be evaluated explicitly; + * exprs that are turned into scan predicates, etc., are evaluated implicitly). + */ + public void getMaterializedIds(List ids) { + Expr.getIds(getConjuncts(), null, ids); + } + // Convert this plan node into msg (excluding children), which requires setting // the node type and the node-specific field. 
protected abstract void toThrift(TPlanNode msg); diff --git a/fe/src/main/java/com/cloudera/impala/planner/Planner.java b/fe/src/main/java/com/cloudera/impala/planner/Planner.java index e38107cdd..108ca9ffc 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/Planner.java +++ b/fe/src/main/java/com/cloudera/impala/planner/Planner.java @@ -25,6 +25,7 @@ import com.cloudera.impala.catalog.PrimitiveType; import com.cloudera.impala.common.InternalException; import com.cloudera.impala.common.NotImplementedException; import com.cloudera.impala.common.Pair; +import com.google.common.base.Preconditions; import com.google.common.collect.Lists; import com.google.common.collect.Sets; @@ -140,30 +141,97 @@ public class Planner { return scanNode; } + /** + * Return join conjuncts that can be used for hash table lookups. + * - for inner joins, those are equi-join predicates in which one side is fully bound + * by lhsIds and the other by rhs' id; + * - for outer joins: same type of conjuncts as inner joins, but only from the JOIN + * clause + * Returns the conjuncts in 'joinConjuncts' (in which " = " is returned + * as Pair(, )) and also in their original form in 'joinPredicates'. 
+ */ + public void getHashLookupJoinConjuncts( + Analyzer analyzer, + List lhsIds, TableRef rhs, + List > joinConjuncts, + List joinPredicates) { + joinConjuncts.clear(); + joinPredicates.clear(); + TupleId rhsId = rhs.getId(); + List candidates; + if (rhs.getJoinOp().isOuterJoin()) { + // TODO: create test for this + Preconditions.checkState(rhs.getOnClause() != null); + candidates = rhs.getEqJoinConjuncts(); + Preconditions.checkState(candidates != null); + } else { + candidates = analyzer.getEqJoinPredicates(rhsId); + } + if (candidates == null) { + return; + } + for (Predicate p: candidates) { + Expr rhsExpr = null; + if (p.getChild(0).isBound(rhsId.asList())) { + rhsExpr = p.getChild(0); + } else { + Preconditions.checkState(p.getChild(1).isBound(rhsId.asList())); + rhsExpr = p.getChild(1); + } + + Expr lhsExpr = null; + if (p.getChild(0).isBound(lhsIds)) { + lhsExpr = p.getChild(0); + } else if (p.getChild(1).isBound(lhsIds)) { + lhsExpr = p.getChild(1); + } else { + // not an equi-join condition between lhsIds and rhsId + continue; + } + + Preconditions.checkState(lhsExpr != rhsExpr); + joinPredicates.add(p); + Pair entry = Pair.create(lhsExpr, rhsExpr); + joinConjuncts.add(entry); + } + } + /** * Create HashJoinNode to join outer with inner. 
*/ private PlanNode createHashJoinNode( - Analyzer analyzer, PlanNode outer, PlanNode inner) throws NotImplementedException { - List > joinPredicates = Lists.newArrayList(); - List joinConjuncts = Lists.newArrayList(); - analyzer.getEqJoinPredicates( - outer.getTupleIds(), inner.getTupleIds().get(0), - joinPredicates, joinConjuncts); - if (joinPredicates.isEmpty()) { + Analyzer analyzer, PlanNode outer, TableRef innerRef) + throws NotImplementedException { + // the rows coming from the build node only need to have space for the tuple + // materialized by that node + PlanNode inner = createScanNode(analyzer, innerRef); + inner.rowTupleIds = Lists.newArrayList(innerRef.getId()); + + List > eqJoinConjuncts = Lists.newArrayList(); + List eqJoinPredicates = Lists.newArrayList(); + getHashLookupJoinConjuncts( + analyzer, outer.getTupleIds(), innerRef, eqJoinConjuncts, eqJoinPredicates); + if (eqJoinPredicates.isEmpty()) { throw new NotImplementedException( "Join requires at least one equality predicate between the two tables."); } - HashJoinNode result = new HashJoinNode(outer, inner, joinPredicates); + HashJoinNode result = + new HashJoinNode(outer, inner, innerRef.getJoinOp(), eqJoinConjuncts, + innerRef.getOtherJoinConjuncts()); - // All conjuncts that are join predicates are evaluated by the hash join - // implicitly as part of the hash table lookup; all conjuncts that are bound by - // outer.getTupleIds() are evaluated by outer (or one of its children); - // only the remaining conjuncts that are bound by result.getTupleIds() - // need to be evaluated explicitly by the hash join. 
+ // conjuncts evaluated by this node: + // - equi-join conjuncts are evaluated as part of the hash table lookup + // - other join conjuncts are evaluated before establishing a match + // - all conjuncts that are bound by outer.getTupleIds() are evaluated by outer + // (or one of its children) + // - the remaining conjuncts that are bound by result.getTupleIds() + // need to be evaluated explicitly by the hash join ArrayList conjuncts = new ArrayList(analyzer.getConjuncts(result.getTupleIds())); - conjuncts.removeAll(joinConjuncts); + conjuncts.removeAll(eqJoinPredicates); + if (innerRef.getOtherJoinConjuncts() != null) { + conjuncts.removeAll(innerRef.getOtherJoinConjuncts()); + } conjuncts.removeAll(analyzer.getConjuncts(outer.getTupleIds())); conjuncts.removeAll(analyzer.getConjuncts(inner.getTupleIds())); result.setConjuncts(conjuncts); @@ -178,7 +246,7 @@ public class Planner { PlanNode node = root; List refdIdList = Lists.newArrayList(); while (node != null) { - Expr.getIds(node.getConjuncts(), null, refdIdList); + node.getMaterializedIds(refdIdList); if (node.hasChild(1)) { Expr.getIds(node.getChild(1).getConjuncts(), null, refdIdList); } @@ -231,12 +299,7 @@ public class Planner { root.rowTupleIds = rowTuples; for (int i = 1; i < selectStmt.getTableRefs().size(); ++i) { TableRef innerRef = selectStmt.getTableRefs().get(i); - // all joins are hash joins at this point, and the rows coming from the build - // node only need to have space for the tuple materialized by that node - // (this might change with nested-loop joins) - PlanNode inner = createScanNode(analyzer, innerRef); - inner.rowTupleIds = Lists.newArrayList(innerRef.getId()); - root = createHashJoinNode(analyzer, root, inner); + root = createHashJoinNode(analyzer, root, innerRef); root.rowTupleIds = rowTuples; } diff --git a/fe/src/main/thrift/PlanNodes.thrift b/fe/src/main/thrift/PlanNodes.thrift index fde096621..1d7dd07da 100644 --- a/fe/src/main/thrift/PlanNodes.thrift +++ 
b/fe/src/main/thrift/PlanNodes.thrift @@ -48,8 +48,23 @@ struct TEqJoinCondition { 2: required Exprs.TExpr right; } +enum TJoinOp { + INNER_JOIN, + LEFT_OUTER_JOIN, + LEFT_SEMI_JOIN, + RIGHT_OUTER_JOIN, + FULL_OUTER_JOIN +} + struct THashJoinNode { - 1: required list join_predicates; + 1: required TJoinOp join_op + + // anything from the ON, USING or WHERE clauses that's an equi-join predicate + 2: required list eq_join_conjuncts + + // anything from the ON or USING clauses (but *not* the WHERE clause) that's not an + // equi-join predicate + 3: optional list other_join_conjuncts } struct TAggregationNode { diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java index 0f00766ff..bc7874386 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java @@ -288,6 +288,8 @@ public class AnalyzerTest { "select a.int_col " + "from alltypes a join alltypes b on " + "(a.int_col = b.int_col and a.string_col = b.string_col)"); + // ON or USING clause not required for inner join + AnalyzesOk("select a.int_col from alltypes a join alltypes b"); // unknown column AnalysisError("select a.int_col from alltypes a join alltypes b on (a.int_col = b.badcol)", "unknown column 'badcol'"); @@ -299,7 +301,8 @@ public class AnalyzerTest { "select a.int_col from alltypes a join alltypes b on (a.int_col = badalias.int_col)", "unknown table alias: 'badalias'"); // incompatible comparison - AnalysisError("select a.int_col from alltypes a join alltypes b on (a.bool_col = b.string_col)", + AnalysisError( + "select a.int_col from alltypes a join alltypes b on (a.bool_col = b.string_col)", "operands are not comparable: a.bool_col = b.string_col"); AnalyzesOk( "select a.int_col, b.int_col, c.int_col " + @@ -315,6 +318,31 @@ public class AnalyzerTest { "join alltypes c on " + "(b.int_col = c.int_col and b.string_col = c.string_col 
and b.bool_col = c.bool_col)", "unknown table alias: 'c'"); + + // outer joins require ON/USING clause + AnalyzesOk("select * from alltypes a left outer join alltypes b on (a.id = b.id)"); + AnalyzesOk("select * from alltypes a left outer join alltypes b using (id)"); + AnalysisError("select * from alltypes a left outer join alltypes b", + "LEFT OUTER JOIN requires an ON or USING clause"); + AnalyzesOk("select * from alltypes a right outer join alltypes b on (a.id = b.id)"); + AnalyzesOk("select * from alltypes a right outer join alltypes b using (id)"); + AnalysisError("select * from alltypes a right outer join alltypes b", + "RIGHT OUTER JOIN requires an ON or USING clause"); + AnalyzesOk("select * from alltypes a full outer join alltypes b on (a.id = b.id)"); + AnalyzesOk("select * from alltypes a full outer join alltypes b using (id)"); + AnalysisError("select * from alltypes a full outer join alltypes b", + "FULL OUTER JOIN requires an ON or USING clause"); + + // semi join requires ON/USING clause + AnalyzesOk("select a.id from alltypes a left semi join alltypes b on (a.id = b.id)"); + AnalyzesOk("select a.id from alltypes a left semi join alltypes b using (id)"); + AnalysisError("select a.id from alltypes a left semi join alltypes b", + "LEFT SEMI JOIN requires an ON or USING clause"); + // TODO: enable when implemented + // must not reference semi-joined alias outside of join clause + //AnalysisError( + //"select a.id, b.id from alltypes a left semi join alltypes b on (a.id = b.id)", + //"x"); } @Test public void TestUsingClause() { diff --git a/fe/src/test/java/com/cloudera/impala/catalog/CatalogTest.java b/fe/src/test/java/com/cloudera/impala/catalog/CatalogTest.java index 839c077f7..e03e46937 100644 --- a/fe/src/test/java/com/cloudera/impala/catalog/CatalogTest.java +++ b/fe/src/test/java/com/cloudera/impala/catalog/CatalogTest.java @@ -83,7 +83,7 @@ public class CatalogTest { assertNotNull(testDb); assertEquals(testDb.getName(), "testdb1"); - 
assertEquals(20, defaultDb.getTables().size()); + assertEquals(22, defaultDb.getTables().size()); assertNotNull(defaultDb.getTable("alltypes")); assertNotNull(defaultDb.getTable("alltypes_rc")); assertNotNull(defaultDb.getTable("alltypessmall")); @@ -98,6 +98,8 @@ public class CatalogTest { assertNotNull(defaultDb.getTable("alltypesaggnonulls_rc")); assertNotNull(defaultDb.getTable("testtbl")); assertNotNull(defaultDb.getTable("testtbl_rc")); + assertNotNull(defaultDb.getTable("dimtbl")); + assertNotNull(defaultDb.getTable("jointbl")); assertNotNull(defaultDb.getTable("liketbl")); assertNotNull(defaultDb.getTable("hbasealltypessmall")); assertNotNull(defaultDb.getTable("hbasealltypeserror")); @@ -146,6 +148,15 @@ public class CatalogTest { new PrimitiveType[] {PrimitiveType.STRING, PrimitiveType.STRING, PrimitiveType.STRING, PrimitiveType.STRING, PrimitiveType.STRING}); + checkTableCols(defaultDb, "dimtbl", 0, + new String[] {"id", "name", "zip"}, + new PrimitiveType[] + {PrimitiveType.BIGINT, PrimitiveType.STRING, PrimitiveType.INT}); + checkTableCols(defaultDb, "jointbl", 0, + new String[] {"test_id", "test_name", "test_zip", "alltypes_id"}, + new PrimitiveType[] + {PrimitiveType.BIGINT, PrimitiveType.STRING, PrimitiveType.INT, + PrimitiveType.INT}); checkHBaseTableCols(defaultDb, "hbasealltypessmall", "hbasealltypessmall", new String[] diff --git a/fe/src/test/java/com/cloudera/impala/service/QueryTest.java b/fe/src/test/java/com/cloudera/impala/service/QueryTest.java index 3beb1a65b..82a84e535 100644 --- a/fe/src/test/java/com/cloudera/impala/service/QueryTest.java +++ b/fe/src/test/java/com/cloudera/impala/service/QueryTest.java @@ -59,6 +59,7 @@ public class QueryTest { runTests("hbase-rowkeys", false, 1000); runTests("hbase-filters", false, 1000); runTests("joins", false, 1000); + runTests("outer-joins", false, 1000); // check whether any of the tests had errors if (testErrorLog.length() != 0) { diff --git 
a/fe/src/test/java/com/cloudera/impala/testutil/TestUtils.java b/fe/src/test/java/com/cloudera/impala/testutil/TestUtils.java index 15dfd4dbc..0068a3fff 100644 --- a/fe/src/test/java/com/cloudera/impala/testutil/TestUtils.java +++ b/fe/src/test/java/com/cloudera/impala/testutil/TestUtils.java @@ -3,6 +3,7 @@ package com.cloudera.impala.testutil; import static org.junit.Assert.fail; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -127,22 +128,19 @@ public class TestUtils { /** * Do an element-by-element comparison of actual and expected types. - * - * @param actual - * @param expected * @return an error message if actual does not match expected, "" otherwise. */ public static String compareOutputTypes(List actual, String[] expectedStrTypes) { if (actual.size() != expectedStrTypes.length) { - return "Unequal number of types. Found: " + actual.size() + ". Expected: " - + expectedStrTypes.length + "\n"; + return "Unequal number of output types.\nFound: " + actual.toString() + + ".\nExpected: " + Arrays.toString(expectedStrTypes) + "\n"; } for (int i = 0; i < expectedStrTypes.length; ++i) { String upperCaseTypeStr = expectedStrTypes[i].toUpperCase(); PrimitiveType expectedType = typeNameMap.get(upperCaseTypeStr.trim()); if (actual.get(i) != expectedType) { - return "Slot: " + i + ". Found: " + actual.get(i).toString() + ". 
Expected: " - + upperCaseTypeStr + "\n"; + return "Mismatched output types.\nFound: " + actual.toString() + + ".\nExpected: " + Arrays.toString(expectedStrTypes) + "\n"; } } return ""; diff --git a/fe/src/test/resources/PlannerTest/joins.test b/fe/src/test/resources/PlannerTest/joins.test index 4acd70a65..16d26cae9 100644 --- a/fe/src/test/resources/PlannerTest/joins.test +++ b/fe/src/test/resources/PlannerTest/joins.test @@ -3,6 +3,7 @@ from testtbl t1 join testtbl t2 using(id) where t1.zip = 94611 ---- HASH JOIN +JOIN OP: INNER JOIN HASH PREDICATES: t1.id = t2.id SCAN HDFS table=default.testtbl @@ -14,11 +15,12 @@ HASH PREDICATES: ==== # general exprs on both sides of equi-join predicates select * -from testtbl t1 join testtbl t2 +from testtbl t1 left outer join testtbl t2 on (t1.id - 1 = t2.id + 1) where t1.zip = 94611 ---- HASH JOIN +JOIN OP: LEFT OUTER JOIN HASH PREDICATES: t1.id - 1 = t2.id + 1 SCAN HDFS table=default.testtbl @@ -32,7 +34,7 @@ HASH PREDICATES: # scan predicates get propagated correctly; # non-eq join predicates are evaluated as extra conjuncts by the join node select * -from alltypesagg a join alltypessmall b using (id, int_col) +from alltypesagg a right outer join alltypessmall b using (id, int_col) where a.day >= 6 and b.month > 2 and a.tinyint_col = 15 @@ -40,6 +42,7 @@ and b.string_col = '15' and a.tinyint_col + b.tinyint_col < 15 ---- HASH JOIN +JOIN OP: RIGHT OUTER JOIN HASH PREDICATES: a.id = b.id a.int_col = b.int_col @@ -62,8 +65,8 @@ OTHER PREDICATES: a.tinyint_col + b.tinyint_col < 15 # non-eq join predicates are evaluated at the correct join node select * from alltypesagg a -join alltypessmall b using (id, int_col) -join alltypesaggnonulls c on (a.id = c.id and b.string_col = c.string_col) +full outer join alltypessmall b using (id, int_col) +right join alltypesaggnonulls c on (a.id = c.id and b.string_col = c.string_col) where a.day >= 6 and b.month > 2 and c.day < 3 @@ -74,11 +77,13 @@ and a.float_col - c.double_col < 0 and 
(b.double_col * c.tinyint_col > 1000 or c.tinyint_col < 1000) ---- HASH JOIN +JOIN OP: RIGHT OUTER JOIN HASH PREDICATES: a.id = c.id b.string_col = c.string_col OTHER PREDICATES: a.float_col - c.double_col < 0.0, b.double_col * c.tinyint_col > 1000.0 OR c.tinyint_col < 1000 HASH JOIN + JOIN OP: FULL OUTER JOIN HASH PREDICATES: a.id = b.id a.int_col = b.int_col diff --git a/fe/src/test/resources/QueryTest/outer-joins.test b/fe/src/test/resources/QueryTest/outer-joins.test new file mode 100644 index 000000000..fcf7b8e80 --- /dev/null +++ b/fe/src/test/resources/QueryTest/outer-joins.test @@ -0,0 +1,382 @@ +# join cols aren't part of select list (and still get materialized) +select j.test_name, d.name +from JoinTbl j inner join DimTbl d on (j.test_id = d.id) +---- +string, string +---- +'Name1','Name1' +'Name2','Name2' +'Name3','Name3' +'Name4','Name4' +'Name5','Name5' +'Name16','Name6' +'Name6','Name6' +'Name16','Name6' +'Name16','Name6' +'Name6','Name6' +'Name16','Name6' +==== +# join on bigint +select j.*, d.* +from JoinTbl j inner join DimTbl d on (j.test_id = d.id) +---- +bigint, string, int, int, bigint, string, int +---- +1001,'Name1',94611,5000,1001,'Name1',94611 +1002,'Name2',94611,5000,1002,'Name2',94611 +1003,'Name3',94611,5000,1003,'Name3',94612 +1004,'Name4',94611,5000,1004,'Name4',94612 +1005,'Name5',94611,5000,1005,'Name5',94613 +1006,'Name16',94612,5000,1006,'Name6',94613 +1006,'Name6',94616,5000,1006,'Name6',94613 +1006,'Name16',94616,5000,1006,'Name6',94613 +1006,'Name16',94612,15000,1006,'Name6',94613 +1006,'Name6',94616,15000,1006,'Name6',94613 +1006,'Name16',94616,15000,1006,'Name6',94613 +==== +select j.*, d.* +from JoinTbl j left outer join DimTbl d on (j.test_id = d.id) +---- +bigint, string, int, int, bigint, string, int +---- +1001,'Name1',94611,5000,1001,'Name1',94611 +1002,'Name2',94611,5000,1002,'Name2',94611 +1003,'Name3',94611,5000,1003,'Name3',94612 +1004,'Name4',94611,5000,1004,'Name4',94612 +1005,'Name5',94611,5000,1005,'Name5',94613 
+1106,'Name6',94612,5000,NULL,'NULL',NULL +1006,'Name16',94612,5000,1006,'Name6',94613 +1006,'Name6',94616,5000,1006,'Name6',94613 +1106,'Name16',94612,5000,NULL,'NULL',NULL +1106,'Name6',94616,5000,NULL,'NULL',NULL +1006,'Name16',94616,5000,1006,'Name6',94613 +1106,'Name16',94616,5000,NULL,'NULL',NULL +1106,'Name6',94612,15000,NULL,'NULL',NULL +1006,'Name16',94612,15000,1006,'Name6',94613 +1006,'Name6',94616,15000,1006,'Name6',94613 +1106,'Name16',94612,15000,NULL,'NULL',NULL +1106,'Name6',94616,15000,NULL,'NULL',NULL +1006,'Name16',94616,15000,1006,'Name6',94613 +1106,'Name16',94616,15000,NULL,'NULL',NULL +==== +select j.*, d.* +from JoinTbl j right outer join DimTbl d on (j.test_id = d.id) +---- +bigint, string, int, int, bigint, string, int +---- +1001,'Name1',94611,5000,1001,'Name1',94611 +1002,'Name2',94611,5000,1002,'Name2',94611 +1003,'Name3',94611,5000,1003,'Name3',94612 +1004,'Name4',94611,5000,1004,'Name4',94612 +1005,'Name5',94611,5000,1005,'Name5',94613 +1006,'Name16',94612,5000,1006,'Name6',94613 +1006,'Name6',94616,5000,1006,'Name6',94613 +1006,'Name16',94616,5000,1006,'Name6',94613 +1006,'Name16',94612,15000,1006,'Name6',94613 +1006,'Name6',94616,15000,1006,'Name6',94613 +1006,'Name16',94616,15000,1006,'Name6',94613 +NULL,'NULL',NULL,NULL,1007,'Name7',94614 +NULL,'NULL',NULL,NULL,1008,'Name8',94614 +NULL,'NULL',NULL,NULL,1009,'Name9',94615 +NULL,'NULL',NULL,NULL,1010,'Name10',94615 +==== +select j.*, d.* +from JoinTbl j full outer join DimTbl d on (j.test_id = d.id) +---- +bigint, string, int, int, bigint, string, int +---- +1001,'Name1',94611,5000,1001,'Name1',94611 +1002,'Name2',94611,5000,1002,'Name2',94611 +1003,'Name3',94611,5000,1003,'Name3',94612 +1004,'Name4',94611,5000,1004,'Name4',94612 +1005,'Name5',94611,5000,1005,'Name5',94613 +1106,'Name6',94612,5000,NULL,'NULL',NULL +1006,'Name16',94612,5000,1006,'Name6',94613 +1006,'Name6',94616,5000,1006,'Name6',94613 +1106,'Name16',94612,5000,NULL,'NULL',NULL 
+1106,'Name6',94616,5000,NULL,'NULL',NULL +1006,'Name16',94616,5000,1006,'Name6',94613 +1106,'Name16',94616,5000,NULL,'NULL',NULL +1106,'Name6',94612,15000,NULL,'NULL',NULL +1006,'Name16',94612,15000,1006,'Name6',94613 +1006,'Name6',94616,15000,1006,'Name6',94613 +1106,'Name16',94612,15000,NULL,'NULL',NULL +1106,'Name6',94616,15000,NULL,'NULL',NULL +1006,'Name16',94616,15000,1006,'Name6',94613 +1106,'Name16',94616,15000,NULL,'NULL',NULL +NULL,'NULL',NULL,NULL,1007,'Name7',94614 +NULL,'NULL',NULL,NULL,1008,'Name8',94614 +NULL,'NULL',NULL,NULL,1009,'Name9',94615 +NULL,'NULL',NULL,NULL,1010,'Name10',94615 +==== +# join on string +select j.*, d.* +from JoinTbl j inner join DimTbl d on (j.test_name = d.name) +---- +bigint, string, int, int, bigint, string, int +---- +1001,'Name1',94611,5000,1001,'Name1',94611 +1002,'Name2',94611,5000,1002,'Name2',94611 +1003,'Name3',94611,5000,1003,'Name3',94612 +1004,'Name4',94611,5000,1004,'Name4',94612 +1005,'Name5',94611,5000,1005,'Name5',94613 +1106,'Name6',94612,5000,1006,'Name6',94613 +1006,'Name6',94616,5000,1006,'Name6',94613 +1106,'Name6',94616,5000,1006,'Name6',94613 +1106,'Name6',94612,15000,1006,'Name6',94613 +1006,'Name6',94616,15000,1006,'Name6',94613 +1106,'Name6',94616,15000,1006,'Name6',94613 +==== +select j.*, d.* +from JoinTbl j left outer join DimTbl d on (j.test_name = d.name) +---- +bigint, string, int, int, bigint, string, int +---- +1001,'Name1',94611,5000,1001,'Name1',94611 +1002,'Name2',94611,5000,1002,'Name2',94611 +1003,'Name3',94611,5000,1003,'Name3',94612 +1004,'Name4',94611,5000,1004,'Name4',94612 +1005,'Name5',94611,5000,1005,'Name5',94613 +1106,'Name6',94612,5000,1006,'Name6',94613 +1006,'Name16',94612,5000,NULL,'NULL',NULL +1006,'Name6',94616,5000,1006,'Name6',94613 +1106,'Name16',94612,5000,NULL,'NULL',NULL +1106,'Name6',94616,5000,1006,'Name6',94613 +1006,'Name16',94616,5000,NULL,'NULL',NULL +1106,'Name16',94616,5000,NULL,'NULL',NULL +1106,'Name6',94612,15000,1006,'Name6',94613 
+1006,'Name16',94612,15000,NULL,'NULL',NULL +1006,'Name6',94616,15000,1006,'Name6',94613 +1106,'Name16',94612,15000,NULL,'NULL',NULL +1106,'Name6',94616,15000,1006,'Name6',94613 +1006,'Name16',94616,15000,NULL,'NULL',NULL +1106,'Name16',94616,15000,NULL,'NULL',NULL +==== +select j.*, d.* +from JoinTbl j right outer join DimTbl d on (j.test_name = d.name) +---- +bigint, string, int, int, bigint, string, int +---- +1001,'Name1',94611,5000,1001,'Name1',94611 +1002,'Name2',94611,5000,1002,'Name2',94611 +1003,'Name3',94611,5000,1003,'Name3',94612 +1004,'Name4',94611,5000,1004,'Name4',94612 +1005,'Name5',94611,5000,1005,'Name5',94613 +1106,'Name6',94612,5000,1006,'Name6',94613 +1006,'Name6',94616,5000,1006,'Name6',94613 +1106,'Name6',94616,5000,1006,'Name6',94613 +1106,'Name6',94612,15000,1006,'Name6',94613 +1006,'Name6',94616,15000,1006,'Name6',94613 +1106,'Name6',94616,15000,1006,'Name6',94613 +NULL,'NULL',NULL,NULL,1009,'Name9',94615 +NULL,'NULL',NULL,NULL,1008,'Name8',94614 +NULL,'NULL',NULL,NULL,1007,'Name7',94614 +NULL,'NULL',NULL,NULL,1010,'Name10',94615 +==== +select j.*, d.* +from JoinTbl j full outer join DimTbl d on (j.test_name = d.name) +---- +bigint, string, int, int, bigint, string, int +---- +1001,'Name1',94611,5000,1001,'Name1',94611 +1002,'Name2',94611,5000,1002,'Name2',94611 +1003,'Name3',94611,5000,1003,'Name3',94612 +1004,'Name4',94611,5000,1004,'Name4',94612 +1005,'Name5',94611,5000,1005,'Name5',94613 +1106,'Name6',94612,5000,1006,'Name6',94613 +1006,'Name16',94612,5000,NULL,'NULL',NULL +1006,'Name6',94616,5000,1006,'Name6',94613 +1106,'Name16',94612,5000,NULL,'NULL',NULL +1106,'Name6',94616,5000,1006,'Name6',94613 +1006,'Name16',94616,5000,NULL,'NULL',NULL +1106,'Name16',94616,5000,NULL,'NULL',NULL +1106,'Name6',94612,15000,1006,'Name6',94613 +1006,'Name16',94612,15000,NULL,'NULL',NULL +1006,'Name6',94616,15000,1006,'Name6',94613 +1106,'Name16',94612,15000,NULL,'NULL',NULL +1106,'Name6',94616,15000,1006,'Name6',94613 
+1006,'Name16',94616,15000,NULL,'NULL',NULL +1106,'Name16',94616,15000,NULL,'NULL',NULL +NULL,'NULL',NULL,NULL,1009,'Name9',94615 +NULL,'NULL',NULL,NULL,1008,'Name8',94614 +NULL,'NULL',NULL,NULL,1007,'Name7',94614 +NULL,'NULL',NULL,NULL,1010,'Name10',94615 +==== +# join on int +select j.*, d.* +from JoinTbl j inner join DimTbl d on (j.test_zip = d.zip) +---- +bigint, string, int, int, bigint, string, int +---- +1001,'Name1',94611,5000,1001,'Name1',94611 +1001,'Name1',94611,5000,1002,'Name2',94611 +1002,'Name2',94611,5000,1001,'Name1',94611 +1002,'Name2',94611,5000,1002,'Name2',94611 +1003,'Name3',94611,5000,1001,'Name1',94611 +1003,'Name3',94611,5000,1002,'Name2',94611 +1004,'Name4',94611,5000,1001,'Name1',94611 +1004,'Name4',94611,5000,1002,'Name2',94611 +1005,'Name5',94611,5000,1001,'Name1',94611 +1005,'Name5',94611,5000,1002,'Name2',94611 +1106,'Name6',94612,5000,1003,'Name3',94612 +1106,'Name6',94612,5000,1004,'Name4',94612 +1006,'Name16',94612,5000,1003,'Name3',94612 +1006,'Name16',94612,5000,1004,'Name4',94612 +1106,'Name16',94612,5000,1003,'Name3',94612 +1106,'Name16',94612,5000,1004,'Name4',94612 +1106,'Name6',94612,15000,1003,'Name3',94612 +1106,'Name6',94612,15000,1004,'Name4',94612 +1006,'Name16',94612,15000,1003,'Name3',94612 +1006,'Name16',94612,15000,1004,'Name4',94612 +1106,'Name16',94612,15000,1003,'Name3',94612 +1106,'Name16',94612,15000,1004,'Name4',94612 +==== +select j.*, d.* +from JoinTbl j left outer join DimTbl d on (j.test_zip = d.zip) +---- +bigint, string, int, int, bigint, string, int +---- +1001,'Name1',94611,5000,1001,'Name1',94611 +1001,'Name1',94611,5000,1002,'Name2',94611 +1002,'Name2',94611,5000,1001,'Name1',94611 +1002,'Name2',94611,5000,1002,'Name2',94611 +1003,'Name3',94611,5000,1001,'Name1',94611 +1003,'Name3',94611,5000,1002,'Name2',94611 +1004,'Name4',94611,5000,1001,'Name1',94611 +1004,'Name4',94611,5000,1002,'Name2',94611 +1005,'Name5',94611,5000,1001,'Name1',94611 +1005,'Name5',94611,5000,1002,'Name2',94611 
+1106,'Name6',94612,5000,1003,'Name3',94612 +1106,'Name6',94612,5000,1004,'Name4',94612 +1006,'Name16',94612,5000,1003,'Name3',94612 +1006,'Name16',94612,5000,1004,'Name4',94612 +1006,'Name6',94616,5000,NULL,'NULL',NULL +1106,'Name16',94612,5000,1003,'Name3',94612 +1106,'Name16',94612,5000,1004,'Name4',94612 +1106,'Name6',94616,5000,NULL,'NULL',NULL +1006,'Name16',94616,5000,NULL,'NULL',NULL +1106,'Name16',94616,5000,NULL,'NULL',NULL +1106,'Name6',94612,15000,1003,'Name3',94612 +1106,'Name6',94612,15000,1004,'Name4',94612 +1006,'Name16',94612,15000,1003,'Name3',94612 +1006,'Name16',94612,15000,1004,'Name4',94612 +1006,'Name6',94616,15000,NULL,'NULL',NULL +1106,'Name16',94612,15000,1003,'Name3',94612 +1106,'Name16',94612,15000,1004,'Name4',94612 +1106,'Name6',94616,15000,NULL,'NULL',NULL +1006,'Name16',94616,15000,NULL,'NULL',NULL +1106,'Name16',94616,15000,NULL,'NULL',NULL +==== +select j.*, d.* +from JoinTbl j right outer join DimTbl d on (j.test_zip = d.zip) +---- +bigint, string, int, int, bigint, string, int +---- +1001,'Name1',94611,5000,1001,'Name1',94611 +1001,'Name1',94611,5000,1002,'Name2',94611 +1002,'Name2',94611,5000,1001,'Name1',94611 +1002,'Name2',94611,5000,1002,'Name2',94611 +1003,'Name3',94611,5000,1001,'Name1',94611 +1003,'Name3',94611,5000,1002,'Name2',94611 +1004,'Name4',94611,5000,1001,'Name1',94611 +1004,'Name4',94611,5000,1002,'Name2',94611 +1005,'Name5',94611,5000,1001,'Name1',94611 +1005,'Name5',94611,5000,1002,'Name2',94611 +1106,'Name6',94612,5000,1003,'Name3',94612 +1106,'Name6',94612,5000,1004,'Name4',94612 +1006,'Name16',94612,5000,1003,'Name3',94612 +1006,'Name16',94612,5000,1004,'Name4',94612 +1106,'Name16',94612,5000,1003,'Name3',94612 +1106,'Name16',94612,5000,1004,'Name4',94612 +1106,'Name6',94612,15000,1003,'Name3',94612 +1106,'Name6',94612,15000,1004,'Name4',94612 +1006,'Name16',94612,15000,1003,'Name3',94612 +1006,'Name16',94612,15000,1004,'Name4',94612 +1106,'Name16',94612,15000,1003,'Name3',94612 
+1106,'Name16',94612,15000,1004,'Name4',94612 +NULL,'NULL',NULL,NULL,1005,'Name5',94613 +NULL,'NULL',NULL,NULL,1006,'Name6',94613 +NULL,'NULL',NULL,NULL,1007,'Name7',94614 +NULL,'NULL',NULL,NULL,1008,'Name8',94614 +NULL,'NULL',NULL,NULL,1009,'Name9',94615 +NULL,'NULL',NULL,NULL,1010,'Name10',94615 +==== +select j.*, d.* +from JoinTbl j full outer join DimTbl d on (j.test_zip = d.zip) +---- +bigint, string, int, int, bigint, string, int +---- +1001,'Name1',94611,5000,1001,'Name1',94611 +1001,'Name1',94611,5000,1002,'Name2',94611 +1002,'Name2',94611,5000,1001,'Name1',94611 +1002,'Name2',94611,5000,1002,'Name2',94611 +1003,'Name3',94611,5000,1001,'Name1',94611 +1003,'Name3',94611,5000,1002,'Name2',94611 +1004,'Name4',94611,5000,1001,'Name1',94611 +1004,'Name4',94611,5000,1002,'Name2',94611 +1005,'Name5',94611,5000,1001,'Name1',94611 +1005,'Name5',94611,5000,1002,'Name2',94611 +1106,'Name6',94612,5000,1003,'Name3',94612 +1106,'Name6',94612,5000,1004,'Name4',94612 +1006,'Name16',94612,5000,1003,'Name3',94612 +1006,'Name16',94612,5000,1004,'Name4',94612 +1006,'Name6',94616,5000,NULL,'NULL',NULL +1106,'Name16',94612,5000,1003,'Name3',94612 +1106,'Name16',94612,5000,1004,'Name4',94612 +1106,'Name6',94616,5000,NULL,'NULL',NULL +1006,'Name16',94616,5000,NULL,'NULL',NULL +1106,'Name16',94616,5000,NULL,'NULL',NULL +1106,'Name6',94612,15000,1003,'Name3',94612 +1106,'Name6',94612,15000,1004,'Name4',94612 +1006,'Name16',94612,15000,1003,'Name3',94612 +1006,'Name16',94612,15000,1004,'Name4',94612 +1006,'Name6',94616,15000,NULL,'NULL',NULL +1106,'Name16',94612,15000,1003,'Name3',94612 +1106,'Name16',94612,15000,1004,'Name4',94612 +1106,'Name6',94616,15000,NULL,'NULL',NULL +1006,'Name16',94616,15000,NULL,'NULL',NULL +1106,'Name16',94616,15000,NULL,'NULL',NULL +NULL,'NULL',NULL,NULL,1005,'Name5',94613 +NULL,'NULL',NULL,NULL,1006,'Name6',94613 +NULL,'NULL',NULL,NULL,1007,'Name7',94614 +NULL,'NULL',NULL,NULL,1008,'Name8',94614 +NULL,'NULL',NULL,NULL,1009,'Name9',94615 
+NULL,'NULL',NULL,NULL,1010,'Name10',94615 +==== +# semi-join on bigint +select d.* +from DimTbl d left semi join JoinTbl j on (d.id = j.test_id) +---- +bigint, string, int +---- +1001,'Name1',94611 +1002,'Name2',94611 +1003,'Name3',94612 +1004,'Name4',94612 +1005,'Name5',94613 +1006,'Name6',94613 +==== +# semi-join on string +select d.* +from DimTbl d left semi join JoinTbl j on (j.test_name = d.name) +---- +bigint, string, int +---- +1001,'Name1',94611 +1002,'Name2',94611 +1003,'Name3',94612 +1004,'Name4',94612 +1005,'Name5',94613 +1006,'Name6',94613 +==== +# semi-join on int +select d.* +from DimTbl d left semi join JoinTbl j on (j.test_zip = d.zip) +---- +bigint, string, int +---- +1001,'Name1',94611 +1002,'Name2',94611 +1003,'Name3',94612 +1004,'Name4',94612 +==== diff --git a/testdata/DimTbl/data.csv b/testdata/DimTbl/data.csv new file mode 100644 index 000000000..31bb3db7f --- /dev/null +++ b/testdata/DimTbl/data.csv @@ -0,0 +1,10 @@ +1001,Name1,94611 +1002,Name2,94611 +1003,Name3,94612 +1004,Name4,94612 +1005,Name5,94613 +1006,Name6,94613 +1007,Name7,94614 +1008,Name8,94614 +1009,Name9,94615 +1010,Name10,94615 diff --git a/testdata/JoinTbl/data.csv b/testdata/JoinTbl/data.csv new file mode 100644 index 000000000..1d58506df --- /dev/null +++ b/testdata/JoinTbl/data.csv @@ -0,0 +1,19 @@ +1001,Name1,94611,5000 +1002,Name2,94611,5000 +1003,Name3,94611,5000 +1004,Name4,94611,5000 +1005,Name5,94611,5000 +1106,Name6,94612,5000 +1006,Name16,94612,5000 +1006,Name6,94616,5000 +1106,Name16,94612,5000 +1106,Name6,94616,5000 +1006,Name16,94616,5000 +1106,Name16,94616,5000 +1106,Name6,94612,15000 +1006,Name16,94612,15000 +1006,Name6,94616,15000 +1106,Name16,94612,15000 +1106,Name6,94616,15000 +1006,Name16,94616,15000 +1106,Name16,94616,15000 diff --git a/testdata/bin/create.sql b/testdata/bin/create.sql index f87650b6b..0f6eefa1f 100644 --- a/testdata/bin/create.sql +++ b/testdata/bin/create.sql @@ -107,6 +107,21 @@ CREATE TABLE TestTbl_rc ( zip int) STORED AS RCFILE; 
+DROP TABLE IF EXISTS DimTbl; +CREATE TABLE DimTbl ( + id bigint, + name string, + zip int) +row format delimited fields terminated by ',' escaped by '\\' stored as textfile; + +DROP TABLE IF EXISTS JoinTbl; +CREATE TABLE JoinTbl ( + test_id bigint, + test_name string, + test_zip int, + alltypes_id int) +row format delimited fields terminated by ',' escaped by '\\' stored as textfile; + CREATE DATABASE IF NOT EXISTS testdb1; DROP TABLE IF EXISTS testdb1.AllTypes; diff --git a/testdata/bin/load.sql b/testdata/bin/load.sql index d320319f9..c0e3ce538 100644 --- a/testdata/bin/load.sql +++ b/testdata/bin/load.sql @@ -86,6 +86,8 @@ SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, FROM alltypesaggnonulls; LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/LikeTbl/data.csv' OVERWRITE INTO TABLE LikeTbl; +LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/DimTbl/data.csv' OVERWRITE INTO TABLE DimTbl; +LOAD DATA LOCAL INPATH '${env:IMPALA_HOME}/testdata/JoinTbl/data.csv' OVERWRITE INTO TABLE JoinTbl; INSERT OVERWRITE TABLE hbasealltypessmall SELECT id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col