diff --git a/be/src/exec/merge-node.cc b/be/src/exec/merge-node.cc index 37098d14b..4360cda82 100644 --- a/be/src/exec/merge-node.cc +++ b/be/src/exec/merge-node.cc @@ -26,13 +26,12 @@ namespace impala { MergeNode::MergeNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs) : ExecNode(pool, tnode, descs), + tuple_id_(tnode.merge_node.tuple_id), const_result_expr_idx_(0), child_idx_(INVALID_CHILD_IDX), child_row_batch_(NULL), child_eos_(false), child_row_idx_(0) { - DCHECK_EQ(1, tnode.row_tuples.size()); - tuple_id_ = tnode.row_tuples[0]; // TODO: log errors in runtime state Status status = Init(pool, tnode); DCHECK(status.ok()) diff --git a/common/thrift/PlanNodes.thrift b/common/thrift/PlanNodes.thrift index be45ee21c..20b04a69c 100644 --- a/common/thrift/PlanNodes.thrift +++ b/common/thrift/PlanNodes.thrift @@ -164,11 +164,13 @@ struct TSortNode { } struct TMergeNode { + // A MergeNode could be the left input of a join and needs to know which tuple to write. + 1: required Types.TTupleId tuple_id // List or expr lists materialized by this node. // There is one list of exprs per query stmt feeding into this merge node. - 1: required list> result_expr_lists + 2: required list> result_expr_lists // Separate list of expr lists coming from a constant select stmts. - 2: required list> const_expr_lists + 3: required list> const_expr_lists } struct TExchangeNode { diff --git a/fe/src/main/java/com/cloudera/impala/analysis/DescriptorTable.java b/fe/src/main/java/com/cloudera/impala/analysis/DescriptorTable.java index 23818dff2..d8d03d1ac 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/DescriptorTable.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/DescriptorTable.java @@ -85,11 +85,13 @@ public class DescriptorTable { TDescriptorTable result = new TDescriptorTable(); HashSet referencedTbls = Sets.newHashSet(); for (TupleDescriptor tupleD: tupleDescs.values()) { - // inline view has a non-materialized tuple descriptor in the descriptor table - // just for type checking, which we need to skip + // inline view of a non-constant select has a non-materialized tuple descriptor + // in the descriptor table just for type checking, which we need to skip if (tupleD.getIsMaterialized()) { result.addToTupleDescriptors(tupleD.toThrift()); - if (tupleD.getTable() != null) { + // an inline view of a constant select has a materialized tuple + // but its table has no id + if (tupleD.getTable() != null && tupleD.getTable().getId() != null) { referencedTbls.add(tupleD.getTable()); } for (SlotDescriptor slotD: tupleD.getSlots()) { diff --git a/fe/src/main/java/com/cloudera/impala/analysis/InlineViewRef.java b/fe/src/main/java/com/cloudera/impala/analysis/InlineViewRef.java index d5a89d436..4ae4d7f88 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/InlineViewRef.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/InlineViewRef.java @@ -76,6 +76,14 @@ public class InlineViewRef extends TableRef { desc = analyzer.registerInlineViewRef(this); isAnalyzed = true; // true now that we have assigned desc + // For constant selects we materialize its exprs into a tuple. + if (materializedTupleIds.isEmpty()) { + Preconditions.checkState(queryStmt instanceof SelectStmt); + Preconditions.checkState(((SelectStmt) queryStmt).getTableRefs().isEmpty()); + desc.setIsMaterialized(true); + materializedTupleIds.add(desc.getId()); + } + // Now do the remaining join analysis analyzeJoin(analyzer); diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TupleDescriptor.java b/fe/src/main/java/com/cloudera/impala/analysis/TupleDescriptor.java index c40f1fad7..dd2104935 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/TupleDescriptor.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/TupleDescriptor.java @@ -102,7 +102,7 @@ public class TupleDescriptor { public TTupleDescriptor toThrift() { TTupleDescriptor ttupleDesc = new TTupleDescriptor(id.asInt(), byteSize, numNullBytes); - if (table != null) { + if (table != null && table.getId() != null) { ttupleDesc.setTableId(table.getId().asInt()); } return ttupleDesc; diff --git a/fe/src/main/java/com/cloudera/impala/planner/MergeNode.java b/fe/src/main/java/com/cloudera/impala/planner/MergeNode.java index 794002695..d3cb3fbed 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/MergeNode.java +++ b/fe/src/main/java/com/cloudera/impala/planner/MergeNode.java @@ -54,13 +54,17 @@ public class MergeNode extends PlanNode { // Output tuple materialized by this node. protected final List tupleDescs = Lists.newArrayList(); + protected final TupleId tupleId; + protected MergeNode(PlanNodeId id, TupleId tupleId) { super(id, Lists.newArrayList(tupleId)); this.rowTupleIds.add(tupleId); + this.tupleId = tupleId; } protected MergeNode(PlanNodeId id, MergeNode node) { super(id, node); + this.tupleId = node.tupleId; } public void addConstExprList(List exprs) { @@ -111,7 +115,7 @@ public class MergeNode extends PlanNode { for (List constTexprList : constExprLists) { constTexprLists.add(Expr.treesToThrift(constTexprList)); } - msg.merge_node = new TMergeNode(texprLists, constTexprLists); + msg.merge_node = new TMergeNode(tupleId.asInt(), texprLists, constTexprLists); msg.node_type = TPlanNodeType.MERGE_NODE; } diff --git a/fe/src/main/java/com/cloudera/impala/planner/PlanFragment.java b/fe/src/main/java/com/cloudera/impala/planner/PlanFragment.java index a321dc501..9b2029031 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/PlanFragment.java +++ b/fe/src/main/java/com/cloudera/impala/planner/PlanFragment.java @@ -56,7 +56,7 @@ import com.google.common.collect.Sets; */ public class PlanFragment { private final static Logger LOG = LoggerFactory.getLogger(PlanFragment.class); - + // root of plan tree executed by this fragment private PlanNode planRoot; diff --git a/fe/src/main/java/com/cloudera/impala/planner/Planner.java b/fe/src/main/java/com/cloudera/impala/planner/Planner.java index 051ca2337..6a8e77616 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/Planner.java +++ b/fe/src/main/java/com/cloudera/impala/planner/Planner.java @@ -946,8 +946,8 @@ public class Planner { // evaluated inside the subquery tree; // if it does contain a limit clause, it's not correct to have the view plan // evaluate predicates from the enclosing scope. + List conjuncts = Lists.newArrayList(); if (!inlineViewRef.getViewStmt().hasLimitClause()) { - List conjuncts = Lists.newArrayList(); for (Predicate p: analyzer.getUnassignedConjuncts(inlineViewRef.getMaterializedTupleIds())) { if (canEvalPredicate(inlineViewRef.getMaterializedTupleIds(), p, analyzer)) { @@ -958,6 +958,21 @@ public class Planner { analyzer.markConjunctsAssigned(conjuncts); } + // Turn a constant select into a MergeNode that materializes the exprs. + QueryStmt viewStmt = inlineViewRef.getViewStmt(); + if (viewStmt instanceof SelectStmt) { + SelectStmt selectStmt = (SelectStmt) viewStmt; + if (selectStmt.getTableRefs().isEmpty()) { + // Analysis should have generated a tuple id into which to materialize the exprs. + Preconditions.checkState(inlineViewRef.getMaterializedTupleIds().size() == 1); + MergeNode mergeNode = new MergeNode(new PlanNodeId(nodeIdGenerator), + inlineViewRef.getMaterializedTupleIds().get(0)); + mergeNode.getConstExprLists().add(selectStmt.getResultExprs()); + mergeNode.getConjuncts().addAll(conjuncts); + return mergeNode; + } + } + return createQueryPlan(inlineViewRef.getViewStmt(), inlineViewRef.getAnalyzer(), -1); } @@ -1056,10 +1071,10 @@ public class Planner { } Expr lhsExpr = null; - if (p.getChild(0).isBound(lhsIds)) { - lhsExpr = p.getChild(0); - } else if (p.getChild(1).isBound(lhsIds)) { + if (p.getChild(1).isBound(lhsIds)) { lhsExpr = p.getChild(1); + } else if (p.getChild(0).isBound(lhsIds)) { + lhsExpr = p.getChild(0); } else { // not an equi-join condition between lhsIds and rhsId continue; diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery.test index 713321ead..3a6cc17cc 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery.test @@ -1594,3 +1594,226 @@ NODE 2: HDFS SPLIT hdfs://localhost:20500/test-warehouse/alltypessmall/year=2009/month=4/090401.txt 0:1621 LOCATIONS: ==== +# Constant selects in subqueries +select * from (select 1, 2) x +---- PLAN +Plan Fragment 0 + UNPARTITIONED + MERGE (0) + TUPLE IDS: 0 + SELECT CONSTANT +---- DISTRIBUTEDPLAN +Plan Fragment 0 + UNPARTITIONED + MERGE (0) + TUPLE IDS: 0 + SELECT CONSTANT +==== +# Constant selects in subqueries with predicate +select * from (select y from (select 1 y) a where y < 10) b +---- PLAN +Plan Fragment 0 + UNPARTITIONED + MERGE (0) + TUPLE IDS: 0 + PREDICATES: 1 < 10 + SELECT CONSTANT +---- DISTRIBUTEDPLAN +Plan Fragment 0 + UNPARTITIONED + MERGE (0) + TUPLE IDS: 0 + PREDICATES: 1 < 10 + SELECT CONSTANT +==== +# Union of constant selects in subquery +# TODO: We could combine the merge nodes below. +select * from (select 1 union all select 2 union all select * from (select 3) y) x +---- PLAN +Plan Fragment 0 + UNPARTITIONED + MERGE (0) + TUPLE IDS: 1 + SELECT CONSTANT + SELECT CONSTANT + MERGE (1) + TUPLE IDS: 0 + SELECT CONSTANT +---- DISTRIBUTEDPLAN +Plan Fragment 0 + UNPARTITIONED + EXCHANGE (2) + TUPLE IDS: 1 + +Plan Fragment 1 + UNPARTITIONED + STREAM DATA SINK + EXCHANGE ID: 2 + UNPARTITIONED + + MERGE (4) + TUPLE IDS: 1 + SELECT CONSTANT + SELECT CONSTANT + +Plan Fragment 2 + UNPARTITIONED + STREAM DATA SINK + EXCHANGE ID: 2 + UNPARTITIONED + + MERGE (3) + TUPLE IDS: 1 + MERGE (1) + TUPLE IDS: 0 + SELECT CONSTANT +==== +# Inner join on inline views made up of constant selects +select * from (select 1 a, 2 b) x +inner join (select 1 a, 3 b) y on x.a = y.a +inner join (select 1 a, 3 b) z on z.b = y.b +---- PLAN +Plan Fragment 0 + UNPARTITIONED + HASH JOIN + JOIN OP: INNER JOIN + HASH PREDICATES: + 3 = 3 + TUPLE IDS: 0 1 2 + HASH JOIN + JOIN OP: INNER JOIN + HASH PREDICATES: + 1 = 1 + 3 = 3 + TUPLE IDS: 0 1 + MERGE (0) + TUPLE IDS: 0 + PREDICATES: 1 = 1, 3 = 3 + SELECT CONSTANT + MERGE (1) + TUPLE IDS: 1 + SELECT CONSTANT + MERGE (3) + TUPLE IDS: 2 + SELECT CONSTANT +---- DISTRIBUTEDPLAN +Plan Fragment 0 + UNPARTITIONED + HASH JOIN + JOIN OP: INNER JOIN + HASH PREDICATES: + 3 = 3 + TUPLE IDS: 0 1 2 + HASH JOIN + JOIN OP: INNER JOIN + HASH PREDICATES: + 1 = 1 + 3 = 3 + TUPLE IDS: 0 1 + MERGE (0) + TUPLE IDS: 0 + PREDICATES: 1 = 1, 3 = 3 + SELECT CONSTANT + EXCHANGE (5) + TUPLE IDS: 1 + EXCHANGE (6) + TUPLE IDS: 2 + +Plan Fragment 1 + UNPARTITIONED + STREAM DATA SINK + EXCHANGE ID: 6 + UNPARTITIONED + + MERGE (3) + TUPLE IDS: 2 + SELECT CONSTANT + +Plan Fragment 2 + UNPARTITIONED + STREAM DATA SINK + EXCHANGE ID: 5 + UNPARTITIONED + + MERGE (1) + TUPLE IDS: 1 + SELECT CONSTANT +==== +# Semi and inner join on a table and on inline views made up of constant selects +select * from functional.alltypessmall x +left semi join (select 1 a, 3 b) y on y.a = x.id +inner join (select 1 a, 3 b) z on z.b = y.b +---- PLAN +Plan Fragment 0 + UNPARTITIONED + HASH JOIN + JOIN OP: INNER JOIN + HASH PREDICATES: + 3 = 3 + TUPLE IDS: 0 1 2 + HASH JOIN + JOIN OP: LEFT SEMI JOIN + HASH PREDICATES: + x.id = 1 + 3 = 3 + TUPLE IDS: 0 1 + SCAN HDFS table=functional.alltypessmall #partitions=4 size=6.32KB (0) + PREDICATES: 1 = x.id + TUPLE IDS: 0 + MERGE (1) + TUPLE IDS: 1 + SELECT CONSTANT + MERGE (3) + TUPLE IDS: 2 + SELECT CONSTANT +---- DISTRIBUTEDPLAN +Plan Fragment 0 + UNPARTITIONED + EXCHANGE (7) + TUPLE IDS: 0 1 2 + +Plan Fragment 1 + RANDOM + STREAM DATA SINK + EXCHANGE ID: 7 + UNPARTITIONED + + HASH JOIN + JOIN OP: INNER JOIN + HASH PREDICATES: + 3 = 3 + TUPLE IDS: 0 1 2 + HASH JOIN + JOIN OP: LEFT SEMI JOIN + HASH PREDICATES: + x.id = 1 + 3 = 3 + TUPLE IDS: 0 1 + SCAN HDFS table=functional.alltypessmall #partitions=4 size=6.32KB (0) + PREDICATES: 1 = x.id + TUPLE IDS: 0 + EXCHANGE (5) + TUPLE IDS: 1 + EXCHANGE (6) + TUPLE IDS: 2 + +Plan Fragment 2 + UNPARTITIONED + STREAM DATA SINK + EXCHANGE ID: 6 + UNPARTITIONED + + MERGE (3) + TUPLE IDS: 2 + SELECT CONSTANT + +Plan Fragment 3 + UNPARTITIONED + STREAM DATA SINK + EXCHANGE ID: 5 + UNPARTITIONED + + MERGE (1) + TUPLE IDS: 1 + SELECT CONSTANT +==== \ No newline at end of file diff --git a/testdata/workloads/functional-query/queries/QueryTest/subquery.test b/testdata/workloads/functional-query/queries/QueryTest/subquery.test index d80c1567e..cf6bd75b2 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/subquery.test +++ b/testdata/workloads/functional-query/queries/QueryTest/subquery.test @@ -472,3 +472,48 @@ bigint, string, int, int, bigint, string, int 1006,'Name6',94616,15000,1006,'Name6',94613 1006,'Name6',94616,5000,1006,'Name6',94613 ==== +---- QUERY +# Constant selects in subqueries +select * from (select 1, 2) x +---- TYPES +tinyint, tinyint +---- RESULTS +1,2 +==== +---- QUERY +# Constant selects in subqueries +select * from (select y from (select 1 y) a where y < 10) b +---- TYPES +tinyint +---- RESULTS +==== +---- QUERY +# Constant selects in subqueries +select * from (select 1 union all select 2 union all select * from (select 3) y) x +---- TYPES +tinyint +---- RESULTS +1 +2 +3 +==== +---- QUERY +# Join on inline views made up of constant selects +select * from (select 1 a, 2 b) x +inner join (select 1 a, 3 b) y on x.a = y.a +inner join (select 1 a, 3 b) z on z.b = y.b +---- TYPES +tinyint, tinyint, tinyint, tinyint, tinyint, tinyint +---- RESULTS +1,2,1,3,1,3 +==== +---- QUERY +# Semi and inner join on a table and on inline views made up of constant selects +select x.date_string_col, y.*, z.* from functional.alltypessmall x +left semi join (select 1 a, 3 b) y on y.a = x.id +inner join (select 1 a, 3 b) z on z.b = y.b +---- TYPES +string, tinyint, tinyint, tinyint, tinyint +---- RESULTS +'01/01/09',1,3,1,3 +==== \ No newline at end of file