diff --git a/be/src/service/query-options.cc b/be/src/service/query-options.cc index 8546522fc..150f0541d 100644 --- a/be/src/service/query-options.cc +++ b/be/src/service/query-options.cc @@ -910,6 +910,10 @@ Status impala::SetQueryOption(const string& key, const string& value, query_options->__set_async_codegen(IsTrue(value)); break; } + case TImpalaQueryOptions::ENABLE_DISTINCT_SEMI_JOIN_OPTIMIZATION: { + query_options->__set_enable_distinct_semi_join_optimization(IsTrue(value)); + break; + } default: if (IsRemovedQueryOption(key)) { LOG(WARNING) << "Ignoring attempt to set removed query option '" << key << "'"; diff --git a/be/src/service/query-options.h b/be/src/service/query-options.h index 2aa65b556..4de2174c5 100644 --- a/be/src/service/query-options.h +++ b/be/src/service/query-options.h @@ -47,7 +47,7 @@ typedef std::unordered_map // time we add or remove a query option to/from the enum TImpalaQueryOptions. #define QUERY_OPTS_TABLE\ DCHECK_EQ(_TImpalaQueryOptions_VALUES_TO_NAMES.size(),\ - TImpalaQueryOptions::ASYNC_CODEGEN + 1);\ + TImpalaQueryOptions::ENABLE_DISTINCT_SEMI_JOIN_OPTIMIZATION + 1);\ REMOVED_QUERY_OPT_FN(abort_on_default_limit_exceeded, ABORT_ON_DEFAULT_LIMIT_EXCEEDED)\ QUERY_OPT_FN(abort_on_error, ABORT_ON_ERROR, TQueryOptionLevel::REGULAR)\ REMOVED_QUERY_OPT_FN(allow_unsupported_formats, ALLOW_UNSUPPORTED_FORMATS)\ @@ -203,6 +203,8 @@ typedef std::unordered_map QUERY_OPT_FN(enabled_runtime_filter_types, ENABLED_RUNTIME_FILTER_TYPES,\ TQueryOptionLevel::ADVANCED)\ QUERY_OPT_FN(async_codegen, ASYNC_CODEGEN, TQueryOptionLevel::DEVELOPMENT)\ + QUERY_OPT_FN(enable_distinct_semi_join_optimization,\ + ENABLE_DISTINCT_SEMI_JOIN_OPTIMIZATION, TQueryOptionLevel::ADVANCED)\ ; /// Enforce practical limits on some query options to avoid undesired query state. 
diff --git a/common/thrift/ImpalaInternalService.thrift b/common/thrift/ImpalaInternalService.thrift index eea71367d..107203f3b 100644 --- a/common/thrift/ImpalaInternalService.thrift +++ b/common/thrift/ImpalaInternalService.thrift @@ -429,6 +429,9 @@ struct TQueryOptions { // See comment in ImpalaService.thrift 105: optional bool async_codegen = false; + + // See comment in ImpalaService.thrift + 106: optional bool enable_distinct_semi_join_optimization = true; } // Impala currently has two types of sessions: Beeswax and HiveServer2 diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift index 5b6333916..3930da7b3 100644 --- a/common/thrift/ImpalaService.thrift +++ b/common/thrift/ImpalaService.thrift @@ -538,6 +538,11 @@ enum TImpalaQueryOptions { // Enable asynchronous codegen. ASYNC_CODEGEN = 104 + + // If true, the planner will consider adding a distinct aggregation to SEMI JOIN + // operations. If false, disables the optimization (i.e. falls back to pre-Impala-4.0 + // behaviour). + ENABLE_DISTINCT_SEMI_JOIN_OPTIMIZATION = 105 } // The summary of a DML statement. 
diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index 5f9caea75..958e47b95 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -58,13 +58,13 @@ import org.apache.impala.catalog.IcebergTable; import org.apache.impala.catalog.KuduTable; import org.apache.impala.catalog.TableLoadingException; import org.apache.impala.catalog.Type; -import org.apache.impala.compat.MetastoreShim; import org.apache.impala.common.AnalysisException; import org.apache.impala.common.IdGenerator; import org.apache.impala.common.ImpalaException; import org.apache.impala.common.InternalException; import org.apache.impala.common.Pair; import org.apache.impala.common.RuntimeEnv; +import org.apache.impala.compat.MetastoreShim; import org.apache.impala.planner.JoinNode; import org.apache.impala.planner.PlanNode; import org.apache.impala.rewrite.BetweenToCompoundRule; @@ -73,6 +73,7 @@ import org.apache.impala.rewrite.EqualityDisjunctsToInRule; import org.apache.impala.rewrite.ExprRewriteRule; import org.apache.impala.rewrite.ExprRewriter; import org.apache.impala.rewrite.ExtractCommonConjunctRule; +import org.apache.impala.rewrite.ExtractCompoundVerticalBarExprRule; import org.apache.impala.rewrite.FoldConstantsRule; import org.apache.impala.rewrite.NormalizeBinaryPredicatesRule; import org.apache.impala.rewrite.NormalizeCountStarRule; @@ -80,7 +81,6 @@ import org.apache.impala.rewrite.NormalizeExprsRule; import org.apache.impala.rewrite.SimplifyCastStringToTimestamp; import org.apache.impala.rewrite.SimplifyConditionalsRule; import org.apache.impala.rewrite.SimplifyDistinctFromRule; -import org.apache.impala.rewrite.ExtractCompoundVerticalBarExprRule; import org.apache.impala.service.FeSupport; import org.apache.impala.thrift.TAccessEvent; import org.apache.impala.thrift.TCatalogObjectType; @@ -875,6 +875,17 @@ public class 
Analyzer { return globalState_.descTbl.getSlotDesc(id); } + /** + * Helper to get all slot descriptors in list. + */ + public List getSlotDescs(List ids) { + List result = new ArrayList<>(ids.size()); + for (SlotId id : ids) { + result.add(getSlotDesc(id)); + } + return result; + } + public int getNumTableRefs() { return tableRefMap_.size(); } public TableRef getTableRef(TupleId tid) { return tableRefMap_.get(tid); } public ExprRewriter getConstantFolder() { return globalState_.constantFolder_; } diff --git a/fe/src/main/java/org/apache/impala/analysis/Expr.java b/fe/src/main/java/org/apache/impala/analysis/Expr.java index b110515e0..4f00c4afd 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Expr.java +++ b/fe/src/main/java/org/apache/impala/analysis/Expr.java @@ -1316,6 +1316,11 @@ abstract public class Expr extends TreeNode implements ParseNode, Cloneabl return null; } + /** + * Find all unique slot and/or tuple ids referenced by this expr tree. + * @param tupleIds unique tuple IDs from this expr tree are appended here. + * @param slotIds unique slot IDs from this expr tree are appended here. + */ public void getIds(List tupleIds, List slotIds) { Set tupleIdSet = new HashSet<>(); Set slotIdSet = new HashSet<>(); diff --git a/fe/src/main/java/org/apache/impala/planner/AggregationNode.java b/fe/src/main/java/org/apache/impala/planner/AggregationNode.java index b8d271c08..3a75c6614 100644 --- a/fe/src/main/java/org/apache/impala/planner/AggregationNode.java +++ b/fe/src/main/java/org/apache/impala/planner/AggregationNode.java @@ -247,18 +247,28 @@ public class AggregationNode extends PlanNode { // limit the potential overestimation. We could, in future, improve this further // by recognizing functional dependencies. 
List groupingExprs = aggInfo.getGroupingExprs(); + long aggInputCardinality = getAggInputCardinality(); + long numGroups = estimateNumGroups(groupingExprs, aggInputCardinality); + if (LOG.isTraceEnabled()) { + LOG.trace("Node " + id_ + " numGroups= " + numGroups + " aggInputCardinality=" + + aggInputCardinality + " for agg class " + aggInfo.debugString()); + } + return numGroups; + } + + /** + * Estimate the number of groups that will be present for the provided grouping + * expressions and input cardinality. + * Returns -1 if a reasonable cardinality estimate cannot be produced. + */ + public static long estimateNumGroups( + List groupingExprs, long aggInputCardinality) { if (groupingExprs.isEmpty()) { // Non-grouping aggregation class - always results in one group even if there are // zero input rows. return 1; } long numGroups = Expr.getNumDistinctValues(groupingExprs); - // Sanity check the cardinality_ based on the input cardinality_. - long aggInputCardinality = getAggInputCardinality(); - if (LOG.isTraceEnabled()) { - LOG.trace("Node " + id_ + " numGroups= " + numGroups + " aggInputCardinality=" + - aggInputCardinality + " for agg class " + aggInfo.debugString()); - } if (numGroups == -1) { // A worst-case cardinality_ is better than an unknown cardinality_. // Note that this will still be -1 if the child's cardinality is unknown. diff --git a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java index f4d73a699..09d86f95f 100644 --- a/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java +++ b/fe/src/main/java/org/apache/impala/planner/SingleNodePlanner.java @@ -96,10 +96,20 @@ import com.google.common.collect.Sets; * The single-node plan needs to be wrapped in a plan fragment for it to be executable. */ public class SingleNodePlanner { + // Controls whether a distinct aggregation should be inserted before a join input. 
+ // If the size of the distinct values after aggregation is less than or equal to + // the original input size multiplied by this threshold, the distinct agg should be + // inserted. + private static final double JOIN_DISTINCT_THRESHOLD = 0.25; + private final static Logger LOG = LoggerFactory.getLogger(SingleNodePlanner.class); private final PlannerContext ctx_; + // Set to true if single node planning added new value transfers to the + // value transfer graph in 'analyzer'. + private boolean valueTransferGraphNeedsUpdate_ = false; + public SingleNodePlanner(PlannerContext ctx) { ctx_ = ctx; } @@ -161,6 +171,14 @@ public class SingleNodePlanner { PlanNode singleNodePlan = createQueryPlan(queryStmt, analyzer, ctx_.getQueryOptions().isDisable_outermost_topn()); Preconditions.checkNotNull(singleNodePlan); + // Recompute the graph since we may have added new equivalences. + if (valueTransferGraphNeedsUpdate_) { + ctx_.getTimeline().markEvent("Recomputing value transfer graph"); + analyzer.computeValueTransferGraph(); + ctx_.getTimeline().markEvent("Value transfer graph computed"); + valueTransferGraphNeedsUpdate_ = false; + } + return singleNodePlan; } @@ -1808,6 +1826,12 @@ public class SingleNodePlanner { } analyzer.markConjunctsAssigned(otherJoinConjuncts); + if (analyzer.getQueryOptions().isEnable_distinct_semi_join_optimization() && + innerRef.getJoinOp().isLeftSemiJoin()) { + inner = + addDistinctToJoinInput(inner, analyzer, eqJoinConjuncts, otherJoinConjuncts); + } + // Use a nested-loop join if there are no equi-join conjuncts, or if the inner // (build side) is a singular row src. A singular row src has a cardinality of 1, so // a nested-loop join is certainly cheaper than a hash join. @@ -1827,6 +1851,99 @@ public class SingleNodePlanner { return result; } + /** + * Optionally add an aggregation node on top of 'joinInput' if it is cheaper to project + * and aggregate the slots needed to evaluate the provided join conjuncts. 
This + * is only safe to do if the join's results do not depend on the number of duplicate + * values and if the join does not need to return any slots from 'joinInput'. E.g. + * the inner of a left semi join satisfies both of those conditions. + * @return the original 'joinInput' or its new AggregationNode parent. + */ + private PlanNode addDistinctToJoinInput(PlanNode joinInput, Analyzer analyzer, + List eqJoinConjuncts, List otherJoinConjuncts) + throws InternalException, AnalysisException { + List allJoinConjuncts = new ArrayList<>(); + allJoinConjuncts.addAll(eqJoinConjuncts); + allJoinConjuncts.addAll(otherJoinConjuncts); + allJoinConjuncts = Expr.substituteList( + allJoinConjuncts, joinInput.getOutputSmap(), analyzer, true); + + // Identify the unique slots from the inner required by the join conjuncts. Since this + // is a semi-join, the inner tuple is not returned from the join and we do not need + // any other slots from the inner. + List allSlotIds = new ArrayList<>(); + Expr.getIds(allJoinConjuncts, null, allSlotIds); + List joinInputTupleIds = joinInput.getTupleIds(); + List distinctExprs = new ArrayList<>(); + double estDistinctTupleSize = 0; + for (SlotDescriptor slot : analyzer.getSlotDescs(allSlotIds)) { + if (joinInputTupleIds.contains(slot.getParent().getId())) { + distinctExprs.add(new SlotRef(slot)); + } + } + + // If there are no join predicates, this can be more efficiently handled by + // inserting a limit in the plan (since the first row returned from 'joinInput' + // will satisfy the join predicates). + if (distinctExprs.isEmpty()) { + joinInput.setLimit(1); + return joinInput; + } + long numDistinct = AggregationNode.estimateNumGroups(distinctExprs, + joinInput.getCardinality()); + if (numDistinct < 0 || joinInput.getCardinality() < 0) { + // Default to not adding the aggregation if stats are missing. 
+ LOG.trace("addDistinctToJoinInput():: missing stats, will not add agg"); + return joinInput; + } + if (LOG.isTraceEnabled()) { + LOG.trace("addDistinctToJoinInput(): " + "numDistinct=" + numDistinct + + " inputCardinality=" + joinInput.getCardinality()); + } + + // Check to see if an aggregation would reduce input by enough to justify inserting + // it. Only estimated row counts are compared; average row size is not factored in + // yet. The agg would be ineffective if the input already has 0 or 1 rows. + if (joinInput.getCardinality() <= 1 || + numDistinct > JOIN_DISTINCT_THRESHOLD * joinInput.getCardinality()) { + return joinInput; + } + + // Set up an aggregation node to return only distinct slots. + MultiAggregateInfo distinctAggInfo = + new MultiAggregateInfo(distinctExprs, Collections.emptyList(), null); + distinctAggInfo.analyze(analyzer); + distinctAggInfo.materializeRequiredSlots(analyzer, new ExprSubstitutionMap()); + AggregationNode agg = new AggregationNode( + ctx_.getNextNodeId(), joinInput, distinctAggInfo, AggPhase.FIRST); + agg.init(analyzer); + // Mark the agg as materializing the same table ref. This is required so that other + // parts of planning, e.g. subplan generation, know that this plan tree materialized + // the table ref. + agg.setTblRefIds(joinInput.getTblRefIds()); + // All references to the input slots in join conjuncts must be replaced with + // references to aggregate slots. The output smap from the aggregate info contains + // these mappings, so we can add it to the output smap of the agg to ensure that + // join conjuncts get replaced correctly. + agg.setOutputSmap(ExprSubstitutionMap.compose( + agg.getOutputSmap(), distinctAggInfo.getOutputSmap(), analyzer)); + + // Add value transfers between original slots and aggregate tuple so that runtime + // filters can be pushed through the aggregation. 
We can defer updating the + // value transfer graph until after the single node plan is constructed because + // a precondition of calling this function is that the join does not return any + // of the slots from this plan tree. + for (int i = 0; i < distinctExprs.size(); ++i) { + Expr distinctExpr = distinctExprs.get(i); + SlotDescriptor outputSlot = + distinctAggInfo.getAggClass(0).getResultTupleDesc().getSlots().get(i); + analyzer.registerValueTransfer( + ((SlotRef)distinctExpr).getSlotId(), outputSlot.getId()); + valueTransferGraphNeedsUpdate_ = true; + } + return agg; + } + /** * Create a tree of PlanNodes for the given tblRef, which can be a BaseTableRef, * CollectionTableRef or an InlineViewRef. diff --git a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java index 587268bd8..df6770296 100644 --- a/fe/src/test/java/org/apache/impala/planner/PlannerTest.java +++ b/fe/src/test/java/org/apache/impala/planner/PlannerTest.java @@ -432,6 +432,15 @@ public class PlannerTest extends PlannerTestBase { runPlannerTestFile("subquery-rewrite", options); } + /** + * Tests for the IMPALA-1270 optimization of automatically adding a distinct + * agg to semi joins. 
+ */ + @Test + public void testSemiJoinDistinct() { + runPlannerTestFile("semi-join-distinct"); + } + @Test public void testUnion() { runPlannerTestFile("union"); diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test index 79242f4d8..6891f903c 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/join-order.test @@ -40,7 +40,7 @@ PLAN-ROOT SINK | row-size=117B cardinality=17.56K | |--00:SCAN HDFS [tpch.customer c] -| partitions=1/1 files=1 size=23.08MB +| HDFS partitions=1/1 files=1 size=23.08MB | predicates: c.c_mktsegment = 'BUILDING' | row-size=29B cardinality=30.00K | @@ -50,13 +50,13 @@ PLAN-ROOT SINK | row-size=88B cardinality=57.58K | |--01:SCAN HDFS [tpch.orders o] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | predicates: o_orderdate < '1995-03-15' | runtime filters: RF000 -> o.o_custkey | row-size=42B cardinality=150.00K | 02:SCAN HDFS [tpch.lineitem l] - partitions=1/1 files=1 size=718.94MB + HDFS partitions=1/1 files=1 size=718.94MB predicates: l_shipdate > '1995-03-15' runtime filters: RF002 -> l.l_orderkey row-size=46B cardinality=600.12K @@ -91,7 +91,7 @@ PLAN-ROOT SINK |--08:EXCHANGE [BROADCAST] | | | 00:SCAN HDFS [tpch.customer c] -| partitions=1/1 files=1 size=23.08MB +| HDFS partitions=1/1 files=1 size=23.08MB | predicates: c.c_mktsegment = 'BUILDING' | row-size=29B cardinality=30.00K | @@ -103,13 +103,13 @@ PLAN-ROOT SINK |--07:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [tpch.orders o] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | predicates: o_orderdate < '1995-03-15' | runtime filters: RF000 -> o.o_custkey | row-size=42B cardinality=150.00K | 02:SCAN HDFS [tpch.lineitem l] - partitions=1/1 files=1 size=718.94MB + HDFS partitions=1/1 files=1 size=718.94MB 
predicates: l_shipdate > '1995-03-15' runtime filters: RF002 -> l.l_orderkey row-size=46B cardinality=600.12K @@ -156,7 +156,7 @@ PLAN-ROOT SINK | row-size=117B cardinality=575.77K | |--02:SCAN HDFS [tpch.lineitem l] -| partitions=1/1 files=1 size=718.94MB +| HDFS partitions=1/1 files=1 size=718.94MB | predicates: l_shipdate > '1995-03-15' | row-size=46B cardinality=600.12K | @@ -166,13 +166,13 @@ PLAN-ROOT SINK | row-size=71B cardinality=150.00K | |--01:SCAN HDFS [tpch.orders o] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | predicates: o_orderdate < '1995-03-15' | runtime filters: RF000 -> o.o_orderkey | row-size=42B cardinality=150.00K | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_mktsegment = 'BUILDING' runtime filters: RF002 -> c.c_custkey row-size=29B cardinality=30.00K @@ -207,7 +207,7 @@ PLAN-ROOT SINK |--08:EXCHANGE [BROADCAST] | | | 02:SCAN HDFS [tpch.lineitem l] -| partitions=1/1 files=1 size=718.94MB +| HDFS partitions=1/1 files=1 size=718.94MB | predicates: l_shipdate > '1995-03-15' | row-size=46B cardinality=600.12K | @@ -219,13 +219,13 @@ PLAN-ROOT SINK |--07:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [tpch.orders o] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | predicates: o_orderdate < '1995-03-15' | runtime filters: RF000 -> o.o_orderkey | row-size=42B cardinality=150.00K | 00:SCAN HDFS [tpch.customer c] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB predicates: c.c_mktsegment = 'BUILDING' runtime filters: RF002 -> c.c_custkey row-size=29B cardinality=30.00K @@ -274,7 +274,7 @@ PLAN-ROOT SINK | row-size=134B cardinality=115.16K | |--05:SCAN HDFS [tpch.region] -| partitions=1/1 files=1 size=384B +| HDFS partitions=1/1 files=1 size=384B | predicates: r_name = 'ASIA' | row-size=21B cardinality=1 | @@ -284,7 +284,7 @@ PLAN-ROOT SINK | row-size=113B 
cardinality=575.77K | |--04:SCAN HDFS [tpch.nation] -| partitions=1/1 files=1 size=2.15KB +| HDFS partitions=1/1 files=1 size=2.15KB | runtime filters: RF000 -> n_regionkey | row-size=23B cardinality=25 | @@ -294,7 +294,7 @@ PLAN-ROOT SINK | row-size=90B cardinality=575.77K | |--03:SCAN HDFS [tpch.supplier s] -| partitions=1/1 files=1 size=1.33MB +| HDFS partitions=1/1 files=1 size=1.33MB | runtime filters: RF002 -> s_nationkey | row-size=10B cardinality=10.00K | @@ -304,7 +304,7 @@ PLAN-ROOT SINK | row-size=80B cardinality=575.77K | |--00:SCAN HDFS [tpch.customer] -| partitions=1/1 files=1 size=23.08MB +| HDFS partitions=1/1 files=1 size=23.08MB | runtime filters: RF002 -> tpch.customer.c_nationkey, RF004 -> c_nationkey | row-size=10B cardinality=150.00K | @@ -314,13 +314,13 @@ PLAN-ROOT SINK | row-size=70B cardinality=575.77K | |--01:SCAN HDFS [tpch.orders o] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | predicates: o_orderdate < '1995-01-01', o_orderdate >= '1994-01-01' | runtime filters: RF008 -> o_custkey | row-size=38B cardinality=150.00K | 02:SCAN HDFS [tpch.lineitem l] - partitions=1/1 files=1 size=718.94MB + HDFS partitions=1/1 files=1 size=718.94MB runtime filters: RF005 -> l_suppkey, RF010 -> l_orderkey row-size=32B cardinality=6.00M ---- DISTRIBUTEDPLAN @@ -354,7 +354,7 @@ PLAN-ROOT SINK |--17:EXCHANGE [BROADCAST] | | | 05:SCAN HDFS [tpch.region] -| partitions=1/1 files=1 size=384B +| HDFS partitions=1/1 files=1 size=384B | predicates: r_name = 'ASIA' | row-size=21B cardinality=1 | @@ -366,7 +366,7 @@ PLAN-ROOT SINK |--16:EXCHANGE [BROADCAST] | | | 04:SCAN HDFS [tpch.nation] -| partitions=1/1 files=1 size=2.15KB +| HDFS partitions=1/1 files=1 size=2.15KB | runtime filters: RF000 -> n_regionkey | row-size=23B cardinality=25 | @@ -378,7 +378,7 @@ PLAN-ROOT SINK |--15:EXCHANGE [BROADCAST] | | | 03:SCAN HDFS [tpch.supplier s] -| partitions=1/1 files=1 size=1.33MB +| HDFS partitions=1/1 files=1 size=1.33MB | runtime 
filters: RF002 -> s_nationkey | row-size=10B cardinality=10.00K | @@ -390,7 +390,7 @@ PLAN-ROOT SINK |--14:EXCHANGE [BROADCAST] | | | 00:SCAN HDFS [tpch.customer] -| partitions=1/1 files=1 size=23.08MB +| HDFS partitions=1/1 files=1 size=23.08MB | runtime filters: RF002 -> tpch.customer.c_nationkey, RF004 -> c_nationkey | row-size=10B cardinality=150.00K | @@ -402,13 +402,13 @@ PLAN-ROOT SINK |--13:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [tpch.orders o] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | predicates: o_orderdate < '1995-01-01', o_orderdate >= '1994-01-01' | runtime filters: RF008 -> o_custkey | row-size=38B cardinality=150.00K | 02:SCAN HDFS [tpch.lineitem l] - partitions=1/1 files=1 size=718.94MB + HDFS partitions=1/1 files=1 size=718.94MB runtime filters: RF005 -> l_suppkey, RF010 -> l_orderkey row-size=32B cardinality=6.00M ==== @@ -446,7 +446,7 @@ PLAN-ROOT SINK | row-size=325B cardinality=1.01K | |--04:SCAN HDFS [tpch.region r] -| partitions=1/1 files=1 size=384B +| HDFS partitions=1/1 files=1 size=384B | predicates: r.r_name = 'EUROPE' | row-size=21B cardinality=1 | @@ -456,7 +456,7 @@ PLAN-ROOT SINK | row-size=304B cardinality=5.05K | |--03:SCAN HDFS [tpch.nation n] -| partitions=1/1 files=1 size=2.15KB +| HDFS partitions=1/1 files=1 size=2.15KB | runtime filters: RF000 -> n.n_regionkey | row-size=23B cardinality=25 | @@ -471,17 +471,17 @@ PLAN-ROOT SINK | | row-size=95B cardinality=5.05K | | | |--00:SCAN HDFS [tpch.part p] -| | partitions=1/1 files=1 size=22.83MB +| | HDFS partitions=1/1 files=1 size=22.83MB | | predicates: p.p_size = 15, p.p_type LIKE '%BRASS' | | row-size=71B cardinality=1.26K | | | 02:SCAN HDFS [tpch.partsupp ps] -| partitions=1/1 files=1 size=112.71MB +| HDFS partitions=1/1 files=1 size=112.71MB | runtime filters: RF006 -> ps.ps_partkey | row-size=24B cardinality=800.00K | 01:SCAN HDFS [tpch.supplier s] - partitions=1/1 files=1 size=1.33MB + HDFS partitions=1/1 files=1 size=1.33MB 
runtime filters: RF002 -> s.s_nationkey, RF004 -> s.s_suppkey row-size=187B cardinality=10.00K ---- DISTRIBUTEDPLAN @@ -497,7 +497,7 @@ PLAN-ROOT SINK |--12:EXCHANGE [BROADCAST] | | | 04:SCAN HDFS [tpch.region r] -| partitions=1/1 files=1 size=384B +| HDFS partitions=1/1 files=1 size=384B | predicates: r.r_name = 'EUROPE' | row-size=21B cardinality=1 | @@ -509,7 +509,7 @@ PLAN-ROOT SINK |--11:EXCHANGE [BROADCAST] | | | 03:SCAN HDFS [tpch.nation n] -| partitions=1/1 files=1 size=2.15KB +| HDFS partitions=1/1 files=1 size=2.15KB | runtime filters: RF000 -> n.n_regionkey | row-size=23B cardinality=25 | @@ -528,17 +528,17 @@ PLAN-ROOT SINK | |--09:EXCHANGE [BROADCAST] | | | | | 00:SCAN HDFS [tpch.part p] -| | partitions=1/1 files=1 size=22.83MB +| | HDFS partitions=1/1 files=1 size=22.83MB | | predicates: p.p_size = 15, p.p_type LIKE '%BRASS' | | row-size=71B cardinality=1.26K | | | 02:SCAN HDFS [tpch.partsupp ps] -| partitions=1/1 files=1 size=112.71MB +| HDFS partitions=1/1 files=1 size=112.71MB | runtime filters: RF006 -> ps.ps_partkey | row-size=24B cardinality=800.00K | 01:SCAN HDFS [tpch.supplier s] - partitions=1/1 files=1 size=1.33MB + HDFS partitions=1/1 files=1 size=1.33MB runtime filters: RF002 -> s.s_nationkey, RF004 -> s.s_suppkey row-size=187B cardinality=10.00K ==== @@ -577,12 +577,12 @@ PLAN-ROOT SINK | row-size=50B cardinality=150.00K | |--00:SCAN HDFS [tpch.orders] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | predicates: o_orderdate < '1993-10-01', o_orderdate >= '1993-07-01' | row-size=50B cardinality=150.00K | 01:SCAN HDFS [tpch.lineitem] - partitions=1/1 files=1 size=718.94MB + HDFS partitions=1/1 files=1 size=718.94MB predicates: l_commitdate < l_receiptdate runtime filters: RF000 -> l_orderkey row-size=52B cardinality=600.12K @@ -617,14 +617,14 @@ PLAN-ROOT SINK |--06:EXCHANGE [HASH(o_orderkey)] | | | 00:SCAN HDFS [tpch.orders] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 
size=162.56MB | predicates: o_orderdate < '1993-10-01', o_orderdate >= '1993-07-01' | row-size=50B cardinality=150.00K | 05:EXCHANGE [HASH(l_orderkey)] | 01:SCAN HDFS [tpch.lineitem] - partitions=1/1 files=1 size=718.94MB + HDFS partitions=1/1 files=1 size=718.94MB predicates: l_commitdate < l_receiptdate runtime filters: RF000 -> l_orderkey row-size=52B cardinality=600.12K @@ -652,11 +652,11 @@ PLAN-ROOT SINK | row-size=36B cardinality=7.50M | |--00:SCAN HDFS [tpch.orders] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | row-size=28B cardinality=1.50M | 01:SCAN HDFS [tpch.lineitem] - partitions=1/1 files=1 size=718.94MB + HDFS partitions=1/1 files=1 size=718.94MB row-size=8B cardinality=6.00M ---- DISTRIBUTEDPLAN PLAN-ROOT SINK @@ -688,13 +688,13 @@ PLAN-ROOT SINK |--06:EXCHANGE [HASH(o_orderkey)] | | | 00:SCAN HDFS [tpch.orders] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | row-size=28B cardinality=1.50M | 05:EXCHANGE [HASH(l_orderkey)] | 01:SCAN HDFS [tpch.lineitem] - partitions=1/1 files=1 size=718.94MB + HDFS partitions=1/1 files=1 size=718.94MB row-size=8B cardinality=6.00M ==== select o_orderpriority, count(*) as order_count @@ -720,11 +720,11 @@ PLAN-ROOT SINK | row-size=36B cardinality=6.00M | |--00:SCAN HDFS [tpch.orders] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | row-size=28B cardinality=1.50M | 01:SCAN HDFS [tpch.lineitem] - partitions=1/1 files=1 size=718.94MB + HDFS partitions=1/1 files=1 size=718.94MB row-size=8B cardinality=6.00M ---- DISTRIBUTEDPLAN PLAN-ROOT SINK @@ -756,13 +756,13 @@ PLAN-ROOT SINK |--06:EXCHANGE [HASH(o_orderkey)] | | | 00:SCAN HDFS [tpch.orders] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | row-size=28B cardinality=1.50M | 05:EXCHANGE [HASH(l_orderkey)] | 01:SCAN HDFS [tpch.lineitem] - partitions=1/1 files=1 size=718.94MB + HDFS partitions=1/1 files=1 size=718.94MB 
row-size=8B cardinality=6.00M ==== # order does not become the leftmost input because of the outer join; @@ -785,7 +785,7 @@ PLAN-ROOT SINK | row-size=39B cardinality=60.00K | |--02:SCAN HDFS [tpch.nation] -| partitions=1/1 files=1 size=2.15KB +| HDFS partitions=1/1 files=1 size=2.15KB | predicates: n_name = 'x' | row-size=21B cardinality=1 | @@ -795,12 +795,12 @@ PLAN-ROOT SINK | row-size=18B cardinality=1.50M | |--00:SCAN HDFS [tpch.customer] -| partitions=1/1 files=1 size=23.08MB +| HDFS partitions=1/1 files=1 size=23.08MB | runtime filters: RF000 -> c_nationkey | row-size=10B cardinality=150.00K | 01:SCAN HDFS [tpch.orders] - partitions=1/1 files=1 size=162.56MB + HDFS partitions=1/1 files=1 size=162.56MB runtime filters: RF002 -> o_custkey row-size=8B cardinality=1.50M ---- DISTRIBUTEDPLAN @@ -824,7 +824,7 @@ PLAN-ROOT SINK |--08:EXCHANGE [BROADCAST] | | | 02:SCAN HDFS [tpch.nation] -| partitions=1/1 files=1 size=2.15KB +| HDFS partitions=1/1 files=1 size=2.15KB | predicates: n_name = 'x' | row-size=21B cardinality=1 | @@ -836,14 +836,14 @@ PLAN-ROOT SINK |--07:EXCHANGE [HASH(c_custkey)] | | | 00:SCAN HDFS [tpch.customer] -| partitions=1/1 files=1 size=23.08MB +| HDFS partitions=1/1 files=1 size=23.08MB | runtime filters: RF000 -> c_nationkey | row-size=10B cardinality=150.00K | 06:EXCHANGE [HASH(o_custkey)] | 01:SCAN HDFS [tpch.orders] - partitions=1/1 files=1 size=162.56MB + HDFS partitions=1/1 files=1 size=162.56MB runtime filters: RF002 -> o_custkey row-size=8B cardinality=1.50M ==== @@ -865,7 +865,7 @@ PLAN-ROOT SINK | row-size=23B cardinality=9.00G | |--01:SCAN HDFS [tpch.orders] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | row-size=0B cardinality=1.50M | 03:HASH JOIN [INNER JOIN] @@ -874,12 +874,12 @@ PLAN-ROOT SINK | row-size=23B cardinality=6.00K | |--02:SCAN HDFS [tpch.nation] -| partitions=1/1 files=1 size=2.15KB +| HDFS partitions=1/1 files=1 size=2.15KB | predicates: n_name = 'x' | row-size=21B 
cardinality=1 | 00:SCAN HDFS [tpch.customer] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB runtime filters: RF000 -> c_nationkey row-size=2B cardinality=150.00K ---- DISTRIBUTEDPLAN @@ -901,7 +901,7 @@ PLAN-ROOT SINK |--07:EXCHANGE [BROADCAST] | | | 01:SCAN HDFS [tpch.orders] -| partitions=1/1 files=1 size=162.56MB +| HDFS partitions=1/1 files=1 size=162.56MB | row-size=0B cardinality=1.50M | 03:HASH JOIN [INNER JOIN, BROADCAST] @@ -912,12 +912,12 @@ PLAN-ROOT SINK |--06:EXCHANGE [BROADCAST] | | | 02:SCAN HDFS [tpch.nation] -| partitions=1/1 files=1 size=2.15KB +| HDFS partitions=1/1 files=1 size=2.15KB | predicates: n_name = 'x' | row-size=21B cardinality=1 | 00:SCAN HDFS [tpch.customer] - partitions=1/1 files=1 size=23.08MB + HDFS partitions=1/1 files=1 size=23.08MB runtime filters: RF000 -> c_nationkey row-size=2B cardinality=150.00K ==== @@ -938,15 +938,15 @@ PLAN-ROOT SINK | | row-size=0B cardinality=64 | | | |--01:SCAN HDFS [functional.alltypestiny b] -| | partitions=4/4 files=4 size=460B +| | HDFS partitions=4/4 files=4 size=460B | | row-size=0B cardinality=8 | | | 00:SCAN HDFS [functional.alltypestiny a] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=0B cardinality=8 | 02:SCAN HDFS [functional.alltypes c] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB row-size=4B cardinality=7.30K ==== # Test that tables are not re-ordered across outer/semi joins (IMPALA-860), @@ -976,7 +976,7 @@ PLAN-ROOT SINK | | row-size=20B cardinality=9 | | | |--05:SCAN HDFS [functional.alltypestiny t6] -| | partitions=4/4 files=4 size=460B +| | HDFS partitions=4/4 files=4 size=460B | | row-size=4B cardinality=8 | | | 08:HASH JOIN [LEFT OUTER JOIN] @@ -994,27 +994,27 @@ PLAN-ROOT SINK | | | | row-size=8B cardinality=8 | | | | | | | |--00:SCAN HDFS [functional.alltypestiny t1] -| | | | partitions=4/4 files=4 size=460B +| | | | HDFS partitions=4/4 files=4 size=460B | | | 
| runtime filters: RF002 -> t1.id | | | | row-size=4B cardinality=8 | | | | | | | 01:SCAN HDFS [functional.alltypes t2] -| | | partitions=24/24 files=24 size=478.45KB +| | | HDFS partitions=24/24 files=24 size=478.45KB | | | runtime filters: RF002 -> t2.id, RF006 -> t2.id | | | row-size=4B cardinality=7.30K | | | | | 02:SCAN HDFS [functional.alltypessmall t3] -| | partitions=4/4 files=4 size=6.32KB +| | HDFS partitions=4/4 files=4 size=6.32KB | | runtime filters: RF002 -> t3.id, RF004 -> t3.id | | row-size=4B cardinality=100 | | | 03:SCAN HDFS [functional.alltypesagg t4] -| partitions=11/11 files=11 size=814.73KB +| HDFS partitions=11/11 files=11 size=814.73KB | runtime filters: RF002 -> t4.id | row-size=4B cardinality=11.00K | 04:SCAN HDFS [functional.alltypes t5] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> t5.id row-size=4B cardinality=7.30K ==== @@ -1059,31 +1059,31 @@ PLAN-ROOT SINK | | | | | row-size=8B cardinality=8 | | | | | | | | | |--00:SCAN HDFS [functional.alltypestiny t1] -| | | | | partitions=4/4 files=4 size=460B +| | | | | HDFS partitions=4/4 files=4 size=460B | | | | | row-size=4B cardinality=8 | | | | | | | | | 01:SCAN HDFS [functional.alltypes t2] -| | | | partitions=24/24 files=24 size=478.45KB +| | | | HDFS partitions=24/24 files=24 size=478.45KB | | | | runtime filters: RF006 -> t2.id | | | | row-size=4B cardinality=7.30K | | | | | | | 02:SCAN HDFS [functional.alltypessmall t3] -| | | partitions=4/4 files=4 size=6.32KB +| | | HDFS partitions=4/4 files=4 size=6.32KB | | | runtime filters: RF004 -> t3.id | | | row-size=4B cardinality=100 | | | | | 03:SCAN HDFS [functional.alltypesagg t4] -| | partitions=11/11 files=11 size=814.73KB +| | HDFS partitions=11/11 files=11 size=814.73KB | | runtime filters: RF002 -> t4.id | | row-size=4B cardinality=11.00K | | | 04:SCAN HDFS [functional.alltypes t5] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 
size=478.45KB | runtime filters: RF000 -> t5.id | row-size=4B cardinality=7.30K | 05:SCAN HDFS [functional.alltypestiny t6] - partitions=4/4 files=4 size=460B + HDFS partitions=4/4 files=4 size=460B row-size=4B cardinality=8 ==== # Check that a join in between outer/semi joins is re-ordered correctly. @@ -1128,31 +1128,31 @@ PLAN-ROOT SINK | | | | | row-size=8B cardinality=8 | | | | | | | | | |--00:SCAN HDFS [functional.alltypestiny t1] -| | | | | partitions=4/4 files=4 size=460B +| | | | | HDFS partitions=4/4 files=4 size=460B | | | | | row-size=4B cardinality=8 | | | | | | | | | 01:SCAN HDFS [functional.alltypes t2] -| | | | partitions=24/24 files=24 size=478.45KB +| | | | HDFS partitions=24/24 files=24 size=478.45KB | | | | runtime filters: RF008 -> t2.id | | | | row-size=4B cardinality=7.30K | | | | | | | 03:SCAN HDFS [functional.alltypessmall t4] -| | | partitions=4/4 files=4 size=6.32KB +| | | HDFS partitions=4/4 files=4 size=6.32KB | | | runtime filters: RF006 -> t4.id | | | row-size=4B cardinality=100 | | | | | 02:SCAN HDFS [functional.alltypesagg t3] -| | partitions=11/11 files=11 size=814.73KB +| | HDFS partitions=11/11 files=11 size=814.73KB | | runtime filters: RF004 -> t3.id | | row-size=4B cardinality=11.00K | | | 04:SCAN HDFS [functional.alltypes t5] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | runtime filters: RF002 -> t5.id | row-size=4B cardinality=7.30K | 05:SCAN HDFS [functional.alltypestiny t6] - partitions=4/4 files=4 size=460B + HDFS partitions=4/4 files=4 size=460B runtime filters: RF000 -> t6.id row-size=4B cardinality=8 ==== @@ -1196,30 +1196,30 @@ PLAN-ROOT SINK | | | | | row-size=8B cardinality=8 | | | | | | | | | |--00:SCAN HDFS [functional.alltypestiny t1] -| | | | | partitions=4/4 files=4 size=460B +| | | | | HDFS partitions=4/4 files=4 size=460B | | | | | row-size=4B cardinality=8 | | | | | | | | | 01:SCAN HDFS [functional.alltypes t2] -| | | | partitions=24/24 files=24 size=478.45KB +| 
| | | HDFS partitions=24/24 files=24 size=478.45KB | | | | runtime filters: RF006 -> t2.id | | | | row-size=4B cardinality=7.30K | | | | | | | 03:SCAN HDFS [functional.alltypessmall t4] -| | | partitions=4/4 files=4 size=6.32KB +| | | HDFS partitions=4/4 files=4 size=6.32KB | | | runtime filters: RF004 -> t4.id | | | row-size=4B cardinality=100 | | | | | 02:SCAN HDFS [functional.alltypesagg t3] -| | partitions=11/11 files=11 size=814.73KB +| | HDFS partitions=11/11 files=11 size=814.73KB | | runtime filters: RF002 -> t3.id | | row-size=4B cardinality=11.00K | | | 04:SCAN HDFS [functional.alltypes t5] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | row-size=4B cardinality=7.30K | 05:SCAN HDFS [functional.alltypestiny t6] - partitions=4/4 files=4 size=460B + HDFS partitions=4/4 files=4 size=460B runtime filters: RF000 -> t6.id row-size=4B cardinality=8 ==== @@ -1247,7 +1247,7 @@ PLAN-ROOT SINK | row-size=28B cardinality=1 | |--09:SCAN HDFS [functional.alltypestiny t4] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=4B cardinality=8 | 11:HASH JOIN [RIGHT OUTER JOIN] @@ -1274,11 +1274,11 @@ PLAN-ROOT SINK | | | row-size=8B cardinality=8 | | | | | |--01:SCAN HDFS [functional.alltypestiny b] -| | | partitions=4/4 files=4 size=460B +| | | HDFS partitions=4/4 files=4 size=460B | | | row-size=4B cardinality=8 | | | | | 00:SCAN HDFS [functional.alltypestiny a] -| | partitions=4/4 files=4 size=460B +| | HDFS partitions=4/4 files=4 size=460B | | runtime filters: RF006 -> a.id | | row-size=4B cardinality=8 | | @@ -1294,11 +1294,11 @@ PLAN-ROOT SINK | | row-size=8B cardinality=0 | | | 06:SCAN HDFS [functional.alltypestiny b] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=4B cardinality=8 | 08:SCAN HDFS [functional.alltypes t3] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> t3.id, RF002 -> 
t3.id row-size=4B cardinality=7.30K ==== @@ -1326,7 +1326,7 @@ PLAN-ROOT SINK | row-size=28B cardinality=9 | |--09:SCAN HDFS [functional.alltypestiny t4] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=4B cardinality=8 | 11:HASH JOIN [FULL OUTER JOIN] @@ -1352,11 +1352,11 @@ PLAN-ROOT SINK | | | row-size=8B cardinality=8 | | | | | |--01:SCAN HDFS [functional.alltypestiny b] -| | | partitions=4/4 files=4 size=460B +| | | HDFS partitions=4/4 files=4 size=460B | | | row-size=4B cardinality=8 | | | | | 00:SCAN HDFS [functional.alltypestiny a] -| | partitions=4/4 files=4 size=460B +| | HDFS partitions=4/4 files=4 size=460B | | runtime filters: RF004 -> a.id | | row-size=4B cardinality=8 | | @@ -1372,11 +1372,11 @@ PLAN-ROOT SINK | | row-size=8B cardinality=0 | | | 06:SCAN HDFS [functional.alltypestiny b] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=4B cardinality=8 | 08:SCAN HDFS [functional.alltypes t3] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> t3.id row-size=4B cardinality=7.30K ==== @@ -1404,7 +1404,7 @@ PLAN-ROOT SINK | row-size=16B cardinality=1 | |--09:SCAN HDFS [functional.alltypestiny t4] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=4B cardinality=8 | 11:HASH JOIN [RIGHT SEMI JOIN] @@ -1431,11 +1431,11 @@ PLAN-ROOT SINK | | | row-size=8B cardinality=8 | | | | | |--01:SCAN HDFS [functional.alltypestiny b] -| | | partitions=4/4 files=4 size=460B +| | | HDFS partitions=4/4 files=4 size=460B | | | row-size=4B cardinality=8 | | | | | 00:SCAN HDFS [functional.alltypestiny a] -| | partitions=4/4 files=4 size=460B +| | HDFS partitions=4/4 files=4 size=460B | | runtime filters: RF008 -> a.id | | row-size=4B cardinality=8 | | @@ -1445,18 +1445,18 @@ PLAN-ROOT SINK | | row-size=4B cardinality=8 | | | |--05:SCAN HDFS [functional.alltypestiny a] -| | partitions=4/4 files=4 
size=460B +| | HDFS partitions=4/4 files=4 size=460B | | runtime filters: RF000 -> a.id, RF004 -> a.id | | row-size=4B cardinality=8 | | | 06:SCAN HDFS [functional.alltypes b] | partition predicates: b.month = 1 -| partitions=2/24 files=2 size=40.32KB +| HDFS partitions=2/24 files=2 size=40.32KB | runtime filters: RF000 -> b.id, RF004 -> b.id, RF006 -> b.id | row-size=4B cardinality=620 | 08:SCAN HDFS [functional.alltypes t3] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> t3.id, RF002 -> t3.id row-size=4B cardinality=7.30K ==== @@ -1484,7 +1484,7 @@ PLAN-ROOT SINK | row-size=16B cardinality=1 | |--09:SCAN HDFS [functional.alltypestiny t4] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=4B cardinality=8 | 11:HASH JOIN [RIGHT ANTI JOIN] @@ -1510,11 +1510,11 @@ PLAN-ROOT SINK | | | row-size=8B cardinality=8 | | | | | |--01:SCAN HDFS [functional.alltypestiny b] -| | | partitions=4/4 files=4 size=460B +| | | HDFS partitions=4/4 files=4 size=460B | | | row-size=4B cardinality=8 | | | | | 00:SCAN HDFS [functional.alltypestiny a] -| | partitions=4/4 files=4 size=460B +| | HDFS partitions=4/4 files=4 size=460B | | runtime filters: RF004 -> a.id | | row-size=4B cardinality=8 | | @@ -1523,18 +1523,18 @@ PLAN-ROOT SINK | | row-size=4B cardinality=620 | | | |--05:SCAN HDFS [functional.alltypestiny a] -| | partitions=4/4 files=4 size=460B +| | HDFS partitions=4/4 files=4 size=460B | | runtime filters: RF000 -> a.id, RF002 -> a.id | | row-size=4B cardinality=8 | | | 06:SCAN HDFS [functional.alltypes b] | partition predicates: b.month = 1 -| partitions=2/24 files=2 size=40.32KB +| HDFS partitions=2/24 files=2 size=40.32KB | runtime filters: RF000 -> b.id, RF002 -> b.id | row-size=4B cardinality=620 | 08:SCAN HDFS [functional.alltypes t3] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> t3.id row-size=4B 
cardinality=7.30K ==== @@ -1557,17 +1557,21 @@ WHERE `$a$2`.`$c$1` > t4.id ---- PLAN PLAN-ROOT SINK | -10:AGGREGATE [FINALIZE] +11:AGGREGATE [FINALIZE] | output: sum(t4.tinyint_col) | row-size=8B cardinality=1 | -09:HASH JOIN [LEFT SEMI JOIN] +10:HASH JOIN [LEFT SEMI JOIN] | hash predicates: t4.bigint_col = tt1.int_col | runtime filters: RF000 <- tt1.int_col | row-size=31B cardinality=8 | -|--06:SCAN HDFS [functional.alltypestiny tt1] -| partitions=4/4 files=4 size=460B +|--09:AGGREGATE [FINALIZE] +| | group by: tt1.int_col +| | row-size=4B cardinality=2 +| | +| 06:SCAN HDFS [functional.alltypestiny tt1] +| HDFS partitions=4/4 files=4 size=460B | row-size=4B cardinality=8 | 08:NESTED LOOP JOIN [INNER JOIN] @@ -1580,7 +1584,7 @@ PLAN-ROOT SINK | | row-size=8B cardinality=1 | | | 04:SCAN HDFS [functional.alltypesagg t1] -| partitions=11/11 files=11 size=814.73KB +| HDFS partitions=11/11 files=11 size=814.73KB | row-size=4B cardinality=11.00K | 07:NESTED LOOP JOIN [CROSS JOIN] @@ -1593,16 +1597,16 @@ PLAN-ROOT SINK | | row-size=10B cardinality=1 | | | |--02:SCAN HDFS [functional.alltypestiny t2] -| | partitions=4/4 files=4 size=460B +| | HDFS partitions=4/4 files=4 size=460B | | row-size=2B cardinality=8 | | | 01:SCAN HDFS [functional.alltypes t1] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | runtime filters: RF002 -> t1.bigint_col | row-size=8B cardinality=7.30K | 00:SCAN HDFS [functional.alltypestiny t4] - partitions=4/4 files=4 size=460B + HDFS partitions=4/4 files=4 size=460B runtime filters: RF000 -> t4.bigint_col row-size=13B cardinality=8 ==== @@ -1623,11 +1627,11 @@ PLAN-ROOT SINK | row-size=25B cardinality=8 | |--00:SCAN HDFS [functional.alltypestiny a] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=9B cardinality=8 | 01:SCAN HDFS [functional.alltypessmall b] - partitions=4/4 files=4 size=6.32KB + HDFS partitions=4/4 files=4 size=6.32KB runtime filters: RF000 -> b.id, 
RF001 -> b.int_col row-size=16B cardinality=100 ==== @@ -1656,16 +1660,16 @@ PLAN-ROOT SINK | | row-size=27B cardinality=8 | | | |--00:SCAN HDFS [functional.alltypestiny a] -| | partitions=4/4 files=4 size=460B +| | HDFS partitions=4/4 files=4 size=460B | | row-size=9B cardinality=8 | | | 01:SCAN HDFS [functional.alltypessmall b] -| partitions=4/4 files=4 size=6.32KB +| HDFS partitions=4/4 files=4 size=6.32KB | runtime filters: RF002 -> b.id | row-size=18B cardinality=100 | 02:SCAN HDFS [functional.alltypes c] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> c.tinyint_col row-size=6B cardinality=7.30K ==== @@ -1693,16 +1697,16 @@ PLAN-ROOT SINK | | row-size=10B cardinality=9 | | | |--00:SCAN HDFS [functional.alltypestiny a] -| | partitions=4/4 files=4 size=460B +| | HDFS partitions=4/4 files=4 size=460B | | row-size=4B cardinality=8 | | | 01:SCAN HDFS [functional.alltypessmall b] -| partitions=4/4 files=4 size=6.32KB +| HDFS partitions=4/4 files=4 size=6.32KB | runtime filters: RF002 -> b.id | row-size=6B cardinality=100 | 02:SCAN HDFS [functional.alltypes c] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> c.tinyint_col row-size=6B cardinality=7.30K ==== @@ -1727,7 +1731,7 @@ PLAN-ROOT SINK | row-size=81B cardinality=83.39K | |--03:SCAN HDFS [functional.alltypestiny t3] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=33B cardinality=8 | 02:HASH JOIN [RIGHT OUTER JOIN] @@ -1737,11 +1741,11 @@ PLAN-ROOT SINK | row-size=48B cardinality=83.39K | |--00:SCAN HDFS [functional.alltypesagg t1] -| partitions=11/11 files=11 size=814.73KB +| HDFS partitions=11/11 files=11 size=814.73KB | row-size=15B cardinality=11.00K | 01:SCAN HDFS [functional.alltypes t2] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> t2.date_string_col, RF001 -> 
t2.date_string_col row-size=33B cardinality=7.30K ==== @@ -1766,12 +1770,12 @@ PLAN-ROOT SINK | row-size=89B cardinality=1 | |--01:SCAN HDFS [functional.alltypes] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | predicates: timestamp_col = TIMESTAMP '2016-11-20 00:00:00' | row-size=20B cardinality=1 | 00:SCAN HDFS [functional.alltypes a] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB predicates: a.date_string_col = '' runtime filters: RF000 -> a.id row-size=89B cardinality=10 @@ -1791,12 +1795,12 @@ PLAN-ROOT SINK | row-size=89B cardinality=1 | |--00:SCAN HDFS [functional.alltypes a] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | predicates: a.timestamp_col = TIMESTAMP '2016-11-20 00:00:00' | row-size=89B cardinality=1 | 01:SCAN HDFS [functional.alltypes] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB predicates: date_string_col = '' runtime filters: RF000 -> functional.alltypes.id row-size=24B cardinality=10 @@ -1817,12 +1821,12 @@ PLAN-ROOT SINK | row-size=89B cardinality=1 | |--01:SCAN HDFS [functional.alltypes] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | predicates: timestamp_col IS NOT DISTINCT FROM TIMESTAMP '2016-11-20 00:00:00' | row-size=20B cardinality=1 | 00:SCAN HDFS [functional.alltypes a] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB predicates: a.date_string_col IS NOT DISTINCT FROM '' runtime filters: RF000 -> a.id row-size=89B cardinality=10 @@ -1842,12 +1846,12 @@ PLAN-ROOT SINK | row-size=89B cardinality=1 | |--00:SCAN HDFS [functional.alltypes a] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | predicates: a.timestamp_col IS NOT DISTINCT FROM TIMESTAMP '2016-11-20 00:00:00' | row-size=89B cardinality=1 | 01:SCAN HDFS [functional.alltypes] - 
partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB predicates: date_string_col IS NOT DISTINCT FROM '' runtime filters: RF000 -> functional.alltypes.id row-size=24B cardinality=10 diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test index 8e1f69368..678054dcd 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/joins.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/joins.test @@ -2181,24 +2181,34 @@ on (a.string_col = b.string_col and a.int_col = b.int_col) ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -07:EXCHANGE [UNPARTITIONED] +10:EXCHANGE [UNPARTITIONED] | -03:HASH JOIN [LEFT SEMI JOIN, PARTITIONED] +04:HASH JOIN [LEFT SEMI JOIN, PARTITIONED] | hash predicates: int_col = b.int_col, string_col = b.string_col | runtime filters: RF000 <- b.int_col, RF001 <- b.string_col | row-size=17B cardinality=4 | -|--06:EXCHANGE [HASH(b.int_col,b.string_col)] +|--09:EXCHANGE [HASH(b.int_col,b.string_col)] +| | +| 08:AGGREGATE [FINALIZE] +| | group by: b.string_col, b.int_col +| | row-size=17B cardinality=100 +| | +| 07:EXCHANGE [HASH(b.string_col,b.int_col)] +| | +| 03:AGGREGATE [STREAMING] +| | group by: b.string_col, b.int_col +| | row-size=17B cardinality=100 | | | 02:SCAN HDFS [functional.alltypes b] | HDFS partitions=24/24 files=24 size=478.45KB | row-size=17B cardinality=7.30K | -05:AGGREGATE [FINALIZE] +06:AGGREGATE [FINALIZE] | group by: int_col, string_col | row-size=17B cardinality=4 | -04:EXCHANGE [HASH(int_col,string_col)] +05:EXCHANGE [HASH(int_col,string_col)] | 01:AGGREGATE [STREAMING] | group by: int_col, string_col @@ -2566,42 +2576,50 @@ where a.id < 10 ---- PLAN PLAN-ROOT SINK | -08:NESTED LOOP JOIN [RIGHT ANTI JOIN] +10:NESTED LOOP JOIN [LEFT ANTI JOIN] | join predicates: c.string_col != e.string_col | row-size=267B cardinality=100 | -|--07:NESTED LOOP JOIN [RIGHT SEMI JOIN] -| | join 
predicates: b.bigint_col > d.bigint_col -| | row-size=267B cardinality=100 +|--09:AGGREGATE [FINALIZE] +| | group by: e.string_col +| | row-size=15B cardinality=963 | | -| |--06:NESTED LOOP JOIN [RIGHT OUTER JOIN] -| | | join predicates: a.int_col != c.int_col OR a.tinyint_col > c.tinyint_col -| | | row-size=267B cardinality=100 -| | | -| | |--05:NESTED LOOP JOIN [INNER JOIN] -| | | | predicates: a.id < b.id -| | | | row-size=178B cardinality=100 -| | | | -| | | |--00:SCAN HDFS [functional.alltypestiny a] -| | | | HDFS partitions=4/4 files=4 size=460B -| | | | predicates: a.id < 10 -| | | | row-size=89B cardinality=1 -| | | | -| | | 01:SCAN HDFS [functional.alltypessmall b] -| | | HDFS partitions=4/4 files=4 size=6.32KB -| | | row-size=89B cardinality=100 -| | | -| | 02:SCAN HDFS [functional.alltypes c] -| | HDFS partitions=24/24 files=24 size=478.45KB -| | row-size=89B cardinality=7.30K +| 04:SCAN HDFS [functional.alltypesagg e] +| HDFS partitions=11/11 files=11 size=814.73KB +| row-size=15B cardinality=11.00K +| +08:NESTED LOOP JOIN [LEFT SEMI JOIN] +| join predicates: b.bigint_col > d.bigint_col +| row-size=267B cardinality=100 +| +|--07:AGGREGATE [FINALIZE] +| | group by: d.bigint_col +| | row-size=8B cardinality=1.01K | | | 03:SCAN HDFS [functional.alltypesagg d] | HDFS partitions=11/11 files=11 size=814.73KB | row-size=8B cardinality=11.00K | -04:SCAN HDFS [functional.alltypesagg e] - HDFS partitions=11/11 files=11 size=814.73KB - row-size=15B cardinality=11.00K +06:NESTED LOOP JOIN [RIGHT OUTER JOIN] +| join predicates: a.int_col != c.int_col OR a.tinyint_col > c.tinyint_col +| row-size=267B cardinality=100 +| +|--05:NESTED LOOP JOIN [INNER JOIN] +| | predicates: a.id < b.id +| | row-size=178B cardinality=100 +| | +| |--00:SCAN HDFS [functional.alltypestiny a] +| | HDFS partitions=4/4 files=4 size=460B +| | predicates: a.id < 10 +| | row-size=89B cardinality=1 +| | +| 01:SCAN HDFS [functional.alltypessmall b] +| HDFS partitions=4/4 files=4 size=6.32KB +| 
row-size=89B cardinality=100 +| +02:SCAN HDFS [functional.alltypes c] + HDFS partitions=24/24 files=24 size=478.45KB + row-size=89B cardinality=7.30K ==== # Regression test for IMPALA-2495: Crash: impala::InPredicate::SetLookupPrepare select count(id) from functional.alltypestiny t1 @@ -3067,7 +3085,7 @@ PLAN-ROOT SINK | row-size=96B cardinality=0 | |--01:SCAN HDFS [functional_text_gzip.alltypes b] -| HDFS partitions=24/24 files=24 size=123.32KB +| HDFS partitions=24/24 files=24 size=77.88KB | row-size=80B cardinality=unavailable | 00:SCAN HDFS [functional_text_gzip.emptytable a] diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test b/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test index ff61101e2..78705c6e8 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/nested-collections.test @@ -28,7 +28,7 @@ PLAN-ROOT SINK | 01:AGGREGATE [FINALIZE] | output: count(f21) -| row-size=8B cardinality=0 +| row-size=8B cardinality=1 | 00:SCAN HDFS [functional.allcomplextypes.complex_nested_struct_col.f2.f12] partitions=0/0 files=0 size=0B @@ -46,7 +46,7 @@ PLAN-ROOT SINK | 03:AGGREGATE [FINALIZE] | output: count(*) -| row-size=8B cardinality=0 +| row-size=8B cardinality=1 | 02:HASH JOIN [INNER JOIN] | hash predicates: a.f1 = b.f1 @@ -94,7 +94,7 @@ PLAN-ROOT SINK | | row-size=0B cardinality=10 | | | 00:SCAN HDFS [tpch_nested_parquet.region r] -| partitions=1/1 files=1 size=3.44KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: !empty(r.r_nations) | row-size=12B cardinality=5 | @@ -104,12 +104,12 @@ PLAN-ROOT SINK | row-size=163B cardinality=10.16K | |--06:SCAN HDFS [tpch_nested_parquet.supplier s] -| partitions=1/1 files=1 size=41.79MB +| HDFS partitions=1/1 files=1 size=41.80MB | runtime filters: RF000 -> s.s_nationkey, RF001 -> s_comment | row-size=77B cardinality=10.00K | 05:SCAN HDFS 
[tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB runtime filters: RF000 -> c_nationkey, RF001 -> c.c_comment, RF004 -> c.c_nationkey, RF005 -> c_comment row-size=87B cardinality=150.00K ==== @@ -153,6 +153,7 @@ PLAN-ROOT SINK | | row-size=16B cardinality=1 | | | 03:UNNEST [a.int_array_col b] +| limit: 1 | row-size=0B cardinality=10 | 00:SCAN HDFS [functional.allcomplextypes a] @@ -200,6 +201,7 @@ PLAN-ROOT SINK | | row-size=16B cardinality=1 | | | 03:UNNEST [a.int_array_col b] +| limit: 1 | row-size=0B cardinality=10 | 00:SCAN HDFS [functional.allcomplextypes a] @@ -224,6 +226,7 @@ PLAN-ROOT SINK | | row-size=16B cardinality=1 | | | 03:UNNEST [a.int_array_col] +| limit: 1 | row-size=0B cardinality=10 | 00:SCAN HDFS [functional.allcomplextypes a] @@ -836,24 +839,24 @@ where e.f1 < 10 ---- PLAN PLAN-ROOT SINK | -11:HASH JOIN [INNER JOIN] +12:HASH JOIN [INNER JOIN] | hash predicates: d.id = b.id | runtime filters: RF000 <- b.id | row-size=36B cardinality=0 | -|--10:SUBPLAN +|--11:SUBPLAN | | row-size=32B cardinality=0 | | -| |--08:NESTED LOOP JOIN [CROSS JOIN] +| |--09:NESTED LOOP JOIN [CROSS JOIN] | | | row-size=32B cardinality=10 | | | -| | |--06:SINGULAR ROW SRC +| | |--07:SINGULAR ROW SRC | | | row-size=24B cardinality=1 | | | -| | 07:UNNEST [a.struct_array_col e] +| | 08:UNNEST [a.struct_array_col e] | | row-size=0B cardinality=10 | | -| 09:HASH JOIN [RIGHT ANTI JOIN] +| 10:HASH JOIN [RIGHT ANTI JOIN] | | hash predicates: c.int_col = b.int_col | | row-size=24B cardinality=0 | | @@ -869,16 +872,20 @@ PLAN-ROOT SINK | | | row-size=16B cardinality=0 | | | | | 01:SCAN HDFS [functional.alltypestiny b] -| | partitions=4/4 files=4 size=460B +| | HDFS partitions=4/4 files=4 size=460B | | runtime filters: RF002 -> b.id | | row-size=8B cardinality=8 | | +| 06:AGGREGATE [FINALIZE] +| | group by: c.int_col +| | row-size=4B cardinality=10 +| | | 02:SCAN HDFS [functional.alltypessmall c] -| partitions=4/4 
files=4 size=6.32KB +| HDFS partitions=4/4 files=4 size=6.32KB | row-size=4B cardinality=100 | 03:SCAN HDFS [functional.alltypes d] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> d.id row-size=4B cardinality=7.30K ==== @@ -902,7 +909,7 @@ PLAN-ROOT SINK | | row-size=4B cardinality=100 | | | |--02:SCAN HDFS [functional.alltypessmall c] -| | partitions=4/4 files=4 size=6.32KB +| | HDFS partitions=4/4 files=4 size=6.32KB | | row-size=4B cardinality=100 | | | 09:SUBPLAN @@ -927,11 +934,11 @@ PLAN-ROOT SINK | | row-size=16B cardinality=0 | | | 01:SCAN HDFS [functional.alltypestiny b] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=8B cardinality=8 | 03:SCAN HDFS [functional.alltypes d] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB row-size=4B cardinality=7.30K ==== # Test subplans: Test joining relative refs with independent table refs. @@ -989,13 +996,13 @@ PLAN-ROOT SINK | | row-size=36B cardinality=0 | | | 05:SCAN HDFS [functional.alltypessmall c] -| partitions=4/4 files=4 size=6.32KB +| HDFS partitions=4/4 files=4 size=6.32KB | predicates: c.id < 10, c.int_col > 30 | runtime filters: RF002 -> c.id | row-size=89B cardinality=10 | 06:SCAN HDFS [functional.alltypes e] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB runtime filters: RF000 -> e.id row-size=4B cardinality=7.30K ==== @@ -1015,7 +1022,7 @@ PLAN-ROOT SINK | row-size=89B cardinality=8 | |--06:SCAN HDFS [functional.alltypestiny e] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | row-size=89B cardinality=8 | 11:SUBPLAN @@ -1053,7 +1060,7 @@ PLAN-ROOT SINK | row-size=36B cardinality=0 | 05:SCAN HDFS [functional.alltypessmall c] - partitions=4/4 files=4 size=6.32KB + HDFS partitions=4/4 files=4 size=6.32KB predicates: c.int_col > 30 row-size=89B cardinality=10 ==== @@ -1069,7 +1076,7 @@ 
PLAN-ROOT SINK | row-size=24B cardinality=0 | |--05:NESTED LOOP JOIN [CROSS JOIN] -| | row-size=24B cardinality=0 +| | row-size=24B cardinality=1 | | | |--02:SINGULAR ROW SRC | | row-size=16B cardinality=1 @@ -1077,7 +1084,7 @@ PLAN-ROOT SINK | 04:AGGREGATE [FINALIZE] | | output: count(*) | | having: count(*) < 10 -| | row-size=8B cardinality=0 +| | row-size=8B cardinality=1 | | | 03:UNNEST [a.int_array_col] | row-size=0B cardinality=10 @@ -1098,7 +1105,7 @@ PLAN-ROOT SINK | row-size=32B cardinality=0 | |--05:NESTED LOOP JOIN [CROSS JOIN] -| | row-size=32B cardinality=10 +| | row-size=32B cardinality=1 | | | |--02:SINGULAR ROW SRC | | row-size=16B cardinality=1 @@ -1107,7 +1114,7 @@ PLAN-ROOT SINK | | output: count(*) | | group by: f1 | | having: count(*) < 10 -| | row-size=16B cardinality=10 +| | row-size=16B cardinality=1 | | | 03:UNNEST [a.struct_array_col] | row-size=0B cardinality=10 @@ -1229,6 +1236,7 @@ PLAN-ROOT SINK | | row-size=16B cardinality=1 | | | 03:UNNEST [a.int_array_col] +| limit: 1 | row-size=4B cardinality=10 | 00:SCAN HDFS [functional.allcomplextypes a] @@ -1252,6 +1260,7 @@ PLAN-ROOT SINK | | row-size=16B cardinality=1 | | | 03:UNNEST [a.int_array_col] +| limit: 1 | row-size=0B cardinality=10 | 00:SCAN HDFS [functional.allcomplextypes a] @@ -1648,7 +1657,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: c_custkey < 10, !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderkey < 5 predicates on o_lineitems: l_linenumber < 3 @@ -1780,7 +1789,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: c.c_custkey = c.c_nationkey, !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o.o_orderkey = o.o_shippriority predicates on l: l.l_partkey = l.l_linenumber, 
l.l_partkey = l.l_suppkey @@ -1895,7 +1904,7 @@ PLAN-ROOT SINK | row-size=44B cardinality=1 | 00:SCAN HDFS [tpch_nested_parquet.supplier s] - partitions=1/1 files=1 size=41.79MB + HDFS partitions=1/1 files=1 size=41.80MB row-size=44B cardinality=10.00K ==== # IMPALA-2383: Test join ordering of relative collection ref after an outer join. @@ -2068,7 +2077,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB row-size=44B cardinality=150.00K ==== # IMPALA-2412: Test join ordering in nested subplans. Same as above @@ -2127,7 +2136,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) row-size=44B cardinality=150.00K ==== @@ -2184,7 +2193,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB row-size=44B cardinality=150.00K ==== # IMPALA-2446: Test predicate assignment when outer join has no conjuncts in @@ -2331,7 +2340,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB row-size=12B cardinality=150.00K ==== # IMPALA-3065/IMPALA-3062: Test correct assignment of !empty() predicates. 
@@ -2349,7 +2358,7 @@ PLAN-ROOT SINK | row-size=28B cardinality=1.50M | |--05:SCAN HDFS [tpch_nested_parquet.customer c2] -| partitions=1/1 files=4 size=288.99MB +| HDFS partitions=1/1 files=4 size=289.08MB | row-size=8B cardinality=150.00K | 01:SUBPLAN @@ -2365,7 +2374,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c1] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB runtime filters: RF000 -> c1.c_custkey row-size=20B cardinality=150.00K ==== @@ -2386,6 +2395,7 @@ PLAN-ROOT SINK | | row-size=40B cardinality=10 | | | |--04:UNNEST [c2.c_orders o2] +| | limit: 1 | | row-size=0B cardinality=10 | | | 05:NESTED LOOP JOIN [CROSS JOIN] @@ -2402,11 +2412,11 @@ PLAN-ROOT SINK | row-size=40B cardinality=300.00K | |--01:SCAN HDFS [tpch_nested_parquet.customer c2] -| partitions=1/1 files=4 size=288.99MB +| HDFS partitions=1/1 files=4 size=289.08MB | row-size=20B cardinality=150.00K | 00:SCAN HDFS [tpch_nested_parquet.customer c1] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB row-size=20B cardinality=150.00K ==== # IMPALA-3084: Test correct assignment of NULL checking predicates @@ -2431,7 +2441,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB row-size=230B cardinality=150.00K ==== # IMPALA-2540: Complex query mixing joins on base tables and nested collections. 
@@ -2486,21 +2496,21 @@ PLAN-ROOT SINK | | | row-size=61B cardinality=5 | | | | | |--03:SCAN HDFS [tpch_nested_parquet.region t5] -| | | partitions=1/1 files=1 size=3.44KB +| | | HDFS partitions=1/1 files=1 size=3.59KB | | | row-size=2B cardinality=5 | | | | | 01:SCAN HDFS [tpch_nested_parquet.customer t2] -| | partitions=1/1 files=4 size=288.99MB +| | HDFS partitions=1/1 files=4 size=289.08MB | | runtime filters: RF004 -> t2.c_custkey | | row-size=59B cardinality=150.00K | | | 02:SCAN HDFS [tpch_nested_parquet.region t3] -| partitions=1/1 files=1 size=3.44KB +| HDFS partitions=1/1 files=1 size=3.59KB | runtime filters: RF002 -> t3.r_comment | row-size=78B cardinality=5 | 00:SCAN HDFS [tpch_nested_parquet.region.r_nations t1] - partitions=1/1 files=1 size=3.44KB + HDFS partitions=1/1 files=1 size=3.59KB runtime filters: RF000 -> t1.pos row-size=8B cardinality=50 ==== @@ -2533,7 +2543,7 @@ PLAN-ROOT SINK | row-size=8B cardinality=2 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB predicates on c_orders: o_orderkey = 6000000 row-size=20B cardinality=150.00K ==== @@ -2584,6 +2594,48 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB row-size=20B cardinality=150.00K ==== +# IMPALA-1270: SEMI JOIN in subplan with distinct added by planner. +# The single node planner needs to correctly set the table ref ids for +# the left branch of the semi join for subplan generation to work. 
+select a.id, e.key from functional_parquet.complextypestbl a +left semi join functional.alltypessmall c on (a.id = c.int_col) +inner join a.nested_struct.g e +where length(e.key) > 0 +---- PLAN +PLAN-ROOT SINK +| +07:SUBPLAN +| row-size=32B cardinality=44.00K +| +|--05:NESTED LOOP JOIN [CROSS JOIN] +| | row-size=32B cardinality=10 +| | +| |--03:SINGULAR ROW SRC +| | row-size=20B cardinality=1 +| | +| 04:UNNEST [a.nested_struct.g e] +| row-size=0B cardinality=10 +| +06:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: a.id = c.int_col +| runtime filters: RF000 <- c.int_col +| row-size=20B cardinality=4.40K +| +|--02:AGGREGATE [FINALIZE] +| | group by: c.int_col +| | row-size=4B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypessmall c] +| HDFS partitions=4/4 files=4 size=6.32KB +| row-size=4B cardinality=100 +| +00:SCAN HDFS [functional_parquet.complextypestbl a] + HDFS partitions=1/1 files=2 size=6.92KB + predicates: !empty(a.nested_struct.g) + predicates on e: length(e.`key`) > 0 + runtime filters: RF000 -> a.id + row-size=20B cardinality=4.40K +==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test b/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test index 2d717556e..9ceac1560 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/nested-loop-join.test @@ -14,7 +14,7 @@ PLAN-ROOT SINK | row-size=273B cardinality=7.30K | |--02:SCAN HDFS [functional.alltypesagg c] -| partitions=11/11 files=11 size=814.73KB +| HDFS partitions=11/11 files=11 size=814.73KB | predicates: c.bigint_col = 10 | row-size=95B cardinality=11 | @@ -23,11 +23,11 @@ PLAN-ROOT SINK | row-size=178B cardinality=7.30K | |--01:SCAN HDFS [functional.alltypes b] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | row-size=89B cardinality=7.30K | 00:SCAN HDFS [functional.alltypestiny a] - 
partitions=4/4 files=4 size=460B + HDFS partitions=4/4 files=4 size=460B predicates: a.id < 10 row-size=89B cardinality=1 ---- DISTRIBUTEDPLAN @@ -48,7 +48,7 @@ PLAN-ROOT SINK | row-size=95B cardinality=1.10K | |--02:SCAN HDFS [functional.alltypesagg d] -| partitions=11/11 files=11 size=814.73KB +| HDFS partitions=11/11 files=11 size=814.73KB | predicates: d.bigint_col < 10 | row-size=95B cardinality=1.10K | @@ -57,11 +57,11 @@ PLAN-ROOT SINK | row-size=1B cardinality=100 | |--01:SCAN HDFS [functional.alltypessmall c] -| partitions=4/4 files=4 size=6.32KB +| HDFS partitions=4/4 files=4 size=6.32KB | row-size=1B cardinality=100 | 00:SCAN HDFS [functional.alltypestiny a] - partitions=4/4 files=4 size=460B + HDFS partitions=4/4 files=4 size=460B row-size=1B cardinality=8 ---- DISTRIBUTEDPLAN NotImplementedException: Error generating a valid execution plan for this query. A RIGHT SEMI JOIN type with no equi-join predicates can only be executed with a single node plan. @@ -84,7 +84,7 @@ PLAN-ROOT SINK | row-size=362B cardinality=18.40K | |--03:SCAN HDFS [functional.alltypes d] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | row-size=89B cardinality=7.30K | 05:NESTED LOOP JOIN [FULL OUTER JOIN] @@ -92,7 +92,7 @@ PLAN-ROOT SINK | row-size=273B cardinality=11.10K | |--02:SCAN HDFS [functional.alltypesagg c] -| partitions=11/11 files=11 size=814.73KB +| HDFS partitions=11/11 files=11 size=814.73KB | row-size=95B cardinality=11.00K | 04:NESTED LOOP JOIN [FULL OUTER JOIN] @@ -100,11 +100,11 @@ PLAN-ROOT SINK | row-size=178B cardinality=101 | |--01:SCAN HDFS [functional.alltypessmall b] -| partitions=4/4 files=4 size=6.32KB +| HDFS partitions=4/4 files=4 size=6.32KB | row-size=89B cardinality=100 | 00:SCAN HDFS [functional.alltypestiny a] - partitions=4/4 files=4 size=460B + HDFS partitions=4/4 files=4 size=460B predicates: a.id < 10 row-size=89B cardinality=1 ---- DISTRIBUTEDPLAN @@ -127,12 +127,12 @@ PLAN-ROOT SINK | row-size=8B 
cardinality=10 | |--01:SCAN HDFS [functional.alltypessmall b] -| partitions=4/4 files=4 size=6.32KB +| HDFS partitions=4/4 files=4 size=6.32KB | predicates: b.int_col = 5 | row-size=8B cardinality=10 | 00:SCAN HDFS [functional.alltypestiny a] - partitions=4/4 files=4 size=460B + HDFS partitions=4/4 files=4 size=460B row-size=4B cardinality=8 ---- DISTRIBUTEDPLAN NotImplementedException: Error generating a valid execution plan for this query. A RIGHT ANTI JOIN type with no equi-join predicates can only be executed with a single node plan. @@ -149,14 +149,14 @@ PLAN-ROOT SINK | 09:AGGREGATE [FINALIZE] | output: count(*) -| row-size=8B cardinality=0 +| row-size=8B cardinality=1 | 08:NESTED LOOP JOIN [RIGHT ANTI JOIN] | join predicates: d.tinyint_col > e.tinyint_col | row-size=5B cardinality=0 | |--04:SCAN HDFS [functional.alltypesnopart e] -| partitions=1/1 files=0 size=0B +| HDFS partitions=1/1 files=0 size=0B | predicates: e.id < 10 | row-size=5B cardinality=0 | @@ -165,7 +165,7 @@ PLAN-ROOT SINK | row-size=1B cardinality=7.30K | |--03:SCAN HDFS [functional.alltypes d] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | row-size=1B cardinality=7.30K | 06:NESTED LOOP JOIN [RIGHT OUTER JOIN] @@ -173,7 +173,7 @@ PLAN-ROOT SINK | row-size=17B cardinality=11.00K | |--02:SCAN HDFS [functional.alltypesagg c] -| partitions=11/11 files=11 size=814.73KB +| HDFS partitions=11/11 files=11 size=814.73KB | row-size=5B cardinality=11.00K | 05:NESTED LOOP JOIN [INNER JOIN] @@ -181,11 +181,11 @@ PLAN-ROOT SINK | row-size=12B cardinality=8 | |--01:SCAN HDFS [functional.alltypessmall b] -| partitions=4/4 files=4 size=6.32KB +| HDFS partitions=4/4 files=4 size=6.32KB | row-size=4B cardinality=100 | 00:SCAN HDFS [functional.alltypestiny a] - partitions=4/4 files=4 size=460B + HDFS partitions=4/4 files=4 size=460B row-size=8B cardinality=8 ---- DISTRIBUTEDPLAN NotImplementedException: Error generating a valid execution plan for this query. 
A RIGHT ANTI JOIN type with no equi-join predicates can only be executed with a single node plan. @@ -203,13 +203,13 @@ PLAN-ROOT SINK | 15:AGGREGATE [FINALIZE] | output: count:merge(*) -| row-size=8B cardinality=0 +| row-size=8B cardinality=1 | 14:EXCHANGE [UNPARTITIONED] | 09:AGGREGATE | output: count(*) -| row-size=8B cardinality=0 +| row-size=8B cardinality=1 | 08:NESTED LOOP JOIN [LEFT ANTI JOIN, BROADCAST] | join predicates: d.tinyint_col > e.tinyint_col @@ -236,23 +236,23 @@ PLAN-ROOT SINK | | | |--10:EXCHANGE [BROADCAST] | | | | | | | | | 00:SCAN HDFS [functional.alltypestiny a] -| | | | partitions=4/4 files=4 size=460B +| | | | HDFS partitions=4/4 files=4 size=460B | | | | row-size=8B cardinality=8 | | | | | | | 01:SCAN HDFS [functional.alltypessmall b] -| | | partitions=4/4 files=4 size=6.32KB +| | | HDFS partitions=4/4 files=4 size=6.32KB | | | row-size=4B cardinality=100 | | | | | 02:SCAN HDFS [functional.alltypesagg c] -| | partitions=11/11 files=11 size=814.73KB +| | HDFS partitions=11/11 files=11 size=814.73KB | | row-size=5B cardinality=11.00K | | | 03:SCAN HDFS [functional.alltypes d] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | row-size=1B cardinality=7.30K | 04:SCAN HDFS [functional.alltypesnopart e] - partitions=1/1 files=0 size=0B + HDFS partitions=1/1 files=0 size=0B predicates: e.id < 10 row-size=5B cardinality=0 ==== @@ -272,7 +272,7 @@ PLAN-ROOT SINK |--06:EXCHANGE [BROADCAST] | | | 02:SCAN HDFS [functional.alltypes t2] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | predicates: (t2.bigint_col = 5) | row-size=89B cardinality=730 | @@ -287,7 +287,7 @@ PLAN-ROOT SINK | row-size=4B cardinality=10 | 00:SCAN HDFS [functional.alltypes] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB row-size=4B cardinality=7.30K ==== # IMPALA-5689: Do not invert a left semi join with no equi-join predicates. 
@@ -298,16 +298,20 @@ left semi join functional.alltypes t2 on (t2.bigint_col=5) ---- DISTRIBUTEDPLAN PLAN-ROOT SINK | -07:EXCHANGE [UNPARTITIONED] +08:EXCHANGE [UNPARTITIONED] | 03:NESTED LOOP JOIN [LEFT SEMI JOIN, BROADCAST] | row-size=4B cardinality=10 | -|--06:EXCHANGE [BROADCAST] +|--07:EXCHANGE [BROADCAST] +| | +| 06:EXCHANGE [UNPARTITIONED] +| | limit: 1 | | | 02:SCAN HDFS [functional.alltypes t2] -| partitions=24/24 files=24 size=478.45KB +| HDFS partitions=24/24 files=24 size=478.45KB | predicates: (t2.bigint_col = 5) +| limit: 1 | row-size=8B cardinality=730 | 05:AGGREGATE [FINALIZE] @@ -321,6 +325,6 @@ PLAN-ROOT SINK | row-size=4B cardinality=10 | 00:SCAN HDFS [functional.alltypes] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 size=478.45KB row-size=4B cardinality=7.30K ==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test index 45e2503a7..e268143f9 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test @@ -1148,6 +1148,7 @@ PLAN-ROOT SINK | |--03:SCAN HDFS [functional.alltypes d] | HDFS partitions=24/24 files=24 size=478.45KB +| limit: 1 | row-size=0B cardinality=7.30K | 05:HASH JOIN [RIGHT OUTER JOIN] diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/semi-join-distinct.test b/testdata/workloads/functional-planner/queries/PlannerTest/semi-join-distinct.test new file mode 100644 index 000000000..1e15873bd --- /dev/null +++ b/testdata/workloads/functional-planner/queries/PlannerTest/semi-join-distinct.test @@ -0,0 +1,973 @@ +# IMPALA-1270: distinct should be added to subquery automatically because +# it would reduce cardinality significantly. 
+select * from functional.alltypestiny +where int_col in (select int_col from functional.alltypes where id % 2 = 0) +---- PLAN +PLAN-ROOT SINK +| +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: int_col = functional.alltypes.int_col +| runtime filters: RF000 <- functional.alltypes.int_col +| row-size=89B cardinality=8 +| +|--02:AGGREGATE [FINALIZE] +| | group by: functional.alltypes.int_col +| | row-size=4B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypes] +| HDFS partitions=24/24 files=24 size=478.45KB +| predicates: id % 2 = 0 +| row-size=8B cardinality=730 +| +00:SCAN HDFS [functional.alltypestiny] + HDFS partitions=4/4 files=4 size=460B + runtime filters: RF000 -> int_col + row-size=89B cardinality=8 +---- PARALLELPLANS +PLAN-ROOT SINK +| +07:EXCHANGE [UNPARTITIONED] +| +03:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: int_col = functional.alltypes.int_col +| row-size=89B cardinality=8 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: functional.alltypes.int_col +| | runtime filters: RF000 <- functional.alltypes.int_col +| | +| 06:EXCHANGE [BROADCAST] +| | +| 05:AGGREGATE [FINALIZE] +| | group by: functional.alltypes.int_col +| | row-size=4B cardinality=10 +| | +| 04:EXCHANGE [HASH(functional.alltypes.int_col)] +| | +| 02:AGGREGATE [STREAMING] +| | group by: functional.alltypes.int_col +| | row-size=4B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypes] +| HDFS partitions=24/24 files=24 size=478.45KB +| predicates: id % 2 = 0 +| row-size=8B cardinality=730 +| +00:SCAN HDFS [functional.alltypestiny] + HDFS partitions=4/4 files=4 size=460B + runtime filters: RF000 -> int_col + row-size=89B cardinality=8 +==== +# IMPALA-1270: distinct should not be added to subquery when column stats +# are missing (as they are on functional_parquet.alltypes). 
+select * from functional.alltypestiny +where int_col in (select int_col from functional_parquet.alltypes) +---- PLAN +PLAN-ROOT SINK +| +02:HASH JOIN [RIGHT SEMI JOIN] +| hash predicates: int_col = int_col +| runtime filters: RF000 <- int_col +| row-size=89B cardinality=8 +| +|--00:SCAN HDFS [functional.alltypestiny] +| HDFS partitions=4/4 files=4 size=460B +| row-size=89B cardinality=8 +| +01:SCAN HDFS [functional_parquet.alltypes] + HDFS partitions=24/24 files=24 size=202.01KB + runtime filters: RF000 -> int_col + row-size=4B cardinality=12.85K +---- PARALLELPLANS +PLAN-ROOT SINK +| +05:EXCHANGE [UNPARTITIONED] +| +02:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED] +| hash predicates: int_col = int_col +| row-size=89B cardinality=8 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: int_col +| | runtime filters: RF000 <- int_col +| | +| 04:EXCHANGE [HASH(int_col)] +| | +| 00:SCAN HDFS [functional.alltypestiny] +| HDFS partitions=4/4 files=4 size=460B +| row-size=89B cardinality=8 +| +03:EXCHANGE [HASH(int_col)] +| +01:SCAN HDFS [functional_parquet.alltypes] + HDFS partitions=24/24 files=24 size=202.01KB + runtime filters: RF000 -> int_col + row-size=4B cardinality=12.85K +==== +# IMPALA-1270: distinct should be added to subquery automatically because +# it would reduce cardinality significantly. 
+select * from functional.alltypestiny +where int_col in (select int_col from functional.alltypes) +---- PLAN +PLAN-ROOT SINK +| +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: int_col = functional.alltypes.int_col +| runtime filters: RF000 <- functional.alltypes.int_col +| row-size=89B cardinality=8 +| +|--02:AGGREGATE [FINALIZE] +| | group by: functional.alltypes.int_col +| | row-size=4B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypes] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=4B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypestiny] + HDFS partitions=4/4 files=4 size=460B + runtime filters: RF000 -> int_col + row-size=89B cardinality=8 +---- PARALLELPLANS +PLAN-ROOT SINK +| +07:EXCHANGE [UNPARTITIONED] +| +03:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: int_col = functional.alltypes.int_col +| row-size=89B cardinality=8 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: functional.alltypes.int_col +| | runtime filters: RF000 <- functional.alltypes.int_col +| | +| 06:EXCHANGE [BROADCAST] +| | +| 05:AGGREGATE [FINALIZE] +| | group by: functional.alltypes.int_col +| | row-size=4B cardinality=10 +| | +| 04:EXCHANGE [HASH(functional.alltypes.int_col)] +| | +| 02:AGGREGATE [STREAMING] +| | group by: functional.alltypes.int_col +| | row-size=4B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypes] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=4B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypestiny] + HDFS partitions=4/4 files=4 size=460B + runtime filters: RF000 -> int_col + row-size=89B cardinality=8 +==== +# IMPALA-1270: distinct should not be added to subquery when it does not +# reduce cardinality significantly. 
+select * from functional.alltypestiny +where int_col in (select int_col from functional.alltypes where id in (1,2,3)) +---- PLAN +PLAN-ROOT SINK +| +02:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: int_col = int_col +| runtime filters: RF000 <- int_col +| row-size=89B cardinality=8 +| +|--01:SCAN HDFS [functional.alltypes] +| HDFS partitions=24/24 files=24 size=478.45KB +| predicates: id IN (1, 2, 3) +| row-size=8B cardinality=3 +| +00:SCAN HDFS [functional.alltypestiny] + HDFS partitions=4/4 files=4 size=460B + runtime filters: RF000 -> int_col + row-size=89B cardinality=8 +---- PARALLELPLANS +PLAN-ROOT SINK +| +04:EXCHANGE [UNPARTITIONED] +| +02:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: int_col = int_col +| row-size=89B cardinality=8 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: int_col +| | runtime filters: RF000 <- int_col +| | +| 03:EXCHANGE [BROADCAST] +| | +| 01:SCAN HDFS [functional.alltypes] +| HDFS partitions=24/24 files=24 size=478.45KB +| predicates: id IN (1, 2, 3) +| row-size=8B cardinality=3 +| +00:SCAN HDFS [functional.alltypestiny] + HDFS partitions=4/4 files=4 size=460B + runtime filters: RF000 -> int_col + row-size=89B cardinality=8 +==== +# IMPALA-1270: distinct should not be added to subquery that returns 0 rows +# because there is no cardinality to reduce.
+select * from functional.alltypestiny +where int_col in (select int_col from functional.alltypes limit 0) +---- PLAN +PLAN-ROOT SINK +| +02:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: int_col = int_col +| runtime filters: RF000 <- int_col +| row-size=89B cardinality=0 +| +|--01:EMPTYSET +| +00:SCAN HDFS [functional.alltypestiny] + HDFS partitions=4/4 files=4 size=460B + runtime filters: RF000 -> int_col + row-size=89B cardinality=8 +---- PARALLELPLANS +PLAN-ROOT SINK +| +04:EXCHANGE [UNPARTITIONED] +| +02:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: int_col = int_col +| row-size=89B cardinality=0 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: int_col +| | runtime filters: RF000 <- int_col +| | +| 03:EXCHANGE [BROADCAST] +| | +| 01:EMPTYSET +| +00:SCAN HDFS [functional.alltypestiny] + HDFS partitions=4/4 files=4 size=460B + runtime filters: RF000 -> int_col + row-size=89B cardinality=8 +==== +# IMPALA-1270: limit should be added to subquery that results in a semi +# join with no join predicates. 
+select * from functional.alltypestiny +where exists (select int_col from functional.alltypes) +---- PLAN +PLAN-ROOT SINK +| +02:NESTED LOOP JOIN [LEFT SEMI JOIN] +| row-size=89B cardinality=8 +| +|--01:SCAN HDFS [functional.alltypes] +| HDFS partitions=24/24 files=24 size=478.45KB +| limit: 1 +| row-size=0B cardinality=1 +| +00:SCAN HDFS [functional.alltypestiny] + HDFS partitions=4/4 files=4 size=460B + row-size=89B cardinality=8 +---- PARALLELPLANS +PLAN-ROOT SINK +| +05:EXCHANGE [UNPARTITIONED] +| +02:NESTED LOOP JOIN [LEFT SEMI JOIN, BROADCAST] +| join table id: 00 +| row-size=89B cardinality=8 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | +| 04:EXCHANGE [BROADCAST] +| | +| 03:EXCHANGE [UNPARTITIONED] +| | limit: 1 +| | +| 01:SCAN HDFS [functional.alltypes] +| HDFS partitions=24/24 files=24 size=478.45KB +| limit: 1 +| row-size=0B cardinality=1 +| +00:SCAN HDFS [functional.alltypestiny] + HDFS partitions=4/4 files=4 size=460B + row-size=89B cardinality=8 +==== +# IMPALA-1270: the added aggregation does not result in an extra exchange for +# shuffle joins. 
+select straight_join * +from functional.alltypestiny t1 + left semi join /*+shuffle*/ functional.alltypes t2 on t1.int_col = t2.int_col +---- PLAN +PLAN-ROOT SINK +| +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: t1.int_col = t2.int_col +| runtime filters: RF000 <- t2.int_col +| row-size=89B cardinality=8 +| +|--02:AGGREGATE [FINALIZE] +| | group by: t2.int_col +| | row-size=4B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=4B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypestiny t1] + HDFS partitions=4/4 files=4 size=460B + runtime filters: RF000 -> t1.int_col + row-size=89B cardinality=8 +---- PARALLELPLANS +PLAN-ROOT SINK +| +07:EXCHANGE [UNPARTITIONED] +| +03:HASH JOIN [LEFT SEMI JOIN, PARTITIONED] +| hash predicates: t1.int_col = t2.int_col +| row-size=89B cardinality=8 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: t2.int_col +| | runtime filters: RF000 <- t2.int_col +| | +| 05:AGGREGATE [FINALIZE] +| | group by: t2.int_col +| | row-size=4B cardinality=10 +| | +| 04:EXCHANGE [HASH(t2.int_col)] +| | +| 02:AGGREGATE [STREAMING] +| | group by: t2.int_col +| | row-size=4B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=4B cardinality=7.30K +| +06:EXCHANGE [HASH(t1.int_col)] +| +00:SCAN HDFS [functional.alltypestiny t1] + HDFS partitions=4/4 files=4 size=460B + runtime filters: RF000 -> t1.int_col + row-size=89B cardinality=8 +==== +# IMPALA-1270: the distinct optimisation also applies to NULL AWARE LEFT ANTI JOIN +select * from functional.alltypestiny +where int_col not in (select int_col from functional.alltypes where id % 2 = 0) +---- PLAN +PLAN-ROOT SINK +| +03:HASH JOIN [NULL AWARE LEFT ANTI JOIN] +| hash predicates: int_col = functional.alltypes.int_col +| row-size=89B cardinality=8 +| +|--02:AGGREGATE [FINALIZE] +| | group by: functional.alltypes.int_col +| | 
row-size=4B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypes] +| HDFS partitions=24/24 files=24 size=478.45KB +| predicates: id % 2 = 0 +| row-size=8B cardinality=730 +| +00:SCAN HDFS [functional.alltypestiny] + HDFS partitions=4/4 files=4 size=460B + row-size=89B cardinality=8 +---- PARALLELPLANS +PLAN-ROOT SINK +| +07:EXCHANGE [UNPARTITIONED] +| +03:HASH JOIN [NULL AWARE LEFT ANTI JOIN, BROADCAST] +| hash predicates: int_col = functional.alltypes.int_col +| row-size=89B cardinality=8 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: functional.alltypes.int_col +| | +| 06:EXCHANGE [BROADCAST] +| | +| 05:AGGREGATE [FINALIZE] +| | group by: functional.alltypes.int_col +| | row-size=4B cardinality=10 +| | +| 04:EXCHANGE [HASH(functional.alltypes.int_col)] +| | +| 02:AGGREGATE [STREAMING] +| | group by: functional.alltypes.int_col +| | row-size=4B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypes] +| HDFS partitions=24/24 files=24 size=478.45KB +| predicates: id % 2 = 0 +| row-size=8B cardinality=730 +| +00:SCAN HDFS [functional.alltypestiny] + HDFS partitions=4/4 files=4 size=460B + row-size=89B cardinality=8 +==== +# IMPALA-1270: the distinct optimisation also applies to LEFT ANTI JOIN +select * from functional.alltypestiny t1 +where not exists ( + select int_col from functional.alltypes t2 + where id % 2 = 0 and t1.int_col = t2.int_col) +---- PLAN +PLAN-ROOT SINK +| +03:HASH JOIN [LEFT ANTI JOIN] +| hash predicates: t1.int_col = t2.int_col +| row-size=89B cardinality=8 +| +|--02:AGGREGATE [FINALIZE] +| | group by: t2.int_col +| | row-size=4B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| predicates: id % 2 = 0 +| row-size=8B cardinality=730 +| +00:SCAN HDFS [functional.alltypestiny t1] + HDFS partitions=4/4 files=4 size=460B + row-size=89B cardinality=8 +---- PARALLELPLANS +PLAN-ROOT SINK +| +07:EXCHANGE [UNPARTITIONED] +| +03:HASH JOIN [LEFT ANTI 
JOIN, BROADCAST] +| hash predicates: t1.int_col = t2.int_col +| row-size=89B cardinality=8 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: t2.int_col +| | +| 06:EXCHANGE [BROADCAST] +| | +| 05:AGGREGATE [FINALIZE] +| | group by: t2.int_col +| | row-size=4B cardinality=10 +| | +| 04:EXCHANGE [HASH(t2.int_col)] +| | +| 02:AGGREGATE [STREAMING] +| | group by: t2.int_col +| | row-size=4B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| predicates: id % 2 = 0 +| row-size=8B cardinality=730 +| +00:SCAN HDFS [functional.alltypestiny t1] + HDFS partitions=4/4 files=4 size=460B + row-size=89B cardinality=8 +==== +# IMPALA-1270: multi-column join showing that unused slots are projected. +select count(*) from functional.alltypesagg t1 +where int_col in ( + select int_col from functional.alltypes t2 + where t1.bool_col = t2.bool_col and id is not null) +---- PLAN +PLAN-ROOT SINK +| +04:AGGREGATE [FINALIZE] +| output: count(*) +| row-size=8B cardinality=1 +| +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: int_col = t2.int_col, t1.bool_col = t2.bool_col +| runtime filters: RF000 <- t2.int_col, RF001 <- t2.bool_col +| row-size=5B cardinality=115 +| +|--02:AGGREGATE [FINALIZE] +| | group by: t2.bool_col, t2.int_col +| | row-size=5B cardinality=20 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| predicates: id IS NOT NULL +| row-size=9B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypesagg t1] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> int_col, RF001 -> t1.bool_col + row-size=5B cardinality=11.00K +---- PARALLELPLANS +PLAN-ROOT SINK +| +09:AGGREGATE [FINALIZE] +| output: count:merge(*) +| row-size=8B cardinality=1 +| +08:EXCHANGE [UNPARTITIONED] +| +04:AGGREGATE +| output: count(*) +| row-size=8B cardinality=1 +| +03:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: int_col = 
t2.int_col, t1.bool_col = t2.bool_col +| row-size=5B cardinality=115 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: t2.int_col, t2.bool_col +| | runtime filters: RF000 <- t2.int_col, RF001 <- t2.bool_col +| | +| 07:EXCHANGE [BROADCAST] +| | +| 06:AGGREGATE [FINALIZE] +| | group by: t2.bool_col, t2.int_col +| | row-size=5B cardinality=20 +| | +| 05:EXCHANGE [HASH(t2.bool_col,t2.int_col)] +| | +| 02:AGGREGATE [STREAMING] +| | group by: t2.bool_col, t2.int_col +| | row-size=5B cardinality=20 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| predicates: id IS NOT NULL +| row-size=9B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypesagg t1] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> int_col, RF001 -> t1.bool_col + row-size=5B cardinality=11.00K +==== +# IMPALA-1270: aggregation can be added on top of existing aggregation if it would +# reduce cardinality enough. +select count(*) from functional.alltypesagg t1 +where int_col in ( + select int_col from functional.alltypes t2 + group by int_col, id + having sum(int_col) > 1) +---- PLAN +PLAN-ROOT SINK +| +05:AGGREGATE [FINALIZE] +| output: count(*) +| row-size=8B cardinality=1 +| +04:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: int_col = int_col +| runtime filters: RF000 <- int_col +| row-size=4B cardinality=115 +| +|--03:AGGREGATE [FINALIZE] +| | group by: int_col +| | row-size=4B cardinality=10 +| | +| 02:AGGREGATE [FINALIZE] +| | output: sum(int_col) +| | group by: int_col, id +| | having: sum(int_col) > 1 +| | row-size=16B cardinality=730 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=8B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypesagg t1] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> int_col + row-size=4B cardinality=11.00K +---- PARALLELPLANS +PLAN-ROOT SINK +| +12:AGGREGATE [FINALIZE] +| output: 
count:merge(*) +| row-size=8B cardinality=1 +| +11:EXCHANGE [UNPARTITIONED] +| +05:AGGREGATE +| output: count(*) +| row-size=8B cardinality=1 +| +04:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: int_col = int_col +| row-size=4B cardinality=115 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: int_col +| | runtime filters: RF000 <- int_col +| | +| 10:EXCHANGE [BROADCAST] +| | +| 09:AGGREGATE [FINALIZE] +| | group by: int_col +| | row-size=4B cardinality=10 +| | +| 08:EXCHANGE [HASH(int_col)] +| | +| 03:AGGREGATE [STREAMING] +| | group by: int_col +| | row-size=4B cardinality=10 +| | +| 07:AGGREGATE [FINALIZE] +| | output: sum:merge(int_col) +| | group by: int_col, id +| | having: sum(int_col) > 1 +| | row-size=16B cardinality=730 +| | +| 06:EXCHANGE [HASH(int_col,id)] +| | +| 02:AGGREGATE [STREAMING] +| | output: sum(int_col) +| | group by: int_col, id +| | row-size=16B cardinality=7.30K +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=8B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypesagg t1] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> int_col + row-size=4B cardinality=11.00K +==== +# IMPALA-1270: aggregation will not be added on top of existing aggregation if it does +# not reduce cardinality enough. 
+select count(*) from functional.alltypesagg t1 +where int_col in ( + select int_col from functional.alltypes t2 + group by int_col, tinyint_col + having sum(int_col) > 1) +---- PLAN +PLAN-ROOT SINK +| +04:AGGREGATE [FINALIZE] +| output: count(*) +| row-size=8B cardinality=1 +| +03:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: int_col = int_col +| runtime filters: RF000 <- int_col +| row-size=4B cardinality=115 +| +|--02:AGGREGATE [FINALIZE] +| | output: sum(int_col) +| | group by: int_col, tinyint_col +| | having: sum(int_col) > 1 +| | row-size=13B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=5B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypesagg t1] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> int_col + row-size=4B cardinality=11.00K +---- PARALLELPLANS +PLAN-ROOT SINK +| +09:AGGREGATE [FINALIZE] +| output: count:merge(*) +| row-size=8B cardinality=1 +| +08:EXCHANGE [UNPARTITIONED] +| +04:AGGREGATE +| output: count(*) +| row-size=8B cardinality=1 +| +03:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: int_col = int_col +| row-size=4B cardinality=115 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: int_col +| | runtime filters: RF000 <- int_col +| | +| 07:EXCHANGE [BROADCAST] +| | +| 06:AGGREGATE [FINALIZE] +| | output: sum:merge(int_col) +| | group by: int_col, tinyint_col +| | having: sum(int_col) > 1 +| | row-size=13B cardinality=10 +| | +| 05:EXCHANGE [HASH(int_col,tinyint_col)] +| | +| 02:AGGREGATE [STREAMING] +| | output: sum(int_col) +| | group by: int_col, tinyint_col +| | row-size=13B cardinality=100 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=5B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypesagg t1] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> int_col + row-size=4B cardinality=11.00K +==== +# 
IMPALA-1270: planner is not able to coalesce redundant aggregations yet. +# The right input of the SEMI JOIN could be more efficiently executed with +# a single aggregation by int_col, but the bottom-up plan generation process +# first generates an aggregation by int_col, tinyint_col and the distinct +# aggregation is placed on top of that. +select count(*) from functional.alltypesagg t1 +where int_col in ( + select int_col from functional.alltypes t2 + group by int_col, tinyint_col) +---- PLAN +PLAN-ROOT SINK +| +05:AGGREGATE [FINALIZE] +| output: count(*) +| row-size=8B cardinality=1 +| +04:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: int_col = int_col +| runtime filters: RF000 <- int_col +| row-size=4B cardinality=115 +| +|--03:AGGREGATE [FINALIZE] +| | group by: int_col +| | row-size=4B cardinality=10 +| | +| 02:AGGREGATE [FINALIZE] +| | group by: int_col, tinyint_col +| | row-size=5B cardinality=100 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=5B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypesagg t1] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> int_col + row-size=4B cardinality=11.00K +---- PARALLELPLANS +PLAN-ROOT SINK +| +12:AGGREGATE [FINALIZE] +| output: count:merge(*) +| row-size=8B cardinality=1 +| +11:EXCHANGE [UNPARTITIONED] +| +05:AGGREGATE +| output: count(*) +| row-size=8B cardinality=1 +| +04:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: int_col = int_col +| row-size=4B cardinality=115 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: int_col +| | runtime filters: RF000 <- int_col +| | +| 10:EXCHANGE [BROADCAST] +| | +| 09:AGGREGATE [FINALIZE] +| | group by: int_col +| | row-size=4B cardinality=10 +| | +| 08:EXCHANGE [HASH(int_col)] +| | +| 03:AGGREGATE [STREAMING] +| | group by: int_col +| | row-size=4B cardinality=10 +| | +| 07:AGGREGATE [FINALIZE] +| | group by: int_col, tinyint_col +| | 
row-size=5B cardinality=100 +| | +| 06:EXCHANGE [HASH(int_col,tinyint_col)] +| | +| 02:AGGREGATE [STREAMING] +| | group by: int_col, tinyint_col +| | row-size=5B cardinality=100 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=5B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypesagg t1] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> int_col + row-size=4B cardinality=11.00K +==== +# IMPALA-1270: aggregate function in select list of subquery is eligible for +# distinct subquery optimization. +select id from functional.alltypesagg t1 +where int_col in ( + select count(*) + from functional.alltypes t2 + group by int_col, tinyint_col) +---- PLAN +PLAN-ROOT SINK +| +04:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: int_col = count(*) +| runtime filters: RF000 <- count(*) +| row-size=8B cardinality=11 +| +|--03:AGGREGATE [FINALIZE] +| | group by: count(*) +| | row-size=8B cardinality=1 +| | +| 02:AGGREGATE [FINALIZE] +| | output: count(*) +| | group by: int_col, tinyint_col +| | row-size=13B cardinality=100 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=5B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypesagg t1] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> int_col + row-size=8B cardinality=11.00K +---- PARALLELPLANS +PLAN-ROOT SINK +| +10:EXCHANGE [UNPARTITIONED] +| +04:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: int_col = count(*) +| row-size=8B cardinality=11 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: count(*) +| | runtime filters: RF000 <- count(*) +| | +| 09:EXCHANGE [BROADCAST] +| | +| 08:AGGREGATE [FINALIZE] +| | group by: count(*) +| | row-size=8B cardinality=1 +| | +| 07:EXCHANGE [HASH(count(*))] +| | +| 03:AGGREGATE [STREAMING] +| | group by: count(*) +| | row-size=8B cardinality=1 +| | +| 06:AGGREGATE [FINALIZE] +| | 
output: count:merge(*) +| | group by: int_col, tinyint_col +| | row-size=13B cardinality=100 +| | +| 05:EXCHANGE [HASH(int_col,tinyint_col)] +| | +| 02:AGGREGATE [STREAMING] +| | output: count(*) +| | group by: int_col, tinyint_col +| | row-size=13B cardinality=100 +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=5B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypesagg t1] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> int_col + row-size=8B cardinality=11.00K +==== +# IMPALA-1270: analytic function in select list of subquery is eligible for +# distinct subquery optimization. +select id from functional.alltypesagg t1 +where int_col in ( + select rank() over (partition by int_col order by id) + from functional.alltypes t2) +---- PLAN +PLAN-ROOT SINK +| +05:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: int_col = rank() +| runtime filters: RF000 <- rank() +| row-size=8B cardinality=11 +| +|--04:AGGREGATE [FINALIZE] +| | group by: rank() +| | row-size=8B cardinality=1 +| | +| 03:ANALYTIC +| | functions: rank() +| | partition by: int_col +| | order by: id ASC +| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| | row-size=16B cardinality=7.30K +| | +| 02:SORT +| | order by: int_col ASC NULLS FIRST, id ASC +| | row-size=8B cardinality=7.30K +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=8B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypesagg t1] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> int_col + row-size=8B cardinality=11.00K +---- PARALLELPLANS +PLAN-ROOT SINK +| +10:EXCHANGE [UNPARTITIONED] +| +05:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: int_col = rank() +| row-size=8B cardinality=11 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | build expressions: rank() +| | runtime filters: RF000 <- rank() +| | +| 09:EXCHANGE [BROADCAST] +| | +| 
08:AGGREGATE [FINALIZE] +| | group by: rank() +| | row-size=8B cardinality=1 +| | +| 07:EXCHANGE [HASH(rank())] +| | +| 04:AGGREGATE [STREAMING] +| | group by: rank() +| | row-size=8B cardinality=1 +| | +| 03:ANALYTIC +| | functions: rank() +| | partition by: int_col +| | order by: id ASC +| | window: RANGE BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW +| | row-size=16B cardinality=7.30K +| | +| 02:SORT +| | order by: int_col ASC NULLS FIRST, id ASC +| | row-size=8B cardinality=7.30K +| | +| 06:EXCHANGE [HASH(int_col)] +| | +| 01:SCAN HDFS [functional.alltypes t2] +| HDFS partitions=24/24 files=24 size=478.45KB +| row-size=8B cardinality=7.30K +| +00:SCAN HDFS [functional.alltypesagg t1] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> int_col + row-size=8B cardinality=11.00K +==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite-hdfs-num-rows-est-enabled.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite-hdfs-num-rows-est-enabled.test index e49e9d221..bcc244a10 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite-hdfs-num-rows-est-enabled.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite-hdfs-num-rows-est-enabled.test @@ -13,19 +13,21 @@ PLAN-ROOT SINK | row-size=89B cardinality=7.30K | |--03:NESTED LOOP JOIN [RIGHT SEMI JOIN] +| | limit: 1 | | row-size=4B cardinality=1 | | | |--01:SCAN HDFS [functional.tinyinttable] -| | partitions=1/1 files=1 size=20B +| | HDFS partitions=1/1 files=1 size=20B | | predicates: 1 = functional.tinyinttable.int_col | | row-size=4B cardinality=1 | | | 02:SCAN HDFS [functional.alltypestiny] -| partitions=4/4 files=4 size=460B +| HDFS partitions=4/4 files=4 size=460B | predicates: 1 = functional.alltypestiny.int_col +| limit: 1 | row-size=4B cardinality=4 | 00:SCAN HDFS [functional.alltypes t] - partitions=24/24 files=24 size=478.45KB + HDFS partitions=24/24 files=24 
size=478.45KB row-size=89B cardinality=7.30K ==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test index f7326ead9..25d50f614 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test @@ -126,12 +126,16 @@ where t1.int_col in ---- PLAN PLAN-ROOT SINK | -02:HASH JOIN [LEFT SEMI JOIN] +03:HASH JOIN [LEFT SEMI JOIN] | hash predicates: t1.int_col = t2.int_col | runtime filters: RF000 <- t2.int_col | row-size=89B cardinality=7.30K | -|--01:SCAN HDFS [functional.alltypes t2] +|--02:AGGREGATE [FINALIZE] +| | group by: t2.int_col +| | row-size=4B cardinality=10 +| | +| 01:SCAN HDFS [functional.alltypes t2] | HDFS partitions=24/24 files=24 size=478.45KB | predicates: (t2.int_col IS NOT NULL AND (t2.int_col < 0 OR t2.int_col > 10) OR t2.bigint_col IS NOT NULL AND (t2.bigint_col < 0 OR t2.bigint_col > 10)) | row-size=12B cardinality=730 @@ -173,11 +177,15 @@ and t.bigint_col < 1000 ---- PLAN PLAN-ROOT SINK | -04:HASH JOIN [NULL AWARE LEFT ANTI JOIN] -| hash predicates: t.tinyint_col = tinyint_col +05:HASH JOIN [NULL AWARE LEFT ANTI JOIN] +| hash predicates: t.tinyint_col = functional.alltypestiny.tinyint_col | row-size=89B cardinality=730 | -|--02:SCAN HDFS [functional.alltypestiny] +|--04:AGGREGATE [FINALIZE] +| | group by: functional.alltypestiny.tinyint_col +| | row-size=1B cardinality=2 +| | +| 02:SCAN HDFS [functional.alltypestiny] | HDFS partitions=4/4 files=4 size=460B | row-size=1B cardinality=8 | @@ -360,20 +368,24 @@ where a.int_col in ---- PLAN PLAN-ROOT SINK | -08:AGGREGATE [FINALIZE] +09:AGGREGATE [FINALIZE] | output: count(id) | row-size=8B cardinality=1 | -07:AGGREGATE +08:AGGREGATE | group by: id | row-size=4B cardinality=115 | -06:HASH JOIN [LEFT SEMI JOIN] +07:HASH JOIN [LEFT SEMI JOIN] | hash predicates: a.int_col 
= t.int_col | runtime filters: RF000 <- t.int_col | row-size=8B cardinality=115 | -|--05:HASH JOIN [INNER JOIN] +|--06:AGGREGATE [FINALIZE] +| | group by: t.int_col +| | row-size=4B cardinality=10 +| | +| 05:HASH JOIN [INNER JOIN] | | hash predicates: s.bigint_col = n.bigint_col | | runtime filters: RF002 <- n.bigint_col | | row-size=29B cardinality=40 @@ -476,12 +488,16 @@ and s.bool_col = false ---- PLAN PLAN-ROOT SINK | -08:HASH JOIN [LEFT SEMI JOIN] -| hash predicates: a.int_col = int_col -| runtime filters: RF000 <- int_col +09:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: a.int_col = functional.alltypessmall.int_col +| runtime filters: RF000 <- functional.alltypessmall.int_col | row-size=109B cardinality=91 | -|--06:SCAN HDFS [functional.alltypessmall] +|--08:AGGREGATE [FINALIZE] +| | group by: functional.alltypessmall.int_col +| | row-size=4B cardinality=10 +| | +| 06:SCAN HDFS [functional.alltypessmall] | HDFS partitions=4/4 files=4 size=6.32KB | row-size=4B cardinality=100 | @@ -554,17 +570,21 @@ and bigint_col < 1000 ---- PLAN PLAN-ROOT SINK | -04:HASH JOIN [LEFT SEMI JOIN] +05:HASH JOIN [LEFT SEMI JOIN] | hash predicates: id = id | runtime filters: RF000 <- id | row-size=89B cardinality=11 | -|--03:HASH JOIN [LEFT SEMI JOIN] -| | hash predicates: int_col = int_col -| | runtime filters: RF002 <- int_col +|--04:HASH JOIN [LEFT SEMI JOIN] +| | hash predicates: int_col = functional.alltypestiny.int_col +| | runtime filters: RF002 <- functional.alltypestiny.int_col | | row-size=9B cardinality=11 | | -| |--02:SCAN HDFS [functional.alltypestiny] +| |--03:AGGREGATE [FINALIZE] +| | | group by: functional.alltypestiny.int_col +| | | row-size=4B cardinality=2 +| | | +| | 02:SCAN HDFS [functional.alltypestiny] | | HDFS partitions=4/4 files=4 size=460B | | row-size=4B cardinality=8 | | @@ -744,12 +764,16 @@ and tinyint_col < 10 ---- PLAN PLAN-ROOT SINK | -03:HASH JOIN [LEFT SEMI JOIN] +04:HASH JOIN [LEFT SEMI JOIN] | hash predicates: a.tinyint_col = b.tinyint_col | 
runtime filters: RF000 <- b.tinyint_col | row-size=1B cardinality=244 | -|--02:AGGREGATE [FINALIZE] +|--03:AGGREGATE [FINALIZE] +| | group by: b.tinyint_col +| | row-size=1B cardinality=2 +| | +| 02:AGGREGATE [FINALIZE] | | group by: id, int_col, bool_col, b.tinyint_col | | row-size=10B cardinality=8 | | @@ -771,21 +795,25 @@ where not exists ---- PLAN PLAN-ROOT SINK | -03:AGGREGATE [FINALIZE] +04:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -02:HASH JOIN [RIGHT ANTI JOIN] -| hash predicates: a.int_col = t.int_col +03:HASH JOIN [LEFT ANTI JOIN] +| hash predicates: t.int_col = a.int_col | row-size=4B cardinality=7.30K | -|--00:SCAN HDFS [functional.alltypes t] -| HDFS partitions=24/24 files=24 size=478.45KB -| row-size=4B cardinality=7.30K +|--02:AGGREGATE [FINALIZE] +| | group by: a.int_col +| | row-size=4B cardinality=957 +| | +| 01:SCAN HDFS [functional.alltypesagg a] +| HDFS partitions=11/11 files=11 size=814.73KB +| row-size=4B cardinality=11.00K | -01:SCAN HDFS [functional.alltypesagg a] - HDFS partitions=11/11 files=11 size=814.73KB - row-size=4B cardinality=11.00K +00:SCAN HDFS [functional.alltypes t] + HDFS partitions=24/24 files=24 size=478.45KB + row-size=4B cardinality=7.30K ==== # Correlated NOT EXISTS with an analytic function and a group by clause select count(*) @@ -799,15 +827,19 @@ and bool_col = false ---- PLAN PLAN-ROOT SINK | -06:AGGREGATE [FINALIZE] +07:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B cardinality=1 | -05:HASH JOIN [LEFT ANTI JOIN] +06:HASH JOIN [LEFT ANTI JOIN] | hash predicates: a.int_col = b.int_col | row-size=5B cardinality=5.50K | -|--04:AGGREGATE [FINALIZE] +|--05:AGGREGATE [FINALIZE] +| | group by: b.int_col +| | row-size=4B cardinality=10 +| | +| 04:AGGREGATE [FINALIZE] | | group by: b.id, b.int_col, b.bigint_col | | row-size=16B cardinality=50 | | @@ -978,16 +1010,16 @@ where exists ---- PLAN PLAN-ROOT SINK | -05:AGGREGATE [FINALIZE] +06:AGGREGATE [FINALIZE] | output: count(*) | row-size=8B 
cardinality=1 | -04:HASH JOIN [LEFT SEMI JOIN] +05:HASH JOIN [LEFT SEMI JOIN] | hash predicates: a.id = t.id | runtime filters: RF000 <- t.id | row-size=4B cardinality=8 | -|--03:HASH JOIN [RIGHT SEMI JOIN] +|--04:HASH JOIN [RIGHT SEMI JOIN] | | hash predicates: g.int_col = t.int_col | | runtime filters: RF002 <- t.int_col | | row-size=8B cardinality=8 @@ -996,6 +1028,10 @@ PLAN-ROOT SINK | | HDFS partitions=4/4 files=4 size=460B | | row-size=8B cardinality=8 | | +| 03:AGGREGATE [FINALIZE] +| | group by: g.int_col +| | row-size=4B cardinality=957 +| | | 02:SCAN HDFS [functional.alltypesagg g] | HDFS partitions=11/11 files=11 size=814.73KB | predicates: g.bool_col = FALSE @@ -2489,6 +2525,7 @@ PLAN-ROOT SINK |--01:SCAN HDFS [functional.alltypestiny] | HDFS partitions=4/4 files=4 size=460B | predicates: 1 = functional.alltypestiny.int_col +| limit: 1 | row-size=4B cardinality=4 | 00:SCAN HDFS [functional.alltypessmall] @@ -2546,6 +2583,7 @@ PLAN-ROOT SINK | |--02:AGGREGATE [FINALIZE] | | group by: int_col +| | limit: 1 | | row-size=4B cardinality=2 | | | 01:SCAN HDFS [functional.alltypestiny] @@ -2592,6 +2630,7 @@ PLAN-ROOT SINK |--02:AGGREGATE [FINALIZE] | | output: max(int_col) | | having: 1 = max(int_col) +| | limit: 1 | | row-size=4B cardinality=1 | | | 01:SCAN HDFS [functional.alltypestiny] @@ -2635,6 +2674,7 @@ PLAN-ROOT SINK | |--02:SELECT | | predicates: 1 = int_col +| | limit: 1 | | row-size=4B cardinality=1 | | | 01:SCAN HDFS [functional.alltypestiny] @@ -2679,11 +2719,13 @@ PLAN-ROOT SINK | row-size=89B cardinality=7.30K | |--03:NESTED LOOP JOIN [LEFT SEMI JOIN] +| | limit: 1 | | row-size=4B cardinality=unavailable | | | |--02:SCAN HDFS [functional.alltypestiny] | | HDFS partitions=4/4 files=4 size=460B | | predicates: 1 = functional.alltypestiny.int_col +| | limit: 1 | | row-size=4B cardinality=4 | | | 01:SCAN HDFS [functional.tinyinttable] @@ -2708,6 +2750,7 @@ PLAN-ROOT SINK |--03:HASH JOIN [LEFT SEMI JOIN] | | hash predicates: bigint_col = bigint_col, 
t.id = id | | runtime filters: RF000 <- bigint_col, RF001 <- id +| | limit: 1 | | row-size=16B cardinality=2 | | | |--02:SCAN HDFS [functional.alltypestiny] @@ -4688,7 +4731,7 @@ and tinyint_col < 10 ---- PLAN PLAN-ROOT SINK | -04:HASH JOIN [RIGHT SEMI JOIN] +05:HASH JOIN [RIGHT SEMI JOIN] | hash predicates: CASE valid_tid(2,3,4,5) WHEN 2 THEN b.tinyint_col WHEN 3 THEN b.tinyint_col WHEN 4 THEN b.tinyint_col WHEN 5 THEN b.tinyint_col END = a.tinyint_col, CASE valid_tid(2,3,4,5) WHEN 2 THEN b.string_col WHEN 3 THEN b.string_col WHEN 4 THEN b.string_col WHEN 5 THEN b.string_col END = a.string_col | row-size=14B cardinality=1 | @@ -4697,6 +4740,10 @@ PLAN-ROOT SINK | predicates: tinyint_col < 10 | row-size=14B cardinality=1 | +04:AGGREGATE [FINALIZE] +| group by: CASE valid_tid(2,3,4,5) WHEN 2 THEN b.tinyint_col WHEN 3 THEN b.tinyint_col WHEN 4 THEN b.tinyint_col WHEN 5 THEN b.tinyint_col END, CASE valid_tid(2,3,4,5) WHEN 2 THEN b.string_col WHEN 3 THEN b.string_col WHEN 4 THEN b.string_col WHEN 5 THEN b.string_col END +| row-size=13B cardinality=9.64K +| 03:AGGREGATE [FINALIZE] | group by: CASE valid_tid(2,3,4,5) WHEN 2 THEN id WHEN 3 THEN id WHEN 4 THEN id WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN int_col WHEN 3 THEN int_col WHEN 4 THEN NULL WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN bool_col WHEN 3 THEN NULL WHEN 4 THEN NULL WHEN 5 THEN NULL END, CASE valid_tid(2,3,4,5) WHEN 2 THEN b.tinyint_col WHEN 3 THEN b.tinyint_col WHEN 4 THEN b.tinyint_col WHEN 5 THEN b.tinyint_col END, CASE valid_tid(2,3,4,5) WHEN 2 THEN b.string_col WHEN 3 THEN b.string_col WHEN 4 THEN b.string_col WHEN 5 THEN b.string_col END, CASE valid_tid(2,3,4,5) WHEN 2 THEN 2 WHEN 3 THEN 3 WHEN 4 THEN 4 WHEN 5 THEN 5 END | row-size=26B cardinality=41.67K diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test index 4c3845edf..c08726c19 100644 --- 
a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test @@ -13625,3 +13625,1846 @@ PLAN-ROOT SINK runtime filters: RF000 -> ss_store_sk, RF002 -> ss_item_sk, RF004 -> ss_sold_date_sk row-size=24B cardinality=2.88M ==== +# Q23-1 +with frequent_ss_items as + (select substr(i_item_desc,1,30) itemdesc,i_item_sk item_sk,d_date solddate,count(*) cnt + from store_sales + ,date_dim + ,item + where ss_sold_date_sk = d_date_sk + and ss_item_sk = i_item_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by substr(i_item_desc,1,30),i_item_sk,d_date + having count(*) >4), + max_store_sales as + (select max(csales) tpcds_cmax + from (select c_customer_sk,sum(ss_quantity*ss_sales_price) csales + from store_sales + ,customer + ,date_dim + where ss_customer_sk = c_customer_sk + and ss_sold_date_sk = d_date_sk + and d_year in (2000,2000+1,2000+2,2000+3) + group by c_customer_sk) x), + best_ss_customer as + (select c_customer_sk,sum(ss_quantity*ss_sales_price) ssales + from store_sales + ,customer + where ss_customer_sk = c_customer_sk + group by c_customer_sk + having sum(ss_quantity*ss_sales_price) > (50/100.0) * (select + * +from + max_store_sales)) + select sum(sales) + from (select cs_quantity*cs_list_price sales + from catalog_sales + ,date_dim + where d_year = 2000 + and d_moy = 2 + and cs_sold_date_sk = d_date_sk + and cs_item_sk in (select item_sk from frequent_ss_items) + and cs_bill_customer_sk in (select c_customer_sk from best_ss_customer) + union all + select ws_quantity*ws_list_price sales + from web_sales + ,date_dim + where d_year = 2000 + and d_moy = 2 + and ws_sold_date_sk = d_date_sk + and ws_item_sk in (select item_sk from frequent_ss_items) + and ws_bill_customer_sk in (select c_customer_sk from best_ss_customer)) y + limit 100; +---- PLAN +Max Per-Host Resource Reservation: Memory=122.44MB Threads=11 +Per-Host Resource Estimates: Memory=910MB +PLAN-ROOT SINK +| 
+49:AGGREGATE [FINALIZE] +| output: sum(sales) +| limit: 100 +| row-size=16B cardinality=1 +| +00:UNION +| row-size=8B cardinality=128.16K +| +|--48:HASH JOIN [RIGHT SEMI JOIN] +| | hash predicates: c_customer_sk = ws_bill_customer_sk +| | runtime filters: RF016 <- ws_bill_customer_sk +| | row-size=36B cardinality=42.85K +| | +| |--47:HASH JOIN [LEFT SEMI JOIN] +| | | hash predicates: ws_item_sk = i_item_sk +| | | row-size=36B cardinality=42.85K +| | | +| | |--46:AGGREGATE [FINALIZE] +| | | | group by: i_item_sk +| | | | row-size=8B cardinality=17.98K +| | | | +| | | 32:AGGREGATE [FINALIZE] +| | | | output: count(*) +| | | | group by: substr(i_item_desc, 1, 30), i_item_sk, d_date +| | | | having: count(*) > 4 +| | | | row-size=50B cardinality=235.45K +| | | | +| | | 31:HASH JOIN [INNER JOIN] +| | | | hash predicates: ss_item_sk = i_item_sk +| | | | row-size=162B cardinality=2.35M +| | | | +| | | |--29:SCAN HDFS [tpcds.item] +| | | | HDFS partitions=1/1 files=1 size=4.82MB +| | | | row-size=120B cardinality=18.00K +| | | | +| | | 30:HASH JOIN [INNER JOIN] +| | | | hash predicates: ss_sold_date_sk = d_date_sk +| | | | runtime filters: RF030 <- d_date_sk +| | | | row-size=42B cardinality=2.35M +| | | | +| | | |--28:SCAN HDFS [tpcds.date_dim] +| | | | HDFS partitions=1/1 files=1 size=9.84MB +| | | | predicates: d_year IN (2000, 2001, 2002, 2003) +| | | | row-size=30B cardinality=1.49K +| | | | +| | | 27:SCAN HDFS [tpcds.store_sales] +| | | HDFS partitions=1824/1824 files=1824 size=346.60MB +| | | runtime filters: RF030 -> ss_sold_date_sk +| | | row-size=12B cardinality=2.88M +| | | +| | 45:HASH JOIN [INNER JOIN] +| | | hash predicates: ws_sold_date_sk = d_date_sk +| | | runtime filters: RF026 <- d_date_sk +| | | row-size=36B cardinality=42.85K +| | | +| | |--26:SCAN HDFS [tpcds.date_dim] +| | | HDFS partitions=1/1 files=1 size=9.84MB +| | | predicates: d_year = 2000, d_moy = 2 +| | | row-size=12B cardinality=108 +| | | +| | 25:SCAN HDFS [tpcds.web_sales] +| | HDFS 
partitions=1/1 files=1 size=140.07MB +| | runtime filters: RF026 -> ws_sold_date_sk +| | row-size=24B cardinality=719.38K +| | +| 44:NESTED LOOP JOIN [INNER JOIN] +| | predicates: sum(ss_quantity * ss_sales_price) > 0.500000 * max(csales) +| | row-size=36B cardinality=100.00K +| | +| |--43:AGGREGATE [FINALIZE] +| | | output: max(sum(ss_quantity * ss_sales_price)) +| | | row-size=16B cardinality=1 +| | | +| | 42:AGGREGATE [FINALIZE] +| | | output: sum(ss_quantity * ss_sales_price) +| | | group by: c_customer_sk +| | | row-size=20B cardinality=100.00K +| | | +| | 41:HASH JOIN [INNER JOIN] +| | | hash predicates: ss_customer_sk = c_customer_sk +| | | row-size=28B cardinality=2.35M +| | | +| | |--38:SCAN HDFS [tpcds.customer] +| | | HDFS partitions=1/1 files=1 size=12.60MB +| | | row-size=4B cardinality=100.00K +| | | +| | 40:HASH JOIN [INNER JOIN] +| | | hash predicates: ss_sold_date_sk = d_date_sk +| | | runtime filters: RF022 <- d_date_sk +| | | row-size=24B cardinality=2.35M +| | | +| | |--39:SCAN HDFS [tpcds.date_dim] +| | | HDFS partitions=1/1 files=1 size=9.84MB +| | | predicates: d_year IN (2000, 2001, 2002, 2003) +| | | row-size=8B cardinality=1.49K +| | | +| | 37:SCAN HDFS [tpcds.store_sales] +| | HDFS partitions=1824/1824 files=1824 size=346.60MB +| | runtime filters: RF022 -> ss_sold_date_sk +| | row-size=16B cardinality=2.88M +| | +| 36:AGGREGATE [FINALIZE] +| | output: sum(ss_quantity * ss_sales_price) +| | group by: c_customer_sk +| | row-size=20B cardinality=100.00K +| | +| 35:HASH JOIN [INNER JOIN] +| | hash predicates: ss_customer_sk = c_customer_sk +| | row-size=16B cardinality=2.88M +| | +| |--34:SCAN HDFS [tpcds.customer] +| | HDFS partitions=1/1 files=1 size=12.60MB +| | runtime filters: RF016 -> tpcds.customer.c_customer_sk +| | row-size=4B cardinality=100.00K +| | +| 33:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| runtime filters: RF016 -> tpcds.store_sales.ss_customer_sk +| row-size=12B 
cardinality=2.88M +| +24:HASH JOIN [RIGHT SEMI JOIN] +| hash predicates: c_customer_sk = cs_bill_customer_sk +| runtime filters: RF000 <- cs_bill_customer_sk +| row-size=36B cardinality=85.31K +| +|--23:HASH JOIN [LEFT SEMI JOIN] +| | hash predicates: cs_item_sk = i_item_sk +| | row-size=36B cardinality=85.31K +| | +| |--22:AGGREGATE [FINALIZE] +| | | group by: i_item_sk +| | | row-size=8B cardinality=17.98K +| | | +| | 08:AGGREGATE [FINALIZE] +| | | output: count(*) +| | | group by: substr(i_item_desc, 1, 30), i_item_sk, d_date +| | | having: count(*) > 4 +| | | row-size=50B cardinality=235.45K +| | | +| | 07:HASH JOIN [INNER JOIN] +| | | hash predicates: ss_item_sk = i_item_sk +| | | row-size=162B cardinality=2.35M +| | | +| | |--05:SCAN HDFS [tpcds.item] +| | | HDFS partitions=1/1 files=1 size=4.82MB +| | | row-size=120B cardinality=18.00K +| | | +| | 06:HASH JOIN [INNER JOIN] +| | | hash predicates: ss_sold_date_sk = d_date_sk +| | | runtime filters: RF014 <- d_date_sk +| | | row-size=42B cardinality=2.35M +| | | +| | |--04:SCAN HDFS [tpcds.date_dim] +| | | HDFS partitions=1/1 files=1 size=9.84MB +| | | predicates: d_year IN (2000, 2001, 2002, 2003) +| | | row-size=30B cardinality=1.49K +| | | +| | 03:SCAN HDFS [tpcds.store_sales] +| | HDFS partitions=1824/1824 files=1824 size=346.60MB +| | runtime filters: RF014 -> ss_sold_date_sk +| | row-size=12B cardinality=2.88M +| | +| 21:HASH JOIN [INNER JOIN] +| | hash predicates: cs_sold_date_sk = d_date_sk +| | runtime filters: RF010 <- d_date_sk +| | row-size=36B cardinality=85.31K +| | +| |--02:SCAN HDFS [tpcds.date_dim] +| | HDFS partitions=1/1 files=1 size=9.84MB +| | predicates: d_year = 2000, d_moy = 2 +| | row-size=12B cardinality=108 +| | +| 01:SCAN HDFS [tpcds.catalog_sales] +| HDFS partitions=1/1 files=1 size=282.20MB +| runtime filters: RF010 -> cs_sold_date_sk +| row-size=24B cardinality=1.44M +| +20:NESTED LOOP JOIN [INNER JOIN] +| predicates: sum(ss_quantity * ss_sales_price) > 0.500000 * max(csales) +| 
row-size=36B cardinality=100.00K +| +|--19:AGGREGATE [FINALIZE] +| | output: max(sum(ss_quantity * ss_sales_price)) +| | row-size=16B cardinality=1 +| | +| 18:AGGREGATE [FINALIZE] +| | output: sum(ss_quantity * ss_sales_price) +| | group by: c_customer_sk +| | row-size=20B cardinality=100.00K +| | +| 17:HASH JOIN [INNER JOIN] +| | hash predicates: ss_customer_sk = c_customer_sk +| | runtime filters: RF004 <- c_customer_sk +| | row-size=28B cardinality=2.35M +| | +| |--14:SCAN HDFS [tpcds.customer] +| | HDFS partitions=1/1 files=1 size=12.60MB +| | row-size=4B cardinality=100.00K +| | +| 16:HASH JOIN [INNER JOIN] +| | hash predicates: ss_sold_date_sk = d_date_sk +| | runtime filters: RF006 <- d_date_sk +| | row-size=24B cardinality=2.35M +| | +| |--15:SCAN HDFS [tpcds.date_dim] +| | HDFS partitions=1/1 files=1 size=9.84MB +| | predicates: d_year IN (2000, 2001, 2002, 2003) +| | row-size=8B cardinality=1.49K +| | +| 13:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| runtime filters: RF006 -> ss_sold_date_sk, RF004 -> ss_customer_sk +| row-size=16B cardinality=2.88M +| +12:AGGREGATE [FINALIZE] +| output: sum(ss_quantity * ss_sales_price) +| group by: c_customer_sk +| row-size=20B cardinality=100.00K +| +11:HASH JOIN [INNER JOIN] +| hash predicates: ss_customer_sk = c_customer_sk +| runtime filters: RF002 <- c_customer_sk +| row-size=16B cardinality=2.88M +| +|--10:SCAN HDFS [tpcds.customer] +| HDFS partitions=1/1 files=1 size=12.60MB +| runtime filters: RF000 -> tpcds.customer.c_customer_sk +| row-size=4B cardinality=100.00K +| +09:SCAN HDFS [tpcds.store_sales] + HDFS partitions=1824/1824 files=1824 size=346.60MB + runtime filters: RF000 -> tpcds.store_sales.ss_customer_sk, RF002 -> ss_customer_sk + row-size=12B cardinality=2.88M +---- DISTRIBUTEDPLAN +Max Per-Host Resource Reservation: Memory=370.38MB Threads=50 +Per-Host Resource Estimates: Memory=2.09GB +PLAN-ROOT SINK +| +89:AGGREGATE [FINALIZE] +| output: sum:merge(sales) +| 
limit: 100 +| row-size=16B cardinality=1 +| +88:EXCHANGE [UNPARTITIONED] +| +49:AGGREGATE +| output: sum(sales) +| row-size=16B cardinality=1 +| +00:UNION +| row-size=8B cardinality=128.16K +| +|--48:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED] +| | hash predicates: c_customer_sk = ws_bill_customer_sk +| | runtime filters: RF016 <- ws_bill_customer_sk +| | row-size=36B cardinality=42.85K +| | +| |--87:EXCHANGE [HASH(ws_bill_customer_sk)] +| | | +| | 47:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| | | hash predicates: ws_item_sk = i_item_sk +| | | row-size=36B cardinality=42.85K +| | | +| | |--86:EXCHANGE [BROADCAST] +| | | | +| | | 85:AGGREGATE [FINALIZE] +| | | | group by: i_item_sk +| | | | row-size=8B cardinality=17.98K +| | | | +| | | 84:EXCHANGE [HASH(i_item_sk)] +| | | | +| | | 46:AGGREGATE [STREAMING] +| | | | group by: i_item_sk +| | | | row-size=8B cardinality=17.98K +| | | | +| | | 83:AGGREGATE [FINALIZE] +| | | | output: count:merge(*) +| | | | group by: substr(i_item_desc, 1, 30), i_item_sk, d_date +| | | | having: count(*) > 4 +| | | | row-size=50B cardinality=235.45K +| | | | +| | | 82:EXCHANGE [HASH(substr(i_item_desc, 1, 30),i_item_sk,d_date)] +| | | | +| | | 32:AGGREGATE [STREAMING] +| | | | output: count(*) +| | | | group by: substr(i_item_desc, 1, 30), i_item_sk, d_date +| | | | row-size=50B cardinality=2.35M +| | | | +| | | 31:HASH JOIN [INNER JOIN, BROADCAST] +| | | | hash predicates: ss_item_sk = i_item_sk +| | | | row-size=162B cardinality=2.35M +| | | | +| | | |--81:EXCHANGE [BROADCAST] +| | | | | +| | | | 29:SCAN HDFS [tpcds.item] +| | | | HDFS partitions=1/1 files=1 size=4.82MB +| | | | row-size=120B cardinality=18.00K +| | | | +| | | 30:HASH JOIN [INNER JOIN, BROADCAST] +| | | | hash predicates: ss_sold_date_sk = d_date_sk +| | | | runtime filters: RF030 <- d_date_sk +| | | | row-size=42B cardinality=2.35M +| | | | +| | | |--80:EXCHANGE [BROADCAST] +| | | | | +| | | | 28:SCAN HDFS [tpcds.date_dim] +| | | | HDFS partitions=1/1 files=1 size=9.84MB 
+| | | | predicates: d_year IN (2000, 2001, 2002, 2003) +| | | | row-size=30B cardinality=1.49K +| | | | +| | | 27:SCAN HDFS [tpcds.store_sales] +| | | HDFS partitions=1824/1824 files=1824 size=346.60MB +| | | runtime filters: RF030 -> ss_sold_date_sk +| | | row-size=12B cardinality=2.88M +| | | +| | 45:HASH JOIN [INNER JOIN, BROADCAST] +| | | hash predicates: ws_sold_date_sk = d_date_sk +| | | runtime filters: RF026 <- d_date_sk +| | | row-size=36B cardinality=42.85K +| | | +| | |--79:EXCHANGE [BROADCAST] +| | | | +| | | 26:SCAN HDFS [tpcds.date_dim] +| | | HDFS partitions=1/1 files=1 size=9.84MB +| | | predicates: d_year = 2000, d_moy = 2 +| | | row-size=12B cardinality=108 +| | | +| | 25:SCAN HDFS [tpcds.web_sales] +| | HDFS partitions=1/1 files=1 size=140.07MB +| | runtime filters: RF026 -> ws_sold_date_sk +| | row-size=24B cardinality=719.38K +| | +| 44:NESTED LOOP JOIN [INNER JOIN, BROADCAST] +| | predicates: sum(ss_quantity * ss_sales_price) > 0.500000 * max(csales) +| | row-size=36B cardinality=100.00K +| | +| |--78:EXCHANGE [BROADCAST] +| | | +| | 77:AGGREGATE [FINALIZE] +| | | output: max:merge(csales) +| | | row-size=16B cardinality=1 +| | | +| | 76:EXCHANGE [UNPARTITIONED] +| | | +| | 43:AGGREGATE +| | | output: max(sum(ss_quantity * ss_sales_price)) +| | | row-size=16B cardinality=1 +| | | +| | 75:AGGREGATE [FINALIZE] +| | | output: sum:merge(ss_quantity * ss_sales_price) +| | | group by: c_customer_sk +| | | row-size=20B cardinality=100.00K +| | | +| | 74:EXCHANGE [HASH(c_customer_sk)] +| | | +| | 42:AGGREGATE [STREAMING] +| | | output: sum(ss_quantity * ss_sales_price) +| | | group by: c_customer_sk +| | | row-size=20B cardinality=100.00K +| | | +| | 41:HASH JOIN [INNER JOIN, BROADCAST] +| | | hash predicates: ss_customer_sk = c_customer_sk +| | | row-size=28B cardinality=2.35M +| | | +| | |--73:EXCHANGE [BROADCAST] +| | | | +| | | 38:SCAN HDFS [tpcds.customer] +| | | HDFS partitions=1/1 files=1 size=12.60MB +| | | row-size=4B cardinality=100.00K +| 
| | +| | 40:HASH JOIN [INNER JOIN, BROADCAST] +| | | hash predicates: ss_sold_date_sk = d_date_sk +| | | runtime filters: RF022 <- d_date_sk +| | | row-size=24B cardinality=2.35M +| | | +| | |--72:EXCHANGE [BROADCAST] +| | | | +| | | 39:SCAN HDFS [tpcds.date_dim] +| | | HDFS partitions=1/1 files=1 size=9.84MB +| | | predicates: d_year IN (2000, 2001, 2002, 2003) +| | | row-size=8B cardinality=1.49K +| | | +| | 37:SCAN HDFS [tpcds.store_sales] +| | HDFS partitions=1824/1824 files=1824 size=346.60MB +| | runtime filters: RF022 -> ss_sold_date_sk +| | row-size=16B cardinality=2.88M +| | +| 71:AGGREGATE [FINALIZE] +| | output: sum:merge(ss_quantity * ss_sales_price) +| | group by: c_customer_sk +| | row-size=20B cardinality=100.00K +| | +| 70:EXCHANGE [HASH(c_customer_sk)] +| | +| 36:AGGREGATE [STREAMING] +| | output: sum(ss_quantity * ss_sales_price) +| | group by: c_customer_sk +| | row-size=20B cardinality=100.00K +| | +| 35:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: ss_customer_sk = c_customer_sk +| | row-size=16B cardinality=2.88M +| | +| |--69:EXCHANGE [BROADCAST] +| | | +| | 34:SCAN HDFS [tpcds.customer] +| | HDFS partitions=1/1 files=1 size=12.60MB +| | runtime filters: RF016 -> tpcds.customer.c_customer_sk +| | row-size=4B cardinality=100.00K +| | +| 33:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| runtime filters: RF016 -> tpcds.store_sales.ss_customer_sk +| row-size=12B cardinality=2.88M +| +24:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED] +| hash predicates: c_customer_sk = cs_bill_customer_sk +| runtime filters: RF000 <- cs_bill_customer_sk +| row-size=36B cardinality=85.31K +| +|--68:EXCHANGE [HASH(cs_bill_customer_sk)] +| | +| 23:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| | hash predicates: cs_item_sk = i_item_sk +| | row-size=36B cardinality=85.31K +| | +| |--67:EXCHANGE [BROADCAST] +| | | +| | 66:AGGREGATE [FINALIZE] +| | | group by: i_item_sk +| | | row-size=8B cardinality=17.98K +| | | +| | 65:EXCHANGE 
[HASH(i_item_sk)] +| | | +| | 22:AGGREGATE [STREAMING] +| | | group by: i_item_sk +| | | row-size=8B cardinality=17.98K +| | | +| | 64:AGGREGATE [FINALIZE] +| | | output: count:merge(*) +| | | group by: substr(i_item_desc, 1, 30), i_item_sk, d_date +| | | having: count(*) > 4 +| | | row-size=50B cardinality=235.45K +| | | +| | 63:EXCHANGE [HASH(substr(i_item_desc, 1, 30),i_item_sk,d_date)] +| | | +| | 08:AGGREGATE [STREAMING] +| | | output: count(*) +| | | group by: substr(i_item_desc, 1, 30), i_item_sk, d_date +| | | row-size=50B cardinality=2.35M +| | | +| | 07:HASH JOIN [INNER JOIN, BROADCAST] +| | | hash predicates: ss_item_sk = i_item_sk +| | | row-size=162B cardinality=2.35M +| | | +| | |--62:EXCHANGE [BROADCAST] +| | | | +| | | 05:SCAN HDFS [tpcds.item] +| | | HDFS partitions=1/1 files=1 size=4.82MB +| | | row-size=120B cardinality=18.00K +| | | +| | 06:HASH JOIN [INNER JOIN, BROADCAST] +| | | hash predicates: ss_sold_date_sk = d_date_sk +| | | runtime filters: RF014 <- d_date_sk +| | | row-size=42B cardinality=2.35M +| | | +| | |--61:EXCHANGE [BROADCAST] +| | | | +| | | 04:SCAN HDFS [tpcds.date_dim] +| | | HDFS partitions=1/1 files=1 size=9.84MB +| | | predicates: d_year IN (2000, 2001, 2002, 2003) +| | | row-size=30B cardinality=1.49K +| | | +| | 03:SCAN HDFS [tpcds.store_sales] +| | HDFS partitions=1824/1824 files=1824 size=346.60MB +| | runtime filters: RF014 -> ss_sold_date_sk +| | row-size=12B cardinality=2.88M +| | +| 21:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: cs_sold_date_sk = d_date_sk +| | runtime filters: RF010 <- d_date_sk +| | row-size=36B cardinality=85.31K +| | +| |--60:EXCHANGE [BROADCAST] +| | | +| | 02:SCAN HDFS [tpcds.date_dim] +| | HDFS partitions=1/1 files=1 size=9.84MB +| | predicates: d_year = 2000, d_moy = 2 +| | row-size=12B cardinality=108 +| | +| 01:SCAN HDFS [tpcds.catalog_sales] +| HDFS partitions=1/1 files=1 size=282.20MB +| runtime filters: RF010 -> cs_sold_date_sk +| row-size=24B cardinality=1.44M +| +20:NESTED 
LOOP JOIN [INNER JOIN, BROADCAST] +| predicates: sum(ss_quantity * ss_sales_price) > 0.500000 * max(csales) +| row-size=36B cardinality=100.00K +| +|--59:EXCHANGE [BROADCAST] +| | +| 58:AGGREGATE [FINALIZE] +| | output: max:merge(csales) +| | row-size=16B cardinality=1 +| | +| 57:EXCHANGE [UNPARTITIONED] +| | +| 19:AGGREGATE +| | output: max(sum(ss_quantity * ss_sales_price)) +| | row-size=16B cardinality=1 +| | +| 56:AGGREGATE [FINALIZE] +| | output: sum:merge(ss_quantity * ss_sales_price) +| | group by: c_customer_sk +| | row-size=20B cardinality=100.00K +| | +| 55:EXCHANGE [HASH(c_customer_sk)] +| | +| 18:AGGREGATE [STREAMING] +| | output: sum(ss_quantity * ss_sales_price) +| | group by: c_customer_sk +| | row-size=20B cardinality=100.00K +| | +| 17:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: ss_customer_sk = c_customer_sk +| | runtime filters: RF004 <- c_customer_sk +| | row-size=28B cardinality=2.35M +| | +| |--54:EXCHANGE [BROADCAST] +| | | +| | 14:SCAN HDFS [tpcds.customer] +| | HDFS partitions=1/1 files=1 size=12.60MB +| | row-size=4B cardinality=100.00K +| | +| 16:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: ss_sold_date_sk = d_date_sk +| | runtime filters: RF006 <- d_date_sk +| | row-size=24B cardinality=2.35M +| | +| |--53:EXCHANGE [BROADCAST] +| | | +| | 15:SCAN HDFS [tpcds.date_dim] +| | HDFS partitions=1/1 files=1 size=9.84MB +| | predicates: d_year IN (2000, 2001, 2002, 2003) +| | row-size=8B cardinality=1.49K +| | +| 13:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| runtime filters: RF006 -> ss_sold_date_sk, RF004 -> ss_customer_sk +| row-size=16B cardinality=2.88M +| +52:AGGREGATE [FINALIZE] +| output: sum:merge(ss_quantity * ss_sales_price) +| group by: c_customer_sk +| row-size=20B cardinality=100.00K +| +51:EXCHANGE [HASH(c_customer_sk)] +| +12:AGGREGATE [STREAMING] +| output: sum(ss_quantity * ss_sales_price) +| group by: c_customer_sk +| row-size=20B cardinality=100.00K +| +11:HASH 
JOIN [INNER JOIN, BROADCAST] +| hash predicates: ss_customer_sk = c_customer_sk +| runtime filters: RF002 <- c_customer_sk +| row-size=16B cardinality=2.88M +| +|--50:EXCHANGE [BROADCAST] +| | +| 10:SCAN HDFS [tpcds.customer] +| HDFS partitions=1/1 files=1 size=12.60MB +| runtime filters: RF000 -> tpcds.customer.c_customer_sk +| row-size=4B cardinality=100.00K +| +09:SCAN HDFS [tpcds.store_sales] + HDFS partitions=1824/1824 files=1824 size=346.60MB + runtime filters: RF000 -> tpcds.store_sales.ss_customer_sk, RF002 -> ss_customer_sk + row-size=12B cardinality=2.88M +==== +# TPCDS-Q33 +with ss as ( + select + i_manufact_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id), + cs as ( + select + i_manufact_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id), + ws as ( + select + i_manufact_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_manufact_id in (select + i_manufact_id +from + item +where i_category in ('Electronics')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 5 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_manufact_id) + select i_manufact_id ,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) 
tmp1 + group by i_manufact_id + order by total_sales +limit 100; +---- PLAN +Max Per-Host Resource Reservation: Memory=59.69MB Threads=6 +Per-Host Resource Estimates: Memory=348MB +PLAN-ROOT SINK +| +32:TOP-N [LIMIT=100] +| order by: sum(total_sales) ASC +| row-size=20B cardinality=100 +| +31:AGGREGATE [FINALIZE] +| output: sum(total_sales) +| group by: i_manufact_id +| row-size=20B cardinality=2.83K +| +00:UNION +| row-size=20B cardinality=2.83K +| +|--30:AGGREGATE [FINALIZE] +| | output: sum(ws_ext_sales_price) +| | group by: i_manufact_id +| | row-size=20B cardinality=944 +| | +| 29:HASH JOIN [LEFT SEMI JOIN] +| | hash predicates: i_manufact_id = i_manufact_id +| | row-size=52B cardinality=9.81K +| | +| |--25:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | predicates: i_category IN ('Electronics') +| | row-size=22B cardinality=1.80K +| | +| 28:HASH JOIN [INNER JOIN] +| | hash predicates: ws_item_sk = i_item_sk +| | row-size=52B cardinality=9.81K +| | +| |--24:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | row-size=12B cardinality=18.00K +| | +| 27:HASH JOIN [INNER JOIN] +| | hash predicates: ws_bill_addr_sk = ca_address_sk +| | runtime filters: RF020 <- ca_address_sk +| | row-size=40B cardinality=9.81K +| | +| |--23:SCAN HDFS [tpcds.customer_address] +| | HDFS partitions=1/1 files=1 size=5.25MB +| | predicates: ca_gmt_offset = -5 +| | row-size=8B cardinality=8.33K +| | +| 26:HASH JOIN [INNER JOIN] +| | hash predicates: ws_sold_date_sk = d_date_sk +| | runtime filters: RF022 <- d_date_sk +| | row-size=32B cardinality=42.85K +| | +| |--22:SCAN HDFS [tpcds.date_dim] +| | HDFS partitions=1/1 files=1 size=9.84MB +| | predicates: d_year = 1998, d_moy = 5 +| | row-size=12B cardinality=108 +| | +| 21:SCAN HDFS [tpcds.web_sales] +| HDFS partitions=1/1 files=1 size=140.07MB +| runtime filters: RF022 -> ws_sold_date_sk, RF020 -> ws_bill_addr_sk +| row-size=20B cardinality=719.38K +| +|--20:AGGREGATE [FINALIZE] +| | output: 
sum(cs_ext_sales_price) +| | group by: i_manufact_id +| | row-size=20B cardinality=944 +| | +| 19:HASH JOIN [LEFT SEMI JOIN] +| | hash predicates: i_manufact_id = i_manufact_id +| | runtime filters: RF008 <- i_manufact_id +| | row-size=52B cardinality=14.81K +| | +| |--15:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | predicates: i_category IN ('Electronics') +| | row-size=22B cardinality=1.80K +| | +| 18:HASH JOIN [INNER JOIN] +| | hash predicates: cs_item_sk = i_item_sk +| | runtime filters: RF010 <- i_item_sk +| | row-size=52B cardinality=14.81K +| | +| |--14:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | runtime filters: RF008 -> i_manufact_id +| | row-size=12B cardinality=18.00K +| | +| 17:HASH JOIN [INNER JOIN] +| | hash predicates: cs_bill_addr_sk = ca_address_sk +| | runtime filters: RF012 <- ca_address_sk +| | row-size=40B cardinality=14.81K +| | +| |--13:SCAN HDFS [tpcds.customer_address] +| | HDFS partitions=1/1 files=1 size=5.25MB +| | predicates: ca_gmt_offset = -5 +| | row-size=8B cardinality=8.33K +| | +| 16:HASH JOIN [INNER JOIN] +| | hash predicates: cs_sold_date_sk = d_date_sk +| | runtime filters: RF014 <- d_date_sk +| | row-size=32B cardinality=85.31K +| | +| |--12:SCAN HDFS [tpcds.date_dim] +| | HDFS partitions=1/1 files=1 size=9.84MB +| | predicates: d_year = 1998, d_moy = 5 +| | row-size=12B cardinality=108 +| | +| 11:SCAN HDFS [tpcds.catalog_sales] +| HDFS partitions=1/1 files=1 size=282.20MB +| runtime filters: RF014 -> cs_sold_date_sk, RF012 -> cs_bill_addr_sk, RF010 -> cs_item_sk +| row-size=20B cardinality=1.44M +| +10:AGGREGATE [FINALIZE] +| output: sum(ss_ext_sales_price) +| group by: i_manufact_id +| row-size=20B cardinality=944 +| +09:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: i_manufact_id = i_manufact_id +| runtime filters: RF000 <- i_manufact_id +| row-size=52B cardinality=28.50K +| +|--05:SCAN HDFS [tpcds.item] +| HDFS partitions=1/1 files=1 size=4.82MB +| predicates: 
i_category IN ('Electronics') +| row-size=22B cardinality=1.80K +| +08:HASH JOIN [INNER JOIN] +| hash predicates: ss_item_sk = i_item_sk +| runtime filters: RF002 <- i_item_sk +| row-size=52B cardinality=28.50K +| +|--04:SCAN HDFS [tpcds.item] +| HDFS partitions=1/1 files=1 size=4.82MB +| runtime filters: RF000 -> i_manufact_id +| row-size=12B cardinality=18.00K +| +07:HASH JOIN [INNER JOIN] +| hash predicates: ss_addr_sk = ca_address_sk +| runtime filters: RF004 <- ca_address_sk +| row-size=40B cardinality=28.50K +| +|--03:SCAN HDFS [tpcds.customer_address] +| HDFS partitions=1/1 files=1 size=5.25MB +| predicates: ca_gmt_offset = -5 +| row-size=8B cardinality=8.33K +| +06:HASH JOIN [INNER JOIN] +| hash predicates: ss_sold_date_sk = d_date_sk +| runtime filters: RF006 <- d_date_sk +| row-size=32B cardinality=170.55K +| +|--02:SCAN HDFS [tpcds.date_dim] +| HDFS partitions=1/1 files=1 size=9.84MB +| predicates: d_year = 1998, d_moy = 5 +| row-size=12B cardinality=108 +| +01:SCAN HDFS [tpcds.store_sales] + HDFS partitions=1824/1824 files=1824 size=346.60MB + runtime filters: RF006 -> ss_sold_date_sk, RF004 -> ss_addr_sk, RF002 -> ss_item_sk + row-size=20B cardinality=2.88M +---- DISTRIBUTEDPLAN +Max Per-Host Resource Reservation: Memory=168.12MB Threads=35 +Per-Host Resource Estimates: Memory=994MB +PLAN-ROOT SINK +| +55:MERGING-EXCHANGE [UNPARTITIONED] +| order by: sum(total_sales) ASC +| limit: 100 +| +32:TOP-N [LIMIT=100] +| order by: sum(total_sales) ASC +| row-size=20B cardinality=100 +| +54:AGGREGATE [FINALIZE] +| output: sum:merge(total_sales) +| group by: i_manufact_id +| row-size=20B cardinality=2.83K +| +53:EXCHANGE [HASH(i_manufact_id)] +| +31:AGGREGATE [STREAMING] +| output: sum(total_sales) +| group by: i_manufact_id +| row-size=20B cardinality=2.83K +| +00:UNION +| row-size=20B cardinality=2.83K +| +|--52:AGGREGATE [FINALIZE] +| | output: sum:merge(ws_ext_sales_price) +| | group by: i_manufact_id +| | row-size=20B cardinality=944 +| | +| 51:EXCHANGE 
[HASH(i_manufact_id)] +| | +| 30:AGGREGATE [STREAMING] +| | output: sum(ws_ext_sales_price) +| | group by: i_manufact_id +| | row-size=20B cardinality=944 +| | +| 29:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| | hash predicates: i_manufact_id = i_manufact_id +| | row-size=52B cardinality=9.81K +| | +| |--50:EXCHANGE [BROADCAST] +| | | +| | 25:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | predicates: i_category IN ('Electronics') +| | row-size=22B cardinality=1.80K +| | +| 28:HASH JOIN [INNER JOIN, PARTITIONED] +| | hash predicates: ws_item_sk = i_item_sk +| | row-size=52B cardinality=9.81K +| | +| |--49:EXCHANGE [HASH(i_item_sk)] +| | | +| | 24:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | row-size=12B cardinality=18.00K +| | +| 48:EXCHANGE [HASH(ws_item_sk)] +| | +| 27:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: ws_bill_addr_sk = ca_address_sk +| | runtime filters: RF020 <- ca_address_sk +| | row-size=40B cardinality=9.81K +| | +| |--47:EXCHANGE [BROADCAST] +| | | +| | 23:SCAN HDFS [tpcds.customer_address] +| | HDFS partitions=1/1 files=1 size=5.25MB +| | predicates: ca_gmt_offset = -5 +| | row-size=8B cardinality=8.33K +| | +| 26:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: ws_sold_date_sk = d_date_sk +| | runtime filters: RF022 <- d_date_sk +| | row-size=32B cardinality=42.85K +| | +| |--46:EXCHANGE [BROADCAST] +| | | +| | 22:SCAN HDFS [tpcds.date_dim] +| | HDFS partitions=1/1 files=1 size=9.84MB +| | predicates: d_year = 1998, d_moy = 5 +| | row-size=12B cardinality=108 +| | +| 21:SCAN HDFS [tpcds.web_sales] +| HDFS partitions=1/1 files=1 size=140.07MB +| runtime filters: RF022 -> ws_sold_date_sk, RF020 -> ws_bill_addr_sk +| row-size=20B cardinality=719.38K +| +|--45:AGGREGATE [FINALIZE] +| | output: sum:merge(cs_ext_sales_price) +| | group by: i_manufact_id +| | row-size=20B cardinality=944 +| | +| 44:EXCHANGE [HASH(i_manufact_id)] +| | +| 20:AGGREGATE [STREAMING] +| | output: 
sum(cs_ext_sales_price) +| | group by: i_manufact_id +| | row-size=20B cardinality=944 +| | +| 19:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| | hash predicates: i_manufact_id = i_manufact_id +| | runtime filters: RF008 <- i_manufact_id +| | row-size=52B cardinality=14.81K +| | +| |--43:EXCHANGE [BROADCAST] +| | | +| | 15:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | predicates: i_category IN ('Electronics') +| | row-size=22B cardinality=1.80K +| | +| 18:HASH JOIN [INNER JOIN, PARTITIONED] +| | hash predicates: cs_item_sk = i_item_sk +| | runtime filters: RF010 <- i_item_sk +| | row-size=52B cardinality=14.81K +| | +| |--42:EXCHANGE [HASH(i_item_sk)] +| | | +| | 14:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | runtime filters: RF008 -> i_manufact_id +| | row-size=12B cardinality=18.00K +| | +| 41:EXCHANGE [HASH(cs_item_sk)] +| | +| 17:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: cs_bill_addr_sk = ca_address_sk +| | runtime filters: RF012 <- ca_address_sk +| | row-size=40B cardinality=14.81K +| | +| |--40:EXCHANGE [BROADCAST] +| | | +| | 13:SCAN HDFS [tpcds.customer_address] +| | HDFS partitions=1/1 files=1 size=5.25MB +| | predicates: ca_gmt_offset = -5 +| | row-size=8B cardinality=8.33K +| | +| 16:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: cs_sold_date_sk = d_date_sk +| | runtime filters: RF014 <- d_date_sk +| | row-size=32B cardinality=85.31K +| | +| |--39:EXCHANGE [BROADCAST] +| | | +| | 12:SCAN HDFS [tpcds.date_dim] +| | HDFS partitions=1/1 files=1 size=9.84MB +| | predicates: d_year = 1998, d_moy = 5 +| | row-size=12B cardinality=108 +| | +| 11:SCAN HDFS [tpcds.catalog_sales] +| HDFS partitions=1/1 files=1 size=282.20MB +| runtime filters: RF014 -> cs_sold_date_sk, RF012 -> cs_bill_addr_sk, RF010 -> cs_item_sk +| row-size=20B cardinality=1.44M +| +38:AGGREGATE [FINALIZE] +| output: sum:merge(ss_ext_sales_price) +| group by: i_manufact_id +| row-size=20B cardinality=944 +| +37:EXCHANGE 
[HASH(i_manufact_id)] +| +10:AGGREGATE [STREAMING] +| output: sum(ss_ext_sales_price) +| group by: i_manufact_id +| row-size=20B cardinality=944 +| +09:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: i_manufact_id = i_manufact_id +| runtime filters: RF000 <- i_manufact_id +| row-size=52B cardinality=28.50K +| +|--36:EXCHANGE [BROADCAST] +| | +| 05:SCAN HDFS [tpcds.item] +| HDFS partitions=1/1 files=1 size=4.82MB +| predicates: i_category IN ('Electronics') +| row-size=22B cardinality=1.80K +| +08:HASH JOIN [INNER JOIN, BROADCAST] +| hash predicates: ss_item_sk = i_item_sk +| runtime filters: RF002 <- i_item_sk +| row-size=52B cardinality=28.50K +| +|--35:EXCHANGE [BROADCAST] +| | +| 04:SCAN HDFS [tpcds.item] +| HDFS partitions=1/1 files=1 size=4.82MB +| runtime filters: RF000 -> i_manufact_id +| row-size=12B cardinality=18.00K +| +07:HASH JOIN [INNER JOIN, BROADCAST] +| hash predicates: ss_addr_sk = ca_address_sk +| runtime filters: RF004 <- ca_address_sk +| row-size=40B cardinality=28.50K +| +|--34:EXCHANGE [BROADCAST] +| | +| 03:SCAN HDFS [tpcds.customer_address] +| HDFS partitions=1/1 files=1 size=5.25MB +| predicates: ca_gmt_offset = -5 +| row-size=8B cardinality=8.33K +| +06:HASH JOIN [INNER JOIN, BROADCAST] +| hash predicates: ss_sold_date_sk = d_date_sk +| runtime filters: RF006 <- d_date_sk +| row-size=32B cardinality=170.55K +| +|--33:EXCHANGE [BROADCAST] +| | +| 02:SCAN HDFS [tpcds.date_dim] +| HDFS partitions=1/1 files=1 size=9.84MB +| predicates: d_year = 1998, d_moy = 5 +| row-size=12B cardinality=108 +| +01:SCAN HDFS [tpcds.store_sales] + HDFS partitions=1824/1824 files=1824 size=346.60MB + runtime filters: RF006 -> ss_sold_date_sk, RF004 -> ss_addr_sk, RF002 -> ss_item_sk + row-size=20B cardinality=2.88M +==== +# TPCDS-Q60 +with ss as ( + select + i_item_id,sum(ss_ext_sales_price) total_sales + from + store_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Music')) + 
and ss_item_sk = i_item_sk + and ss_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 9 + and ss_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + cs as ( + select + i_item_id,sum(cs_ext_sales_price) total_sales + from + catalog_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Music')) + and cs_item_sk = i_item_sk + and cs_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 9 + and cs_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id), + ws as ( + select + i_item_id,sum(ws_ext_sales_price) total_sales + from + web_sales, + date_dim, + customer_address, + item + where + i_item_id in (select + i_item_id +from + item +where i_category in ('Music')) + and ws_item_sk = i_item_sk + and ws_sold_date_sk = d_date_sk + and d_year = 1998 + and d_moy = 9 + and ws_bill_addr_sk = ca_address_sk + and ca_gmt_offset = -5 + group by i_item_id) + select + i_item_id +,sum(total_sales) total_sales + from (select * from ss + union all + select * from cs + union all + select * from ws) tmp1 + group by i_item_id + order by i_item_id + ,total_sales + limit 100; +---- PLAN +Max Per-Host Resource Reservation: Memory=59.69MB Threads=6 +Per-Host Resource Estimates: Memory=346MB +PLAN-ROOT SINK +| +32:TOP-N [LIMIT=100] +| order by: i_item_id ASC, sum(total_sales) ASC +| row-size=44B cardinality=100 +| +31:AGGREGATE [FINALIZE] +| output: sum(total_sales) +| group by: i_item_id +| row-size=44B cardinality=10.80K +| +00:UNION +| row-size=44B cardinality=10.80K +| +|--30:AGGREGATE [FINALIZE] +| | output: sum(ws_ext_sales_price) +| | group by: i_item_id +| | row-size=44B cardinality=2.00K +| | +| 29:HASH JOIN [LEFT SEMI JOIN] +| | hash predicates: i_item_id = i_item_id +| | runtime filters: RF016 <- i_item_id +| | row-size=76B cardinality=2.00K +| | +| |--25:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | predicates: i_category IN 
('Music') +| | row-size=46B cardinality=1.80K +| | +| 28:HASH JOIN [INNER JOIN] +| | hash predicates: i_item_sk = ws_item_sk +| | runtime filters: RF018 <- ws_item_sk +| | row-size=76B cardinality=9.81K +| | +| |--27:HASH JOIN [INNER JOIN] +| | | hash predicates: ws_bill_addr_sk = ca_address_sk +| | | runtime filters: RF020 <- ca_address_sk +| | | row-size=40B cardinality=9.81K +| | | +| | |--23:SCAN HDFS [tpcds.customer_address] +| | | HDFS partitions=1/1 files=1 size=5.25MB +| | | predicates: ca_gmt_offset = -5 +| | | row-size=8B cardinality=8.33K +| | | +| | 26:HASH JOIN [INNER JOIN] +| | | hash predicates: ws_sold_date_sk = d_date_sk +| | | runtime filters: RF022 <- d_date_sk +| | | row-size=32B cardinality=42.85K +| | | +| | |--22:SCAN HDFS [tpcds.date_dim] +| | | HDFS partitions=1/1 files=1 size=9.84MB +| | | predicates: d_year = 1998, d_moy = 9 +| | | row-size=12B cardinality=108 +| | | +| | 21:SCAN HDFS [tpcds.web_sales] +| | HDFS partitions=1/1 files=1 size=140.07MB +| | runtime filters: RF022 -> ws_sold_date_sk, RF020 -> ws_bill_addr_sk +| | row-size=20B cardinality=719.38K +| | +| 24:SCAN HDFS [tpcds.item] +| HDFS partitions=1/1 files=1 size=4.82MB +| runtime filters: RF016 -> i_item_id, RF018 -> i_item_sk +| row-size=36B cardinality=18.00K +| +|--20:AGGREGATE [FINALIZE] +| | output: sum(cs_ext_sales_price) +| | group by: i_item_id +| | row-size=44B cardinality=3.01K +| | +| 19:HASH JOIN [LEFT SEMI JOIN] +| | hash predicates: i_item_id = i_item_id +| | runtime filters: RF008 <- i_item_id +| | row-size=76B cardinality=3.01K +| | +| |--15:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | predicates: i_category IN ('Music') +| | row-size=46B cardinality=1.80K +| | +| 18:HASH JOIN [INNER JOIN] +| | hash predicates: i_item_sk = cs_item_sk +| | row-size=76B cardinality=14.81K +| | +| |--17:HASH JOIN [INNER JOIN] +| | | hash predicates: cs_bill_addr_sk = ca_address_sk +| | | runtime filters: RF012 <- ca_address_sk +| | | row-size=40B 
cardinality=14.81K +| | | +| | |--13:SCAN HDFS [tpcds.customer_address] +| | | HDFS partitions=1/1 files=1 size=5.25MB +| | | predicates: ca_gmt_offset = -5 +| | | row-size=8B cardinality=8.33K +| | | +| | 16:HASH JOIN [INNER JOIN] +| | | hash predicates: cs_sold_date_sk = d_date_sk +| | | runtime filters: RF014 <- d_date_sk +| | | row-size=32B cardinality=85.31K +| | | +| | |--12:SCAN HDFS [tpcds.date_dim] +| | | HDFS partitions=1/1 files=1 size=9.84MB +| | | predicates: d_year = 1998, d_moy = 9 +| | | row-size=12B cardinality=108 +| | | +| | 11:SCAN HDFS [tpcds.catalog_sales] +| | HDFS partitions=1/1 files=1 size=282.20MB +| | runtime filters: RF014 -> cs_sold_date_sk, RF012 -> cs_bill_addr_sk +| | row-size=20B cardinality=1.44M +| | +| 14:SCAN HDFS [tpcds.item] +| HDFS partitions=1/1 files=1 size=4.82MB +| runtime filters: RF008 -> i_item_id +| row-size=36B cardinality=18.00K +| +10:AGGREGATE [FINALIZE] +| output: sum(ss_ext_sales_price) +| group by: i_item_id +| row-size=44B cardinality=5.79K +| +09:HASH JOIN [LEFT SEMI JOIN] +| hash predicates: i_item_id = i_item_id +| runtime filters: RF000 <- i_item_id +| row-size=76B cardinality=5.79K +| +|--05:SCAN HDFS [tpcds.item] +| HDFS partitions=1/1 files=1 size=4.82MB +| predicates: i_category IN ('Music') +| row-size=46B cardinality=1.80K +| +08:HASH JOIN [INNER JOIN] +| hash predicates: ss_item_sk = i_item_sk +| row-size=76B cardinality=28.50K +| +|--04:SCAN HDFS [tpcds.item] +| HDFS partitions=1/1 files=1 size=4.82MB +| runtime filters: RF000 -> i_item_id +| row-size=36B cardinality=18.00K +| +07:HASH JOIN [INNER JOIN] +| hash predicates: ss_addr_sk = ca_address_sk +| runtime filters: RF004 <- ca_address_sk +| row-size=40B cardinality=28.50K +| +|--03:SCAN HDFS [tpcds.customer_address] +| HDFS partitions=1/1 files=1 size=5.25MB +| predicates: ca_gmt_offset = -5 +| row-size=8B cardinality=8.33K +| +06:HASH JOIN [INNER JOIN] +| hash predicates: ss_sold_date_sk = d_date_sk +| runtime filters: RF006 <- d_date_sk +| 
row-size=32B cardinality=170.55K +| +|--02:SCAN HDFS [tpcds.date_dim] +| HDFS partitions=1/1 files=1 size=9.84MB +| predicates: d_year = 1998, d_moy = 9 +| row-size=12B cardinality=108 +| +01:SCAN HDFS [tpcds.store_sales] + HDFS partitions=1824/1824 files=1824 size=346.60MB + runtime filters: RF006 -> ss_sold_date_sk, RF004 -> ss_addr_sk + row-size=20B cardinality=2.88M +---- DISTRIBUTEDPLAN +Max Per-Host Resource Reservation: Memory=169.12MB Threads=36 +Per-Host Resource Estimates: Memory=998MB +PLAN-ROOT SINK +| +56:MERGING-EXCHANGE [UNPARTITIONED] +| order by: i_item_id ASC, sum(total_sales) ASC +| limit: 100 +| +32:TOP-N [LIMIT=100] +| order by: i_item_id ASC, sum(total_sales) ASC +| row-size=44B cardinality=100 +| +55:AGGREGATE [FINALIZE] +| output: sum:merge(total_sales) +| group by: i_item_id +| row-size=44B cardinality=10.80K +| +54:EXCHANGE [HASH(i_item_id)] +| +31:AGGREGATE [STREAMING] +| output: sum(total_sales) +| group by: i_item_id +| row-size=44B cardinality=10.80K +| +00:UNION +| row-size=44B cardinality=10.80K +| +|--53:AGGREGATE [FINALIZE] +| | output: sum:merge(ws_ext_sales_price) +| | group by: i_item_id +| | row-size=44B cardinality=2.00K +| | +| 52:EXCHANGE [HASH(i_item_id)] +| | +| 30:AGGREGATE [STREAMING] +| | output: sum(ws_ext_sales_price) +| | group by: i_item_id +| | row-size=44B cardinality=2.00K +| | +| 29:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| | hash predicates: i_item_id = i_item_id +| | runtime filters: RF016 <- i_item_id +| | row-size=76B cardinality=2.00K +| | +| |--51:EXCHANGE [BROADCAST] +| | | +| | 25:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | predicates: i_category IN ('Music') +| | row-size=46B cardinality=1.80K +| | +| 28:HASH JOIN [INNER JOIN, PARTITIONED] +| | hash predicates: ws_item_sk = i_item_sk +| | row-size=76B cardinality=9.81K +| | +| |--50:EXCHANGE [HASH(i_item_sk)] +| | | +| | 24:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | runtime filters: RF016 -> 
i_item_id +| | row-size=36B cardinality=18.00K +| | +| 49:EXCHANGE [HASH(ws_item_sk)] +| | +| 27:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: ws_bill_addr_sk = ca_address_sk +| | runtime filters: RF020 <- ca_address_sk +| | row-size=40B cardinality=9.81K +| | +| |--48:EXCHANGE [BROADCAST] +| | | +| | 23:SCAN HDFS [tpcds.customer_address] +| | HDFS partitions=1/1 files=1 size=5.25MB +| | predicates: ca_gmt_offset = -5 +| | row-size=8B cardinality=8.33K +| | +| 26:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: ws_sold_date_sk = d_date_sk +| | runtime filters: RF022 <- d_date_sk +| | row-size=32B cardinality=42.85K +| | +| |--47:EXCHANGE [BROADCAST] +| | | +| | 22:SCAN HDFS [tpcds.date_dim] +| | HDFS partitions=1/1 files=1 size=9.84MB +| | predicates: d_year = 1998, d_moy = 9 +| | row-size=12B cardinality=108 +| | +| 21:SCAN HDFS [tpcds.web_sales] +| HDFS partitions=1/1 files=1 size=140.07MB +| runtime filters: RF022 -> ws_sold_date_sk, RF020 -> ws_bill_addr_sk +| row-size=20B cardinality=719.38K +| +|--46:AGGREGATE [FINALIZE] +| | output: sum:merge(cs_ext_sales_price) +| | group by: i_item_id +| | row-size=44B cardinality=3.01K +| | +| 45:EXCHANGE [HASH(i_item_id)] +| | +| 20:AGGREGATE [STREAMING] +| | output: sum(cs_ext_sales_price) +| | group by: i_item_id +| | row-size=44B cardinality=3.01K +| | +| 19:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| | hash predicates: i_item_id = i_item_id +| | runtime filters: RF008 <- i_item_id +| | row-size=76B cardinality=3.01K +| | +| |--44:EXCHANGE [BROADCAST] +| | | +| | 15:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | predicates: i_category IN ('Music') +| | row-size=46B cardinality=1.80K +| | +| 18:HASH JOIN [INNER JOIN, PARTITIONED] +| | hash predicates: cs_item_sk = i_item_sk +| | row-size=76B cardinality=14.81K +| | +| |--43:EXCHANGE [HASH(i_item_sk)] +| | | +| | 14:SCAN HDFS [tpcds.item] +| | HDFS partitions=1/1 files=1 size=4.82MB +| | runtime filters: RF008 -> i_item_id +| | 
row-size=36B cardinality=18.00K +| | +| 42:EXCHANGE [HASH(cs_item_sk)] +| | +| 17:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: cs_bill_addr_sk = ca_address_sk +| | runtime filters: RF012 <- ca_address_sk +| | row-size=40B cardinality=14.81K +| | +| |--41:EXCHANGE [BROADCAST] +| | | +| | 13:SCAN HDFS [tpcds.customer_address] +| | HDFS partitions=1/1 files=1 size=5.25MB +| | predicates: ca_gmt_offset = -5 +| | row-size=8B cardinality=8.33K +| | +| 16:HASH JOIN [INNER JOIN, BROADCAST] +| | hash predicates: cs_sold_date_sk = d_date_sk +| | runtime filters: RF014 <- d_date_sk +| | row-size=32B cardinality=85.31K +| | +| |--40:EXCHANGE [BROADCAST] +| | | +| | 12:SCAN HDFS [tpcds.date_dim] +| | HDFS partitions=1/1 files=1 size=9.84MB +| | predicates: d_year = 1998, d_moy = 9 +| | row-size=12B cardinality=108 +| | +| 11:SCAN HDFS [tpcds.catalog_sales] +| HDFS partitions=1/1 files=1 size=282.20MB +| runtime filters: RF014 -> cs_sold_date_sk, RF012 -> cs_bill_addr_sk +| row-size=20B cardinality=1.44M +| +39:AGGREGATE [FINALIZE] +| output: sum:merge(ss_ext_sales_price) +| group by: i_item_id +| row-size=44B cardinality=5.79K +| +38:EXCHANGE [HASH(i_item_id)] +| +10:AGGREGATE [STREAMING] +| output: sum(ss_ext_sales_price) +| group by: i_item_id +| row-size=44B cardinality=5.79K +| +09:HASH JOIN [LEFT SEMI JOIN, BROADCAST] +| hash predicates: i_item_id = i_item_id +| runtime filters: RF000 <- i_item_id +| row-size=76B cardinality=5.79K +| +|--37:EXCHANGE [BROADCAST] +| | +| 05:SCAN HDFS [tpcds.item] +| HDFS partitions=1/1 files=1 size=4.82MB +| predicates: i_category IN ('Music') +| row-size=46B cardinality=1.80K +| +08:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: ss_item_sk = i_item_sk +| runtime filters: RF002 <- i_item_sk +| row-size=76B cardinality=28.50K +| +|--36:EXCHANGE [HASH(i_item_sk)] +| | +| 04:SCAN HDFS [tpcds.item] +| HDFS partitions=1/1 files=1 size=4.82MB +| runtime filters: RF000 -> i_item_id +| row-size=36B cardinality=18.00K +| 
+35:EXCHANGE [HASH(ss_item_sk)] +| +07:HASH JOIN [INNER JOIN, BROADCAST] +| hash predicates: ss_addr_sk = ca_address_sk +| runtime filters: RF004 <- ca_address_sk +| row-size=40B cardinality=28.50K +| +|--34:EXCHANGE [BROADCAST] +| | +| 03:SCAN HDFS [tpcds.customer_address] +| HDFS partitions=1/1 files=1 size=5.25MB +| predicates: ca_gmt_offset = -5 +| row-size=8B cardinality=8.33K +| +06:HASH JOIN [INNER JOIN, BROADCAST] +| hash predicates: ss_sold_date_sk = d_date_sk +| runtime filters: RF006 <- d_date_sk +| row-size=32B cardinality=170.55K +| +|--33:EXCHANGE [BROADCAST] +| | +| 02:SCAN HDFS [tpcds.date_dim] +| HDFS partitions=1/1 files=1 size=9.84MB +| predicates: d_year = 1998, d_moy = 9 +| row-size=12B cardinality=108 +| +01:SCAN HDFS [tpcds.store_sales] + HDFS partitions=1824/1824 files=1824 size=346.60MB + runtime filters: RF006 -> ss_sold_date_sk, RF004 -> ss_addr_sk, RF002 -> ss_item_sk + row-size=20B cardinality=2.88M +==== +# TPCDS-Q95 +with ws_wh as +(select ws1.ws_order_number,ws1.ws_warehouse_sk wh1,ws2.ws_warehouse_sk wh2 + from web_sales ws1,web_sales ws2 + where ws1.ws_order_number = ws2.ws_order_number + and ws1.ws_warehouse_sk <> ws2.ws_warehouse_sk) + select + count(distinct ws_order_number) as "order count" + ,sum(ws_ext_ship_cost) as "total shipping cost" + ,sum(ws_net_profit) as "total net profit" +from + web_sales ws1 + ,date_dim + ,customer_address + ,web_site +where + cast(d_date as timestamp) between cast('1999-02-01' as timestamp) and + (cast('1999-02-01' as timestamp) + interval 60 days) +and ws1.ws_ship_date_sk = d_date_sk +and ws1.ws_ship_addr_sk = ca_address_sk +and ca_state = 'IL' +and ws1.ws_web_site_sk = web_site_sk +and web_company_name = 'pri' +and ws1.ws_order_number in (select ws_order_number + from ws_wh) +and ws1.ws_order_number in (select wr_order_number + from web_returns,ws_wh + where wr_order_number = ws_wh.ws_order_number) +order by count(distinct ws_order_number) +limit 100; +---- PLAN +Max Per-Host Resource 
Reservation: Memory=113.77MB Threads=10 +Per-Host Resource Estimates: Memory=1010MB +PLAN-ROOT SINK +| +21:TOP-N [LIMIT=100] +| order by: count(ws_order_number) ASC +| row-size=40B cardinality=1 +| +20:AGGREGATE [FINALIZE] +| output: count(ws_order_number), sum:merge(ws_ext_ship_cost), sum:merge(ws_net_profit) +| row-size=40B cardinality=1 +| +19:AGGREGATE +| output: sum(ws_ext_ship_cost), sum(ws_net_profit) +| group by: ws_order_number +| row-size=40B cardinality=3.25K +| +18:HASH JOIN [RIGHT SEMI JOIN] +| hash predicates: tpcds.web_returns.wr_order_number = ws1.ws_order_number +| runtime filters: RF000 <- ws1.ws_order_number +| row-size=92B cardinality=3.25K +| +|--16:HASH JOIN [RIGHT SEMI JOIN] +| | hash predicates: ws1.ws_order_number = ws1.ws_order_number +| | runtime filters: RF006 <- ws1.ws_order_number +| | row-size=92B cardinality=3.25K +| | +| |--14:HASH JOIN [INNER JOIN] +| | | hash predicates: ws1.ws_ship_date_sk = d_date_sk +| | | runtime filters: RF010 <- d_date_sk +| | | row-size=92B cardinality=3.25K +| | | +| | |--01:SCAN HDFS [tpcds.date_dim] +| | | HDFS partitions=1/1 files=1 size=9.84MB +| | | predicates: CAST(d_date AS TIMESTAMP) <= TIMESTAMP '1999-04-02 00:00:00', CAST(d_date AS TIMESTAMP) >= TIMESTAMP '1999-02-01 00:00:00' +| | | row-size=26B cardinality=7.30K +| | | +| | 13:HASH JOIN [INNER JOIN] +| | | hash predicates: ws1.ws_web_site_sk = web_site_sk +| | | runtime filters: RF012 <- web_site_sk +| | | row-size=66B cardinality=3.25K +| | | +| | |--03:SCAN HDFS [tpcds.web_site] +| | | HDFS partitions=1/1 files=1 size=8.57KB +| | | predicates: web_company_name = 'pri' +| | | row-size=20B cardinality=5 +| | | +| | 12:HASH JOIN [INNER JOIN] +| | | hash predicates: ws1.ws_ship_addr_sk = ca_address_sk +| | | runtime filters: RF014 <- ca_address_sk +| | | row-size=46B cardinality=19.52K +| | | +| | |--02:SCAN HDFS [tpcds.customer_address] +| | | HDFS partitions=1/1 files=1 size=5.25MB +| | | predicates: ca_state = 'IL' +| | | row-size=18B 
cardinality=980 +| | | +| | 00:SCAN HDFS [tpcds.web_sales ws1] +| | HDFS partitions=1/1 files=1 size=140.07MB +| | runtime filters: RF010 -> ws1.ws_ship_date_sk, RF012 -> ws1.ws_web_site_sk, RF014 -> ws1.ws_ship_addr_sk +| | row-size=28B cardinality=719.38K +| | +| 15:AGGREGATE [FINALIZE] +| | group by: ws1.ws_order_number +| | row-size=8B cardinality=59.77K +| | +| 06:HASH JOIN [INNER JOIN] +| | hash predicates: ws1.ws_order_number = ws2.ws_order_number +| | other predicates: ws1.ws_warehouse_sk != ws2.ws_warehouse_sk +| | runtime filters: RF008 <- ws2.ws_order_number +| | row-size=24B cardinality=8.66M +| | +| |--05:SCAN HDFS [tpcds.web_sales ws2] +| | HDFS partitions=1/1 files=1 size=140.07MB +| | runtime filters: RF006 -> ws2.ws_order_number +| | row-size=12B cardinality=719.38K +| | +| 04:SCAN HDFS [tpcds.web_sales ws1] +| HDFS partitions=1/1 files=1 size=140.07MB +| runtime filters: RF006 -> ws1.ws_order_number, RF008 -> ws1.ws_order_number +| row-size=12B cardinality=719.38K +| +17:AGGREGATE [FINALIZE] +| group by: tpcds.web_returns.wr_order_number +| row-size=8B cardinality=43.44K +| +11:HASH JOIN [INNER JOIN] +| hash predicates: ws1.ws_order_number = wr_order_number +| runtime filters: RF002 <- wr_order_number +| row-size=32B cardinality=10.39M +| +|--07:SCAN HDFS [tpcds.web_returns] +| HDFS partitions=1/1 files=1 size=9.35MB +| runtime filters: RF000 -> tpcds.web_returns.wr_order_number +| row-size=8B cardinality=71.76K +| +10:HASH JOIN [INNER JOIN] +| hash predicates: ws1.ws_order_number = ws2.ws_order_number +| other predicates: ws1.ws_warehouse_sk != ws2.ws_warehouse_sk +| runtime filters: RF004 <- ws2.ws_order_number +| row-size=24B cardinality=8.66M +| +|--09:SCAN HDFS [tpcds.web_sales ws2] +| HDFS partitions=1/1 files=1 size=140.07MB +| runtime filters: RF000 -> ws2.ws_order_number, RF002 -> ws2.ws_order_number +| row-size=12B cardinality=719.38K +| +08:SCAN HDFS [tpcds.web_sales ws1] + HDFS partitions=1/1 files=1 size=140.07MB + runtime filters: 
RF000 -> ws1.ws_order_number, RF002 -> ws1.ws_order_number, RF004 -> ws1.ws_order_number + row-size=12B cardinality=719.38K +---- DISTRIBUTEDPLAN +Max Per-Host Resource Reservation: Memory=112.58MB Threads=21 +Per-Host Resource Estimates: Memory=1.02GB +PLAN-ROOT SINK +| +21:TOP-N [LIMIT=100] +| order by: count(ws_order_number) ASC +| row-size=40B cardinality=1 +| +33:AGGREGATE [FINALIZE] +| output: count:merge(ws_order_number), sum:merge(ws_ext_ship_cost), sum:merge(ws_net_profit) +| row-size=40B cardinality=1 +| +32:EXCHANGE [UNPARTITIONED] +| +20:AGGREGATE +| output: count(ws_order_number), sum:merge(ws_ext_ship_cost), sum:merge(ws_net_profit) +| row-size=40B cardinality=1 +| +19:AGGREGATE +| output: sum(ws_ext_ship_cost), sum(ws_net_profit) +| group by: ws_order_number +| row-size=40B cardinality=3.25K +| +18:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED] +| hash predicates: tpcds.web_returns.wr_order_number = ws1.ws_order_number +| runtime filters: RF000 <- ws1.ws_order_number +| row-size=92B cardinality=3.25K +| +|--16:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED] +| | hash predicates: ws1.ws_order_number = ws1.ws_order_number +| | runtime filters: RF006 <- ws1.ws_order_number +| | row-size=92B cardinality=3.25K +| | +| |--31:EXCHANGE [HASH(ws1.ws_order_number)] +| | | +| | 14:HASH JOIN [INNER JOIN, PARTITIONED] +| | | hash predicates: ws1.ws_ship_date_sk = d_date_sk +| | | runtime filters: RF010 <- d_date_sk +| | | row-size=92B cardinality=3.25K +| | | +| | |--30:EXCHANGE [HASH(d_date_sk)] +| | | | +| | | 01:SCAN HDFS [tpcds.date_dim] +| | | HDFS partitions=1/1 files=1 size=9.84MB +| | | predicates: CAST(d_date AS TIMESTAMP) <= TIMESTAMP '1999-04-02 00:00:00', CAST(d_date AS TIMESTAMP) >= TIMESTAMP '1999-02-01 00:00:00' +| | | row-size=26B cardinality=7.30K +| | | +| | 29:EXCHANGE [HASH(ws1.ws_ship_date_sk)] +| | | +| | 13:HASH JOIN [INNER JOIN, BROADCAST] +| | | hash predicates: ws1.ws_web_site_sk = web_site_sk +| | | runtime filters: RF012 <- web_site_sk +| | | 
row-size=66B cardinality=3.25K +| | | +| | |--28:EXCHANGE [BROADCAST] +| | | | +| | | 03:SCAN HDFS [tpcds.web_site] +| | | HDFS partitions=1/1 files=1 size=8.57KB +| | | predicates: web_company_name = 'pri' +| | | row-size=20B cardinality=5 +| | | +| | 12:HASH JOIN [INNER JOIN, BROADCAST] +| | | hash predicates: ws1.ws_ship_addr_sk = ca_address_sk +| | | runtime filters: RF014 <- ca_address_sk +| | | row-size=46B cardinality=19.52K +| | | +| | |--27:EXCHANGE [BROADCAST] +| | | | +| | | 02:SCAN HDFS [tpcds.customer_address] +| | | HDFS partitions=1/1 files=1 size=5.25MB +| | | predicates: ca_state = 'IL' +| | | row-size=18B cardinality=980 +| | | +| | 00:SCAN HDFS [tpcds.web_sales ws1] +| | HDFS partitions=1/1 files=1 size=140.07MB +| | runtime filters: RF010 -> ws1.ws_ship_date_sk, RF012 -> ws1.ws_web_site_sk, RF014 -> ws1.ws_ship_addr_sk +| | row-size=28B cardinality=719.38K +| | +| 15:AGGREGATE [FINALIZE] +| | group by: ws1.ws_order_number +| | row-size=8B cardinality=59.77K +| | +| 06:HASH JOIN [INNER JOIN, PARTITIONED] +| | hash predicates: ws1.ws_order_number = ws2.ws_order_number +| | other predicates: ws1.ws_warehouse_sk != ws2.ws_warehouse_sk +| | runtime filters: RF008 <- ws2.ws_order_number +| | row-size=24B cardinality=8.66M +| | +| |--26:EXCHANGE [HASH(ws2.ws_order_number)] +| | | +| | 05:SCAN HDFS [tpcds.web_sales ws2] +| | HDFS partitions=1/1 files=1 size=140.07MB +| | runtime filters: RF006 -> ws2.ws_order_number +| | row-size=12B cardinality=719.38K +| | +| 25:EXCHANGE [HASH(ws1.ws_order_number)] +| | +| 04:SCAN HDFS [tpcds.web_sales ws1] +| HDFS partitions=1/1 files=1 size=140.07MB +| runtime filters: RF006 -> ws1.ws_order_number, RF008 -> ws1.ws_order_number +| row-size=12B cardinality=719.38K +| +17:AGGREGATE [FINALIZE] +| group by: tpcds.web_returns.wr_order_number +| row-size=8B cardinality=43.44K +| +11:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: ws1.ws_order_number = wr_order_number +| runtime filters: RF002 <- wr_order_number +| 
row-size=32B cardinality=10.39M +| +|--24:EXCHANGE [HASH(wr_order_number)] +| | +| 07:SCAN HDFS [tpcds.web_returns] +| HDFS partitions=1/1 files=1 size=9.35MB +| runtime filters: RF000 -> tpcds.web_returns.wr_order_number +| row-size=8B cardinality=71.76K +| +10:HASH JOIN [INNER JOIN, PARTITIONED] +| hash predicates: ws1.ws_order_number = ws2.ws_order_number +| other predicates: ws1.ws_warehouse_sk != ws2.ws_warehouse_sk +| runtime filters: RF004 <- ws2.ws_order_number +| row-size=24B cardinality=8.66M +| +|--23:EXCHANGE [HASH(ws2.ws_order_number)] +| | +| 09:SCAN HDFS [tpcds.web_sales ws2] +| HDFS partitions=1/1 files=1 size=140.07MB +| runtime filters: RF000 -> ws2.ws_order_number, RF002 -> ws2.ws_order_number +| row-size=12B cardinality=719.38K +| +22:EXCHANGE [HASH(ws1.ws_order_number)] +| +08:SCAN HDFS [tpcds.web_sales ws1] + HDFS partitions=1/1 files=1 size=140.07MB + runtime filters: RF000 -> ws1.ws_order_number, RF002 -> ws1.ws_order_number, RF004 -> ws1.ws_order_number + row-size=12B cardinality=719.38K +==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test index 07291450f..38741efdd 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-all.test @@ -4227,12 +4227,12 @@ Max Per-Host Resource Reservation: Memory=71.75MB Threads=6 Per-Host Resource Estimates: Memory=612MB PLAN-ROOT SINK | -10:SORT +11:SORT | order by: s_name ASC | row-size=67B cardinality=400 | -09:HASH JOIN [RIGHT SEMI JOIN] -| hash predicates: ps_suppkey = s_suppkey +10:HASH JOIN [RIGHT SEMI JOIN] +| hash predicates: tpch.partsupp.ps_suppkey = s_suppkey | runtime filters: RF000 <- s_suppkey | row-size=98B cardinality=400 | @@ -4251,6 +4251,10 @@ PLAN-ROOT SINK | runtime filters: RF008 -> s_nationkey | row-size=77B cardinality=10.00K | +09:AGGREGATE [FINALIZE] +| group by: 
tpch.partsupp.ps_suppkey +| row-size=8B cardinality=9.71K +| 07:HASH JOIN [RIGHT SEMI JOIN] | hash predicates: l_partkey = ps_partkey, l_suppkey = ps_suppkey | other join predicates: ps_availqty > 0.5 * sum(l_quantity) @@ -4269,7 +4273,7 @@ PLAN-ROOT SINK | | | 02:SCAN HDFS [tpch.partsupp] | HDFS partitions=1/1 files=1 size=112.71MB -| runtime filters: RF000 -> ps_suppkey, RF006 -> ps_partkey +| runtime filters: RF000 -> tpch.partsupp.ps_suppkey, RF006 -> ps_partkey | row-size=20B cardinality=800.00K | 05:AGGREGATE [FINALIZE] @@ -4283,30 +4287,30 @@ PLAN-ROOT SINK runtime filters: RF000 -> tpch.lineitem.l_suppkey, RF002 -> tpch.lineitem.l_partkey, RF003 -> tpch.lineitem.l_suppkey row-size=46B cardinality=600.12K ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=107.63MB Threads=13 -Per-Host Resource Estimates: Memory=660MB +Max Per-Host Resource Reservation: Memory=111.57MB Threads=13 +Per-Host Resource Estimates: Memory=679MB PLAN-ROOT SINK | -18:MERGING-EXCHANGE [UNPARTITIONED] +20:MERGING-EXCHANGE [UNPARTITIONED] | order by: s_name ASC | -10:SORT +11:SORT | order by: s_name ASC | row-size=67B cardinality=400 | -09:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED] -| hash predicates: ps_suppkey = s_suppkey +10:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED] +| hash predicates: tpch.partsupp.ps_suppkey = s_suppkey | runtime filters: RF000 <- s_suppkey | row-size=98B cardinality=400 | -|--17:EXCHANGE [HASH(s_suppkey)] +|--19:EXCHANGE [HASH(s_suppkey)] | | | 08:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: s_nationkey = n_nationkey | | runtime filters: RF008 <- n_nationkey | | row-size=98B cardinality=400 | | -| |--15:EXCHANGE [BROADCAST] +| |--18:EXCHANGE [BROADCAST] | | | | | 01:SCAN HDFS [tpch.nation] | | HDFS partitions=1/1 files=1 size=2.15KB @@ -4318,7 +4322,15 @@ PLAN-ROOT SINK | runtime filters: RF008 -> s_nationkey | row-size=77B cardinality=10.00K | -16:EXCHANGE [HASH(ps_suppkey)] +17:AGGREGATE [FINALIZE] +| group by: tpch.partsupp.ps_suppkey +| 
row-size=8B cardinality=9.71K +| +16:EXCHANGE [HASH(tpch.partsupp.ps_suppkey)] +| +09:AGGREGATE [STREAMING] +| group by: tpch.partsupp.ps_suppkey +| row-size=8B cardinality=9.71K | 07:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED] | hash predicates: l_partkey = ps_partkey, l_suppkey = ps_suppkey @@ -4326,14 +4338,14 @@ PLAN-ROOT SINK | runtime filters: RF002 <- ps_partkey, RF003 <- ps_suppkey | row-size=20B cardinality=79.79K | -|--14:EXCHANGE [HASH(ps_partkey,ps_suppkey)] +|--15:EXCHANGE [HASH(ps_partkey,ps_suppkey)] | | | 06:HASH JOIN [LEFT SEMI JOIN, BROADCAST] | | hash predicates: ps_partkey = p_partkey | | runtime filters: RF006 <- p_partkey | | row-size=20B cardinality=79.79K | | -| |--13:EXCHANGE [BROADCAST] +| |--14:EXCHANGE [BROADCAST] | | | | | 03:SCAN HDFS [tpch.part] | | HDFS partitions=1/1 files=1 size=22.83MB @@ -4342,15 +4354,15 @@ PLAN-ROOT SINK | | | 02:SCAN HDFS [tpch.partsupp] | HDFS partitions=1/1 files=1 size=112.71MB -| runtime filters: RF000 -> ps_suppkey, RF006 -> ps_partkey +| runtime filters: RF000 -> tpch.partsupp.ps_suppkey, RF006 -> ps_partkey | row-size=20B cardinality=800.00K | -12:AGGREGATE [FINALIZE] +13:AGGREGATE [FINALIZE] | output: sum:merge(l_quantity) | group by: l_partkey, l_suppkey | row-size=32B cardinality=600.12K | -11:EXCHANGE [HASH(l_partkey,l_suppkey)] +12:EXCHANGE [HASH(l_partkey,l_suppkey)] | 05:AGGREGATE [STREAMING] | output: sum(l_quantity) @@ -4363,19 +4375,19 @@ PLAN-ROOT SINK runtime filters: RF000 -> tpch.lineitem.l_suppkey, RF002 -> tpch.lineitem.l_partkey, RF003 -> tpch.lineitem.l_suppkey row-size=46B cardinality=600.12K ---- PARALLELPLANS -Max Per-Host Resource Reservation: Memory=123.45MB Threads=13 -Per-Host Resource Estimates: Memory=430MB +Max Per-Host Resource Reservation: Memory=127.38MB Threads=13 +Per-Host Resource Estimates: Memory=449MB PLAN-ROOT SINK | -18:MERGING-EXCHANGE [UNPARTITIONED] +20:MERGING-EXCHANGE [UNPARTITIONED] | order by: s_name ASC | -10:SORT +11:SORT | order by: s_name ASC | row-size=67B 
cardinality=400 | -09:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED] -| hash predicates: ps_suppkey = s_suppkey +10:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED] +| hash predicates: tpch.partsupp.ps_suppkey = s_suppkey | row-size=98B cardinality=400 | |--JOIN BUILD @@ -4383,7 +4395,7 @@ PLAN-ROOT SINK | | build expressions: s_suppkey | | runtime filters: RF000 <- s_suppkey | | -| 17:EXCHANGE [HASH(s_suppkey)] +| 19:EXCHANGE [HASH(s_suppkey)] | | | 08:HASH JOIN [INNER JOIN, BROADCAST] | | hash predicates: s_nationkey = n_nationkey @@ -4394,7 +4406,7 @@ PLAN-ROOT SINK | | | build expressions: n_nationkey | | | runtime filters: RF008 <- n_nationkey | | | -| | 15:EXCHANGE [BROADCAST] +| | 18:EXCHANGE [BROADCAST] | | | | | 01:SCAN HDFS [tpch.nation] | | HDFS partitions=1/1 files=1 size=2.15KB @@ -4406,7 +4418,15 @@ PLAN-ROOT SINK | runtime filters: RF008 -> s_nationkey | row-size=77B cardinality=10.00K | -16:EXCHANGE [HASH(ps_suppkey)] +17:AGGREGATE [FINALIZE] +| group by: tpch.partsupp.ps_suppkey +| row-size=8B cardinality=9.71K +| +16:EXCHANGE [HASH(tpch.partsupp.ps_suppkey)] +| +09:AGGREGATE [STREAMING] +| group by: tpch.partsupp.ps_suppkey +| row-size=8B cardinality=9.71K | 07:HASH JOIN [RIGHT SEMI JOIN, PARTITIONED] | hash predicates: l_partkey = ps_partkey, l_suppkey = ps_suppkey @@ -4418,7 +4438,7 @@ PLAN-ROOT SINK | | build expressions: ps_partkey, ps_suppkey | | runtime filters: RF002 <- ps_partkey, RF003 <- ps_suppkey | | -| 14:EXCHANGE [HASH(ps_partkey,ps_suppkey)] +| 15:EXCHANGE [HASH(ps_partkey,ps_suppkey)] | | | 06:HASH JOIN [LEFT SEMI JOIN, BROADCAST] | | hash predicates: ps_partkey = p_partkey @@ -4429,7 +4449,7 @@ PLAN-ROOT SINK | | | build expressions: p_partkey | | | runtime filters: RF006 <- p_partkey | | | -| | 13:EXCHANGE [BROADCAST] +| | 14:EXCHANGE [BROADCAST] | | | | | 03:SCAN HDFS [tpch.part] | | HDFS partitions=1/1 files=1 size=22.83MB @@ -4438,15 +4458,15 @@ PLAN-ROOT SINK | | | 02:SCAN HDFS [tpch.partsupp] | HDFS partitions=1/1 files=1 size=112.71MB -| 
runtime filters: RF000 -> ps_suppkey, RF006 -> ps_partkey +| runtime filters: RF000 -> tpch.partsupp.ps_suppkey, RF006 -> ps_partkey | row-size=20B cardinality=800.00K | -12:AGGREGATE [FINALIZE] +13:AGGREGATE [FINALIZE] | output: sum:merge(l_quantity) | group by: l_partkey, l_suppkey | row-size=32B cardinality=600.12K | -11:EXCHANGE [HASH(l_partkey,l_suppkey)] +12:EXCHANGE [HASH(l_partkey,l_suppkey)] | 05:AGGREGATE [STREAMING] | output: sum(l_quantity) @@ -4815,21 +4835,21 @@ group by order by cntrycode ---- PLAN -Max Per-Host Resource Reservation: Memory=24.00MB Threads=4 -Per-Host Resource Estimates: Memory=314MB +Max Per-Host Resource Reservation: Memory=25.94MB Threads=4 +Per-Host Resource Estimates: Memory=324MB PLAN-ROOT SINK | -07:SORT +08:SORT | order by: cntrycode ASC | row-size=36B cardinality=15.00K | -06:AGGREGATE [FINALIZE] +07:AGGREGATE [FINALIZE] | output: count(*), sum(c_acctbal) | group by: substr(c_phone, 1, 2) | row-size=36B cardinality=15.00K | -05:HASH JOIN [RIGHT ANTI JOIN] -| hash predicates: o_custkey = c_custkey +06:HASH JOIN [RIGHT ANTI JOIN] +| hash predicates: tpch.orders.o_custkey = c_custkey | row-size=51B cardinality=15.00K | |--04:NESTED LOOP JOIN [INNER JOIN] @@ -4850,50 +4870,54 @@ PLAN-ROOT SINK | predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') | row-size=43B cardinality=15.00K | +05:AGGREGATE [FINALIZE] +| group by: tpch.orders.o_custkey +| row-size=8B cardinality=98.39K +| 03:SCAN HDFS [tpch.orders] HDFS partitions=1/1 files=1 size=162.56MB row-size=8B cardinality=1.50M ---- DISTRIBUTEDPLAN -Max Per-Host Resource Reservation: Memory=41.88MB Threads=10 -Per-Host Resource Estimates: Memory=365MB +Max Per-Host Resource Reservation: Memory=45.81MB Threads=10 +Per-Host Resource Estimates: Memory=380MB PLAN-ROOT SINK | -15:MERGING-EXCHANGE [UNPARTITIONED] +17:MERGING-EXCHANGE [UNPARTITIONED] | order by: cntrycode ASC | -07:SORT +08:SORT | order by: cntrycode ASC | row-size=36B cardinality=15.00K | 
-14:AGGREGATE [FINALIZE] +16:AGGREGATE [FINALIZE] | output: count:merge(*), sum:merge(c_acctbal) | group by: cntrycode | row-size=36B cardinality=15.00K | -13:EXCHANGE [HASH(cntrycode)] +15:EXCHANGE [HASH(cntrycode)] | -06:AGGREGATE [STREAMING] +07:AGGREGATE [STREAMING] | output: count(*), sum(c_acctbal) | group by: substr(c_phone, 1, 2) | row-size=36B cardinality=15.00K | -05:HASH JOIN [RIGHT ANTI JOIN, PARTITIONED] -| hash predicates: o_custkey = c_custkey +06:HASH JOIN [RIGHT ANTI JOIN, PARTITIONED] +| hash predicates: tpch.orders.o_custkey = c_custkey | row-size=51B cardinality=15.00K | -|--12:EXCHANGE [HASH(c_custkey)] +|--14:EXCHANGE [HASH(c_custkey)] | | | 04:NESTED LOOP JOIN [INNER JOIN, BROADCAST] | | predicates: c_acctbal > avg(c_acctbal) | | row-size=51B cardinality=15.00K | | -| |--10:EXCHANGE [BROADCAST] +| |--13:EXCHANGE [BROADCAST] | | | -| | 09:AGGREGATE [FINALIZE] +| | 12:AGGREGATE [FINALIZE] | | | output: avg:merge(c_acctbal) | | | row-size=8B cardinality=1 | | | -| | 08:EXCHANGE [UNPARTITIONED] +| | 11:EXCHANGE [UNPARTITIONED] | | | | | 02:AGGREGATE | | | output: avg(c_acctbal) @@ -4909,44 +4933,52 @@ PLAN-ROOT SINK | predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') | row-size=43B cardinality=15.00K | -11:EXCHANGE [HASH(o_custkey)] +10:AGGREGATE [FINALIZE] +| group by: tpch.orders.o_custkey +| row-size=8B cardinality=98.39K +| +09:EXCHANGE [HASH(tpch.orders.o_custkey)] +| +05:AGGREGATE [STREAMING] +| group by: tpch.orders.o_custkey +| row-size=8B cardinality=98.39K | 03:SCAN HDFS [tpch.orders] HDFS partitions=1/1 files=1 size=162.56MB row-size=8B cardinality=1.50M ---- PARALLELPLANS -Max Per-Host Resource Reservation: Memory=41.88MB Threads=9 -Per-Host Resource Estimates: Memory=213MB +Max Per-Host Resource Reservation: Memory=45.81MB Threads=9 +Per-Host Resource Estimates: Memory=228MB PLAN-ROOT SINK | -15:MERGING-EXCHANGE [UNPARTITIONED] +17:MERGING-EXCHANGE [UNPARTITIONED] | order by: cntrycode ASC | -07:SORT 
+08:SORT | order by: cntrycode ASC | row-size=36B cardinality=15.00K | -14:AGGREGATE [FINALIZE] +16:AGGREGATE [FINALIZE] | output: count:merge(*), sum:merge(c_acctbal) | group by: cntrycode | row-size=36B cardinality=15.00K | -13:EXCHANGE [HASH(cntrycode)] +15:EXCHANGE [HASH(cntrycode)] | -06:AGGREGATE [STREAMING] +07:AGGREGATE [STREAMING] | output: count(*), sum(c_acctbal) | group by: substr(c_phone, 1, 2) | row-size=36B cardinality=15.00K | -05:HASH JOIN [RIGHT ANTI JOIN, PARTITIONED] -| hash predicates: o_custkey = c_custkey +06:HASH JOIN [RIGHT ANTI JOIN, PARTITIONED] +| hash predicates: tpch.orders.o_custkey = c_custkey | row-size=51B cardinality=15.00K | |--JOIN BUILD | | join-table-id=00 plan-id=01 cohort-id=01 | | build expressions: c_custkey | | -| 12:EXCHANGE [HASH(c_custkey)] +| 14:EXCHANGE [HASH(c_custkey)] | | | 04:NESTED LOOP JOIN [INNER JOIN, BROADCAST] | | join table id: 01 @@ -4956,13 +4988,13 @@ PLAN-ROOT SINK | |--JOIN BUILD | | | join-table-id=01 plan-id=02 cohort-id=02 | | | -| | 10:EXCHANGE [BROADCAST] +| | 13:EXCHANGE [BROADCAST] | | | -| | 09:AGGREGATE [FINALIZE] +| | 12:AGGREGATE [FINALIZE] | | | output: avg:merge(c_acctbal) | | | row-size=8B cardinality=1 | | | -| | 08:EXCHANGE [UNPARTITIONED] +| | 11:EXCHANGE [UNPARTITIONED] | | | | | 02:AGGREGATE | | | output: avg(c_acctbal) @@ -4978,7 +5010,15 @@ PLAN-ROOT SINK | predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') | row-size=43B cardinality=15.00K | -11:EXCHANGE [HASH(o_custkey)] +10:AGGREGATE [FINALIZE] +| group by: tpch.orders.o_custkey +| row-size=8B cardinality=98.39K +| +09:EXCHANGE [HASH(tpch.orders.o_custkey)] +| +05:AGGREGATE [STREAMING] +| group by: tpch.orders.o_custkey +| row-size=8B cardinality=98.39K | 03:SCAN HDFS [tpch.orders] HDFS partitions=1/1 files=1 size=162.56MB diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test index 
83f8beaf0..dc2dad17f 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-kudu.test @@ -1475,16 +1475,16 @@ where order by s_name ---- PLAN -Max Per-Host Resource Reservation: Memory=55.81MB Threads=6 +Max Per-Host Resource Reservation: Memory=48.62MB Threads=6 Per-Host Resource Estimates: Memory=65MB PLAN-ROOT SINK | -10:SORT +11:SORT | order by: s_name ASC | row-size=67B cardinality=400 | -09:HASH JOIN [RIGHT SEMI JOIN] -| hash predicates: ps_suppkey = s_suppkey +10:HASH JOIN [RIGHT SEMI JOIN] +| hash predicates: tpch_kudu.partsupp.ps_suppkey = s_suppkey | runtime filters: RF000 <- s_suppkey, RF001 <- s_suppkey | row-size=87B cardinality=400 | @@ -1501,6 +1501,10 @@ PLAN-ROOT SINK | runtime filters: RF008 -> s_nationkey, RF009 -> s_nationkey | row-size=85B cardinality=10.00K | +09:AGGREGATE [FINALIZE] +| group by: tpch_kudu.partsupp.ps_suppkey +| row-size=8B cardinality=9.71K +| 07:HASH JOIN [RIGHT SEMI JOIN] | hash predicates: l_partkey = ps_partkey, l_suppkey = ps_suppkey | other join predicates: ps_availqty > 0.5 * sum(l_quantity) @@ -1517,7 +1521,7 @@ PLAN-ROOT SINK | | row-size=57B cardinality=20.00K | | | 02:SCAN KUDU [tpch_kudu.partsupp] -| runtime filters: RF000 -> ps_suppkey, RF001 -> ps_suppkey, RF006 -> ps_partkey, RF007 -> ps_partkey +| runtime filters: RF000 -> tpch_kudu.partsupp.ps_suppkey, RF001 -> tpch_kudu.partsupp.ps_suppkey, RF006 -> ps_partkey, RF007 -> ps_partkey | row-size=24B cardinality=800.00K | 05:AGGREGATE [FINALIZE] @@ -1673,39 +1677,43 @@ order by cntrycode ---- PLAN Max Per-Host Resource Reservation: Memory=13.94MB Threads=4 -Per-Host Resource Estimates: Memory=31MB +Per-Host Resource Estimates: Memory=41MB PLAN-ROOT SINK | -07:SORT +08:SORT | order by: cntrycode ASC | row-size=36B cardinality=15.00K | -06:AGGREGATE [FINALIZE] +07:AGGREGATE [FINALIZE] | output: count(*), sum(c_acctbal) | group by: substr(c_phone, 1, 2) | row-size=36B 
cardinality=15.00K | -05:HASH JOIN [RIGHT ANTI JOIN] -| hash predicates: o_custkey = c_custkey +06:HASH JOIN [LEFT ANTI JOIN] +| hash predicates: c_custkey = tpch_kudu.orders.o_custkey | row-size=55B cardinality=15.00K | -|--04:NESTED LOOP JOIN [INNER JOIN] -| | predicates: c_acctbal > round(avg(c_acctbal), 1) -| | row-size=55B cardinality=15.00K +|--05:AGGREGATE [FINALIZE] +| | group by: tpch_kudu.orders.o_custkey +| | row-size=8B cardinality=98.39K | | -| |--02:AGGREGATE [FINALIZE] -| | | output: avg(c_acctbal) -| | | row-size=8B cardinality=1 -| | | -| | 01:SCAN KUDU [tpch_kudu.customer] -| | predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') -| | kudu predicates: c_acctbal > 0 -| | row-size=39B cardinality=15.00K -| | -| 00:SCAN KUDU [tpch_kudu.customer] -| predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') -| row-size=47B cardinality=15.00K +| 03:SCAN KUDU [tpch_kudu.orders] +| row-size=8B cardinality=1.50M | -03:SCAN KUDU [tpch_kudu.orders] - row-size=8B cardinality=1.50M +04:NESTED LOOP JOIN [INNER JOIN] +| predicates: c_acctbal > round(avg(c_acctbal), 1) +| row-size=55B cardinality=15.00K +| +|--02:AGGREGATE [FINALIZE] +| | output: avg(c_acctbal) +| | row-size=8B cardinality=1 +| | +| 01:SCAN KUDU [tpch_kudu.customer] +| predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') +| kudu predicates: c_acctbal > 0 +| row-size=39B cardinality=15.00K +| +00:SCAN KUDU [tpch_kudu.customer] + predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') + row-size=47B cardinality=15.00K ==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test index 9eaca45d7..7deee3bf0 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-nested.test @@ -36,7 +36,7 @@ 
PLAN-ROOT SINK | row-size=120B cardinality=1.50M | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: l_shipdate <= '1998-09-02' row-size=68B cardinality=1.50M ---- DISTRIBUTEDPLAN @@ -64,7 +64,7 @@ PLAN-ROOT SINK | row-size=120B cardinality=1.50M | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: l_shipdate <= '1998-09-02' row-size=68B cardinality=1.50M ==== @@ -147,7 +147,7 @@ PLAN-ROOT SINK | | | row-size=0B cardinality=10 | | | | | 16:SCAN HDFS [tpch_nested_parquet.region r] -| | HDFS partitions=1/1 files=1 size=3.50KB +| | HDFS partitions=1/1 files=1 size=3.59KB | | predicates: r_name = 'EUROPE', !empty(r.r_nations) | | row-size=31B cardinality=1 | | @@ -187,7 +187,7 @@ PLAN-ROOT SINK | | row-size=0B cardinality=10 | | | 06:SCAN HDFS [tpch_nested_parquet.region r] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: r_name = 'EUROPE', !empty(r.r_nations) | row-size=31B cardinality=1 | @@ -270,7 +270,7 @@ PLAN-ROOT SINK | | | row-size=0B cardinality=10 | | | | | 16:SCAN HDFS [tpch_nested_parquet.region r] -| | HDFS partitions=1/1 files=1 size=3.50KB +| | HDFS partitions=1/1 files=1 size=3.59KB | | predicates: r_name = 'EUROPE', !empty(r.r_nations) | | row-size=31B cardinality=1 | | @@ -312,7 +312,7 @@ PLAN-ROOT SINK | | row-size=0B cardinality=10 | | | 06:SCAN HDFS [tpch_nested_parquet.region r] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: r_name = 'EUROPE', !empty(r.r_nations) | row-size=31B cardinality=1 | @@ -408,7 +408,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: c_mktsegment = 
'BUILDING', !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderdate < '1995-03-15' predicates on l: l_shipdate > '1995-03-15' @@ -463,7 +463,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: c_mktsegment = 'BUILDING', !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderdate < '1995-03-15' predicates on l: l_shipdate > '1995-03-15' @@ -519,6 +519,7 @@ PLAN-ROOT SINK | | | row-size=48B cardinality=1 | | | | | 05:UNNEST [o.o_lineitems] +| | limit: 1 | | row-size=24B cardinality=10 | | | 07:NESTED LOOP JOIN [CROSS JOIN] @@ -531,7 +532,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) predicates on o: o_orderdate >= '1993-07-01', o_orderdate < '1993-10-01' predicates on o_lineitems: l_commitdate < l_receiptdate @@ -573,6 +574,7 @@ PLAN-ROOT SINK | | | row-size=48B cardinality=1 | | | | | 05:UNNEST [o.o_lineitems] +| | limit: 1 | | row-size=24B cardinality=10 | | | 07:NESTED LOOP JOIN [CROSS JOIN] @@ -585,7 +587,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) predicates on o: o_orderdate >= '1993-07-01', o_orderdate < '1993-10-01' predicates on o_lineitems: l_commitdate < l_receiptdate @@ -655,7 +657,7 @@ PLAN-ROOT SINK | | row-size=0B cardinality=10 | | | 10:SCAN HDFS [tpch_nested_parquet.region r] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: r_name = 'ASIA', !empty(r.r_nations) | row-size=31B cardinality=1 | @@ -684,7 +686,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer 
c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01' runtime filters: RF000 -> c_nationkey, RF004 -> c.c_nationkey @@ -744,7 +746,7 @@ PLAN-ROOT SINK | | row-size=0B cardinality=10 | | | 10:SCAN HDFS [tpch_nested_parquet.region r] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: r_name = 'ASIA', !empty(r.r_nations) | row-size=31B cardinality=1 | @@ -773,7 +775,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01' runtime filters: RF000 -> c_nationkey, RF004 -> c.c_nationkey @@ -800,7 +802,7 @@ PLAN-ROOT SINK | row-size=16B cardinality=1 | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01' row-size=36B cardinality=1.50M ---- DISTRIBUTEDPLAN @@ -819,7 +821,7 @@ PLAN-ROOT SINK | row-size=16B cardinality=1 | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: l_discount <= 0.07, l_discount >= 0.05, l_quantity < 24, l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01' row-size=36B cardinality=1.50M ==== @@ -882,7 +884,7 @@ PLAN-ROOT SINK | row-size=100B cardinality=15.00M | |--11:SCAN HDFS [tpch_nested_parquet.region.r_nations n2] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: n2.n_name IN ('GERMANY', 'FRANCE') | 
row-size=14B cardinality=5 | @@ -892,7 +894,7 @@ PLAN-ROOT SINK | row-size=86B cardinality=15.00M | |--10:SCAN HDFS [tpch_nested_parquet.region.r_nations n1] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: n1.n_name IN ('FRANCE', 'GERMANY') | row-size=14B cardinality=5 | @@ -930,7 +932,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems) predicates on l: l_shipdate >= '1995-01-01', l_shipdate <= '1996-12-31' @@ -969,7 +971,7 @@ PLAN-ROOT SINK |--19:EXCHANGE [BROADCAST] | | | 11:SCAN HDFS [tpch_nested_parquet.region.r_nations n2] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: n2.n_name IN ('GERMANY', 'FRANCE') | row-size=14B cardinality=5 | @@ -981,7 +983,7 @@ PLAN-ROOT SINK |--18:EXCHANGE [BROADCAST] | | | 10:SCAN HDFS [tpch_nested_parquet.region.r_nations n1] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: n1.n_name IN ('FRANCE', 'GERMANY') | row-size=14B cardinality=5 | @@ -1021,7 +1023,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems) predicates on l: l_shipdate >= '1995-01-01', l_shipdate <= '1996-12-31' @@ -1084,7 +1086,7 @@ PLAN-ROOT SINK | row-size=167B cardinality=15.00M | |--16:SCAN HDFS [tpch_nested_parquet.region.r_nations n2] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | row-size=14B cardinality=50 | 19:HASH JOIN [INNER JOIN] @@ -1105,7 +1107,7 @@ PLAN-ROOT SINK | | row-size=0B cardinality=10 | | | 11:SCAN HDFS [tpch_nested_parquet.region r] -| HDFS 
partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: r_name = 'AMERICA', !empty(r.r_nations) | row-size=31B cardinality=1 | @@ -1152,7 +1154,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderdate >= '1995-01-01', o_orderdate <= '1996-12-31' runtime filters: RF002 -> c_nationkey @@ -1189,7 +1191,7 @@ PLAN-ROOT SINK |--26:EXCHANGE [BROADCAST] | | | 16:SCAN HDFS [tpch_nested_parquet.region.r_nations n2] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | row-size=14B cardinality=50 | 19:HASH JOIN [INNER JOIN, BROADCAST] @@ -1212,7 +1214,7 @@ PLAN-ROOT SINK | | row-size=0B cardinality=10 | | | 11:SCAN HDFS [tpch_nested_parquet.region r] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: r_name = 'AMERICA', !empty(r.r_nations) | row-size=31B cardinality=1 | @@ -1263,7 +1265,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderdate >= '1995-01-01', o_orderdate <= '1996-12-31' runtime filters: RF002 -> c_nationkey @@ -1320,7 +1322,7 @@ PLAN-ROOT SINK | row-size=169B cardinality=15.00M | |--11:SCAN HDFS [tpch_nested_parquet.region.r_nations n] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | row-size=14B cardinality=50 | 13:HASH JOIN [INNER JOIN] @@ -1367,7 +1369,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders o] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(o.o_lineitems) 
row-size=24B cardinality=1.50M ---- DISTRIBUTEDPLAN @@ -1402,7 +1404,7 @@ PLAN-ROOT SINK |--19:EXCHANGE [BROADCAST] | | | 11:SCAN HDFS [tpch_nested_parquet.region.r_nations n] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | row-size=14B cardinality=50 | 13:HASH JOIN [INNER JOIN, BROADCAST] @@ -1453,7 +1455,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders o] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(o.o_lineitems) row-size=24B cardinality=1.50M ==== @@ -1510,7 +1512,7 @@ PLAN-ROOT SINK | row-size=275B cardinality=15.00M | |--09:SCAN HDFS [tpch_nested_parquet.region.r_nations n] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | row-size=14B cardinality=50 | 01:SUBPLAN @@ -1538,7 +1540,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderdate >= '1993-10-01', o_orderdate < '1994-01-01' predicates on l: l_returnflag = 'R' @@ -1577,7 +1579,7 @@ PLAN-ROOT SINK |--13:EXCHANGE [BROADCAST] | | | 09:SCAN HDFS [tpch_nested_parquet.region.r_nations n] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | row-size=14B cardinality=50 | 01:SUBPLAN @@ -1605,7 +1607,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderdate >= '1993-10-01', o_orderdate < '1994-01-01' predicates on l: l_returnflag = 'R' @@ -1668,7 +1670,7 @@ PLAN-ROOT SINK | | row-size=40B cardinality=100.00K | | | |--13:SCAN HDFS [tpch_nested_parquet.region.r_nations n] -| | 
HDFS partitions=1/1 files=1 size=3.50KB +| | HDFS partitions=1/1 files=1 size=3.59KB | | predicates: n_name = 'GERMANY' | | row-size=14B cardinality=5 | | @@ -1701,7 +1703,7 @@ PLAN-ROOT SINK | row-size=48B cardinality=100.00K | |--05:SCAN HDFS [tpch_nested_parquet.region.r_nations n] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: n_name = 'GERMANY' | row-size=14B cardinality=5 | @@ -1758,7 +1760,7 @@ PLAN-ROOT SINK | |--21:EXCHANGE [BROADCAST] | | | | | 13:SCAN HDFS [tpch_nested_parquet.region.r_nations n] -| | HDFS partitions=1/1 files=1 size=3.50KB +| | HDFS partitions=1/1 files=1 size=3.59KB | | predicates: n_name = 'GERMANY' | | row-size=14B cardinality=5 | | @@ -1800,7 +1802,7 @@ PLAN-ROOT SINK |--18:EXCHANGE [BROADCAST] | | | 05:SCAN HDFS [tpch_nested_parquet.region.r_nations n] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: n_name = 'GERMANY' | row-size=14B cardinality=5 | @@ -1878,7 +1880,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders o] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(o.o_lineitems) predicates on l: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate, l_receiptdate >= '1994-01-01', l_receiptdate < '1995-01-01' row-size=24B cardinality=1.50M @@ -1919,7 +1921,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders o] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(o.o_lineitems) predicates on l: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate, l_receiptdate >= '1994-01-01', l_receiptdate < '1995-01-01' row-size=24B cardinality=1.50M @@ -1977,7 +1979,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - 
HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates on c_orders: (NOT o_comment LIKE '%special%requests%') row-size=20B cardinality=150.00K ---- DISTRIBUTEDPLAN @@ -2029,7 +2031,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates on c_orders: (NOT o_comment LIKE '%special%requests%') row-size=20B cardinality=150.00K ==== @@ -2067,7 +2069,7 @@ PLAN-ROOT SINK | row-size=41B cardinality=200.00K | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01' runtime filters: RF000 -> l_partkey row-size=36B cardinality=1.50M @@ -2098,7 +2100,7 @@ PLAN-ROOT SINK | row-size=41B cardinality=200.00K | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: l_shipdate < '1995-10-01', l_shipdate >= '1995-09-01' runtime filters: RF000 -> l_partkey row-size=36B cardinality=1.50M @@ -2158,7 +2160,7 @@ PLAN-ROOT SINK | | row-size=24B cardinality=1.50M | | | 03:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] -| HDFS partitions=1/1 files=4 size=289.14MB +| HDFS partitions=1/1 files=4 size=289.08MB | predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01' | row-size=36B cardinality=1.50M | @@ -2177,7 +2179,7 @@ PLAN-ROOT SINK | row-size=24B cardinality=1.50M | 01:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01' runtime filters: RF000 -> l.l_suppkey row-size=36B cardinality=1.50M @@ -2222,7 +2224,7 @@ PLAN-ROOT SINK | | row-size=24B 
cardinality=1.50M | | | 03:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] -| HDFS partitions=1/1 files=4 size=289.14MB +| HDFS partitions=1/1 files=4 size=289.08MB | predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01' | row-size=36B cardinality=1.50M | @@ -2250,7 +2252,7 @@ PLAN-ROOT SINK | row-size=24B cardinality=1.50M | 01:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: l_shipdate < '1996-04-01', l_shipdate >= '1996-01-01' runtime filters: RF000 -> l.l_suppkey row-size=36B cardinality=1.50M @@ -2426,7 +2428,7 @@ PLAN-ROOT SINK | | row-size=16B cardinality=15.00M | | | 02:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] -| HDFS partitions=1/1 files=4 size=289.14MB +| HDFS partitions=1/1 files=4 size=289.08MB | row-size=16B cardinality=15.00M | 04:HASH JOIN [INNER JOIN] @@ -2441,7 +2443,7 @@ PLAN-ROOT SINK | row-size=48B cardinality=1.00K | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB runtime filters: RF000 -> l.l_partkey, RF002 -> l_partkey row-size=24B cardinality=15.00M ---- DISTRIBUTEDPLAN @@ -2478,7 +2480,7 @@ PLAN-ROOT SINK | | row-size=16B cardinality=15.00M | | | 02:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] -| HDFS partitions=1/1 files=4 size=289.14MB +| HDFS partitions=1/1 files=4 size=289.08MB | row-size=16B cardinality=15.00M | 10:EXCHANGE [HASH(p_partkey)] @@ -2497,7 +2499,7 @@ PLAN-ROOT SINK | row-size=48B cardinality=1.00K | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB runtime filters: RF000 -> l.l_partkey, RF002 -> l_partkey row-size=24B cardinality=15.00M ==== @@ -2559,7 +2561,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS 
[tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) row-size=50B cardinality=150.00K ---- DISTRIBUTEDPLAN @@ -2605,7 +2607,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) row-size=50B cardinality=150.00K ==== @@ -2667,7 +2669,7 @@ PLAN-ROOT SINK | row-size=52B cardinality=1.43K | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: l_shipmode IN ('AIR', 'AIR REG'), l_quantity <= 11 OR l_quantity <= 20 OR l_quantity <= 30, l_quantity <= 11 OR l_quantity <= 20 OR l_quantity >= 20, l_quantity <= 11 OR l_quantity >= 10 OR l_quantity <= 30, l_quantity <= 11 OR l_quantity >= 10 OR l_quantity >= 20, l_quantity >= 1 OR l_quantity <= 20 OR l_quantity <= 30, l_quantity >= 1 OR l_quantity <= 20 OR l_quantity >= 20, l_quantity >= 1 OR l_quantity >= 10 OR l_quantity <= 30, l_quantity >= 1 OR l_quantity >= 10 OR l_quantity >= 20, l_shipinstruct = 'DELIVER IN PERSON' runtime filters: RF000 -> l_partkey row-size=56B cardinality=1.50M @@ -2700,7 +2702,7 @@ PLAN-ROOT SINK | row-size=52B cardinality=1.43K | 00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: l_shipmode IN ('AIR', 'AIR REG'), l_quantity <= 11 OR l_quantity <= 20 OR l_quantity <= 30, l_quantity <= 11 OR l_quantity <= 20 OR l_quantity >= 20, l_quantity <= 11 OR l_quantity >= 10 OR l_quantity <= 30, l_quantity <= 11 OR l_quantity >= 10 OR l_quantity >= 20, l_quantity >= 1 OR l_quantity <= 20 OR l_quantity <= 30, l_quantity >= 1 OR l_quantity <= 20 OR l_quantity >= 20, l_quantity >= 1 OR l_quantity >= 10 OR l_quantity <= 30, 
l_quantity >= 1 OR l_quantity >= 10 OR l_quantity >= 20, l_shipinstruct = 'DELIVER IN PERSON' runtime filters: RF000 -> l_partkey row-size=56B cardinality=1.50M @@ -2773,7 +2775,7 @@ PLAN-ROOT SINK | | row-size=115B cardinality=100.00K | | | |--05:SCAN HDFS [tpch_nested_parquet.region.r_nations n] -| | HDFS partitions=1/1 files=1 size=3.50KB +| | HDFS partitions=1/1 files=1 size=3.59KB | | predicates: n_name = 'CANADA' | | row-size=14B cardinality=5 | | @@ -2801,7 +2803,7 @@ PLAN-ROOT SINK | row-size=32B cardinality=1.50M | 07:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01' runtime filters: RF000 -> l.l_partkey, RF001 -> l.l_suppkey row-size=36B cardinality=1.50M @@ -2854,7 +2856,7 @@ PLAN-ROOT SINK | |--16:EXCHANGE [BROADCAST] | | | | | 05:SCAN HDFS [tpch_nested_parquet.region.r_nations n] -| | HDFS partitions=1/1 files=1 size=3.50KB +| | HDFS partitions=1/1 files=1 size=3.59KB | | predicates: n_name = 'CANADA' | | row-size=14B cardinality=5 | | @@ -2889,7 +2891,7 @@ PLAN-ROOT SINK | row-size=32B cardinality=1.50M | 07:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: l_shipdate < '1995-01-01', l_shipdate >= '1994-01-01' runtime filters: RF000 -> l.l_partkey, RF001 -> l.l_suppkey row-size=36B cardinality=1.50M @@ -2974,7 +2976,7 @@ PLAN-ROOT SINK | row-size=146B cardinality=15.00M | |--10:SCAN HDFS [tpch_nested_parquet.region.r_nations n] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: n_name = 'SAUDI ARABIA' | row-size=14B cardinality=5 | @@ -3012,7 +3014,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 01:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 
size=289.08MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderstatus = 'F' predicates on l1: l1.l_receiptdate > l1.l_commitdate @@ -3071,7 +3073,7 @@ PLAN-ROOT SINK |--22:EXCHANGE [BROADCAST] | | | 10:SCAN HDFS [tpch_nested_parquet.region.r_nations n] -| HDFS partitions=1/1 files=1 size=3.50KB +| HDFS partitions=1/1 files=1 size=3.59KB | predicates: n_name = 'SAUDI ARABIA' | row-size=14B cardinality=5 | @@ -3111,7 +3113,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 01:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: !empty(c.c_orders) predicates on o: !empty(o.o_lineitems), o_orderstatus = 'F' predicates on l1: l1.l_receiptdate > l1.l_commitdate @@ -3176,6 +3178,7 @@ PLAN-ROOT SINK | | row-size=55B cardinality=1 | | | 04:UNNEST [c.c_orders] +| limit: 1 | row-size=0B cardinality=10 | 06:NESTED LOOP JOIN [INNER JOIN] @@ -3187,12 +3190,12 @@ PLAN-ROOT SINK | | row-size=8B cardinality=1 | | | 01:SCAN HDFS [tpch_nested_parquet.customer c] -| HDFS partitions=1/1 files=4 size=289.14MB +| HDFS partitions=1/1 files=4 size=289.08MB | predicates: c_acctbal > 0, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') | row-size=35B cardinality=15.00K | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') row-size=47B cardinality=15.00K ---- DISTRIBUTEDPLAN @@ -3229,6 +3232,7 @@ PLAN-ROOT SINK | | row-size=55B cardinality=1 | | | 04:UNNEST [c.c_orders] +| limit: 1 | row-size=0B cardinality=10 | 06:NESTED LOOP JOIN [INNER JOIN, BROADCAST] @@ -3248,12 +3252,12 @@ PLAN-ROOT SINK | | row-size=8B cardinality=1 | | | 01:SCAN HDFS [tpch_nested_parquet.customer c] -| HDFS partitions=1/1 files=4 size=289.14MB +| HDFS partitions=1/1 files=4 size=289.08MB | predicates: c_acctbal > 0, 
substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') | row-size=35B cardinality=15.00K | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=289.14MB + HDFS partitions=1/1 files=4 size=289.08MB predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') row-size=47B cardinality=15.00K ==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test index 0bb999078..adc56aa99 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpch-views.test @@ -1477,11 +1477,11 @@ order by ---- PLAN PLAN-ROOT SINK | -10:SORT +11:SORT | order by: s_name ASC | row-size=67B cardinality=400 | -09:HASH JOIN [RIGHT SEMI JOIN] +10:HASH JOIN [RIGHT SEMI JOIN] | hash predicates: tpch.partsupp.ps_suppkey = tpch.supplier.s_suppkey | runtime filters: RF000 <- tpch.supplier.s_suppkey | row-size=98B cardinality=400 @@ -1501,6 +1501,10 @@ PLAN-ROOT SINK | runtime filters: RF008 -> tpch.supplier.s_nationkey | row-size=77B cardinality=10.00K | +09:AGGREGATE [FINALIZE] +| group by: tpch.partsupp.ps_suppkey +| row-size=8B cardinality=9.71K +| 07:HASH JOIN [RIGHT SEMI JOIN] | hash predicates: l_partkey = tpch.partsupp.ps_partkey, l_suppkey = tpch.partsupp.ps_suppkey | other join predicates: tpch.partsupp.ps_availqty > 0.5 * sum(l_quantity) @@ -1683,16 +1687,16 @@ order by ---- PLAN PLAN-ROOT SINK | -07:SORT +08:SORT | order by: cntrycode ASC | row-size=36B cardinality=15.00K | -06:AGGREGATE [FINALIZE] +07:AGGREGATE [FINALIZE] | output: count(*), sum(tpch.customer.c_acctbal) | group by: substr(tpch.customer.c_phone, 1, 2) | row-size=36B cardinality=15.00K | -05:HASH JOIN [RIGHT ANTI JOIN] +06:HASH JOIN [RIGHT ANTI JOIN] | hash predicates: tpch.orders.o_custkey = tpch.customer.c_custkey | row-size=51B cardinality=15.00K | @@ -1714,6 +1718,10 
@@ PLAN-ROOT SINK | predicates: substr(tpch.customer.c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17') | row-size=43B cardinality=15.00K | +05:AGGREGATE [FINALIZE] +| group by: tpch.orders.o_custkey +| row-size=8B cardinality=98.39K +| 03:SCAN HDFS [tpch.orders] HDFS partitions=1/1 files=1 size=162.56MB row-size=8B cardinality=1.50M diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/union.test b/testdata/workloads/functional-planner/queries/PlannerTest/union.test index c6ad4ef8d..396f85f66 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/union.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/union.test @@ -4005,43 +4005,43 @@ select t1.bigint_col from functional.alltypestiny t1 inner join ---- PLAN PLAN-ROOT SINK | -11:AGGREGATE [FINALIZE] +12:AGGREGATE [FINALIZE] | group by: bigint_col | row-size=8B cardinality=8 | 00:UNION -| pass-through-operands: 01,04 +| pass-through-operands: 01,05 | row-size=8B cardinality=11.70K | -|--10:HASH JOIN [INNER JOIN] +|--11:HASH JOIN [INNER JOIN] | | hash predicates: t2.bigint_col = t1.bigint_col | | runtime filters: RF004 <- t1.bigint_col | | row-size=16B cardinality=5.84K | | -| |--08:SCAN HDFS [functional.alltypestiny t1] +| |--09:SCAN HDFS [functional.alltypestiny t1] | | HDFS partitions=4/4 files=4 size=460B | | row-size=8B cardinality=8 | | -| 09:SCAN HDFS [functional.alltypes t2] +| 10:SCAN HDFS [functional.alltypes t2] | HDFS partitions=24/24 files=24 size=478.45KB | runtime filters: RF004 -> t2.bigint_col | row-size=8B cardinality=7.30K | -|--07:HASH JOIN [RIGHT OUTER JOIN] +|--08:HASH JOIN [RIGHT OUTER JOIN] | | hash predicates: t2.bigint_col = t1.bigint_col | | runtime filters: RF002 <- t1.bigint_col | | row-size=16B cardinality=5.84K | | -| |--05:SCAN HDFS [functional.alltypestiny t1] +| |--06:SCAN HDFS [functional.alltypestiny t1] | | HDFS partitions=4/4 files=4 size=460B | | row-size=8B cardinality=8 | | -| 06:SCAN HDFS [functional.alltypes t2] 
+| 07:SCAN HDFS [functional.alltypes t2] | HDFS partitions=24/24 files=24 size=478.45KB | runtime filters: RF002 -> t2.bigint_col | row-size=8B cardinality=7.30K | -|--04:HASH JOIN [RIGHT SEMI JOIN] +|--05:HASH JOIN [RIGHT SEMI JOIN] | | hash predicates: t2.bigint_col = t1.bigint_col | | runtime filters: RF000 <- t1.bigint_col | | row-size=8B cardinality=8 @@ -4050,6 +4050,10 @@ PLAN-ROOT SINK | | HDFS partitions=4/4 files=4 size=460B | | row-size=8B cardinality=8 | | +| 04:AGGREGATE [FINALIZE] +| | group by: t2.bigint_col +| | row-size=8B cardinality=10 +| | | 03:SCAN HDFS [functional.alltypes t2] | HDFS partitions=24/24 files=24 size=478.45KB | runtime filters: RF000 -> t2.bigint_col @@ -4103,7 +4107,7 @@ PLAN-ROOT SINK | row-size=0B cardinality=10 | 00:SCAN HDFS [tpch_nested_parquet.customer c] - HDFS partitions=1/1 files=4 size=288.99MB + HDFS partitions=1/1 files=4 size=289.08MB row-size=32B cardinality=150.00K ==== # IMPALA-6388: Verify that the order of the union operands does not impact the diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-runtime.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-runtime.test index 5f0cb16be..343d5a48a 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/nested-types-runtime.test +++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-runtime.test @@ -494,3 +494,23 @@ on leftSide.id = rightSide.id; ---- TYPES BIGINT,BIGINT ==== +---- QUERY +# IMPALA-1270: ensure semi join in subplan with distinct added is +# executable. +# NOTE: reference functional.alltypessmall because functional_parquet.alltypessmall +# does not have stats computed. 
+select a.id, e.key from complextypestbl a +left semi join functional.alltypessmall c on (a.id = c.int_col) +inner join a.nested_struct.g e +where length(e.key) > 0 +---- RESULTS +1,'foo' +2,'g1' +2,'g2' +2,'g3' +2,'g4' +2,'g5' +5,'foo' +---- TYPES +BIGINT,STRING +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/subquery.test b/testdata/workloads/functional-query/queries/QueryTest/subquery.test index bda3a8e57..0a38fb4df 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/subquery.test +++ b/testdata/workloads/functional-query/queries/QueryTest/subquery.test @@ -1434,3 +1434,88 @@ from functional.alltypestiny ---- TYPES INT, INT ==== +---- QUERY +# IMPALA-1270: test that distinct subquery is executable and returns correct results. +select id from alltypestiny +where int_col in (select int_col from alltypes where id % 2 = 0) +---- RESULTS +0 +2 +4 +6 +---- TYPES +INT +==== +---- QUERY +# IMPALA-1270: test that distinct subquery with anti join is executable and +# returns correct results. +select id from alltypestiny +where int_col not in (select int_col from alltypes where id % 2 = 0) +---- RESULTS +1 +3 +5 +7 +---- TYPES +INT +==== +---- QUERY +# IMPALA-1270: test that subquery with no join predicates is executable and +# returns correct results. A limit is added by the planner. +select id from alltypestiny +where exists (select int_col from alltypes where id % 2 = 0) +---- RESULTS +0 +1 +2 +3 +4 +5 +6 +7 +---- TYPES +INT +==== +---- QUERY +# IMPALA-1270: test subquery with multiple join predicates with distinct +# added returns correct results. +select count(*) from alltypesagg t1 +where int_col in ( + select int_col from alltypes t2 + where t1.bool_col = t2.bool_col and id is not null); +---- RESULTS +90 +---- TYPES +BIGINT +==== +---- QUERY +# IMPALA-1270: test subquery with aggregate function returns correct results. 
+select id from alltypesagg t1 +where int_col in ( + select count(*) + from alltypes t2 + group by int_col, tinyint_col) +---- RESULTS +730 +730 +1730 +1730 +2730 +2730 +3730 +3730 +4730 +4730 +5730 +5730 +6730 +6730 +7730 +7730 +8730 +8730 +9730 +9730 +---- TYPES +INT +====