From 388ad555d7df9e2711ea0ab81dbae4235c54b3cc Mon Sep 17 00:00:00 2001 From: Shant Hovsepian Date: Fri, 29 May 2020 16:26:07 -0400 Subject: [PATCH] IMPALA-8954: Uncorrelated scalar subqueries in the select list Extend StmtRewriter with the ability to rewrite scalar subqueries in the select list into cross joins. Currently the subquery must pass plan-time checks to determine that it returns a single row which may miss cases that may be valid at runtime or with more complex evaluation of the predicate expressions in the planner. Support for correlated subqueries will be a follow on change. Testing: * Added new analyzer tests, updated previous subquery tests * test_queries.py::TestQueries::test_subquery * Added test_tpcds_q9 to e2e and planner tests Change-Id: Ibcf55d26889aa01d69bb85f18c9241dda095fb66 Reviewed-on: http://gerrit.cloudera.org:8080/16007 Reviewed-by: Tim Armstrong Tested-by: Tim Armstrong --- .../apache/impala/analysis/SelectList.java | 5 + .../apache/impala/analysis/SelectStmt.java | 43 +- .../apache/impala/analysis/StmtRewriter.java | 169 +++- .../analysis/AnalyzeSubqueriesTest.java | 23 +- .../datasets/tpcds/tpcds_schema_template.sql | 2 +- .../queries/PlannerTest/subquery-rewrite.test | 134 +++ .../queries/PlannerTest/tpcds-all.test | 916 ++++++++++++++++++ .../queries/QueryTest/subquery.test | 173 ++++ testdata/workloads/tpcds/queries/count.test | 9 +- .../tpcds/queries/tpcds-decimal_v2-q9.test | 54 ++ .../workloads/tpcds/queries/tpcds-q9.test | 54 ++ tests/query_test/test_tpcds_queries.py | 6 + tests/util/parse_util.py | 2 +- 13 files changed, 1572 insertions(+), 18 deletions(-) create mode 100644 testdata/workloads/tpcds/queries/tpcds-decimal_v2-q9.test create mode 100644 testdata/workloads/tpcds/queries/tpcds-q9.test diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectList.java b/fe/src/main/java/org/apache/impala/analysis/SelectList.java index 3ac383035..9ed28c0e9 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SelectList.java +++ b/fe/src/main/java/org/apache/impala/analysis/SelectList.java @@ -94,9 +94,14 @@ public class SelectList { public void rewriteExprs(ExprRewriter rewriter, Analyzer analyzer) throws AnalysisException { + List subqueryExprs = new ArrayList<>(); for (SelectListItem item: items_) { if (item.isStar()) continue; item.setExpr(rewriter.rewrite(item.getExpr(), analyzer)); + item.getExpr().collect(Subquery.class, subqueryExprs); + } + for (Subquery s : subqueryExprs) { + s.getStatement().rewriteExprs(rewriter); } } diff --git a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java index 00323de61..56186945b 100644 --- a/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/SelectStmt.java @@ -274,9 +274,23 @@ public class SelectStmt extends QueryStmt { // Analyze the resultExpr before generating a label to ensure enforcement // of expr child and depth limits (toColumn() label may call toSql()). item.getExpr().analyze(analyzer_); - if (item.getExpr().contains(Predicates.instanceOf(Subquery.class))) { - throw new AnalysisException( - "Subqueries are not supported in the select list."); + // Check for scalar subquery types which are not supported + List subqueryExprs = new ArrayList<>(); + item.getExpr().collect(Subquery.class, subqueryExprs); + for (Subquery s : subqueryExprs) { + Preconditions.checkState(s.getStatement() instanceof SelectStmt); + if (!s.returnsScalarColumn()) { + throw new AnalysisException("A non-scalar subquery is not supported in " + + "the expression: " + item.getExpr().toSql()); + } + if (s.getStatement().isRuntimeScalar()) { + throw new AnalysisException( + "A subquery which may return more than one row is not supported in " + + "the expression: " + item.getExpr().toSql()); + } + Preconditions.checkState(((SelectStmt) s.getStatement()).returnsSingleRow(), + "Invariant violated: Only subqueries that are guaranteed to return a " + + "single row are supported: " + item.getExpr().toSql()); } resultExprs_.add(item.getExpr()); String label = item.toColumnLabel(i, analyzer_.useHiveColLabels()); @@ -1197,12 +1211,17 @@ public class SelectStmt extends QueryStmt { if (fromClauseOnly) { fromClause_.collectFromClauseTableRefs(tblRefs); } else { + // Collect TableRefs in all subqueries. fromClause_.collectTableRefs(tblRefs); - } - if (!fromClauseOnly && whereClause_ != null) { - // Collect TableRefs in WHERE-clause subqueries. List subqueries = new ArrayList<>(); - whereClause_.collect(Subquery.class, subqueries); + if (whereClause_ != null) { + whereClause_.collect(Subquery.class, subqueries); + } + for (SelectListItem item : selectList_.getItems()) { + if (item.isStar()) continue; + item.getExpr().collect(Subquery.class, subqueries); + } + for (Subquery sq: subqueries) { sq.getStatement().collectTableRefs(tblRefs, fromClauseOnly); } @@ -1211,8 +1230,6 @@ public class SelectStmt extends QueryStmt { @Override public void collectInlineViews(Set inlineViews) { - // Impala currently supports sub queries only in FROM, WHERE & WITH clauses. Hence, - // this function does not carry out any checks on HAVING clause. super.collectInlineViews(inlineViews); List fromTblRefs = getTableRefs(); Preconditions.checkNotNull(inlineViews); @@ -1236,6 +1253,14 @@ public class SelectStmt extends QueryStmt { whereSubQueries.get(0).getStatement().collectInlineViews(inlineViews); } } + List selectListSubQueries = Lists.newArrayList(); + for (SelectListItem item : selectList_.getItems()) { + if (item.isStar()) continue; + item.getExpr().collect(Subquery.class, selectListSubQueries); + } + for (Subquery sq : selectListSubQueries) { + sq.getStatement().collectInlineViews(inlineViews); + } } @Override diff --git a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java index 850b62950..c9fb004a7 100644 --- a/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java +++ b/fe/src/main/java/org/apache/impala/analysis/StmtRewriter.java @@ -934,8 +934,9 @@ public class StmtRewriter { /** * Rewrite all the subqueries of a SelectStmt in place. Subqueries are currently - * supported in FROM and WHERE clauses. The rewrite is performed in place and not in a - * clone of SelectStmt because it requires the stmt to be analyzed. + * supported in the FROM clause, WHERE clause and SELECT list. The rewrite is + * performed in place and not in a clone of SelectStmt because it requires the stmt to + * be analyzed. */ @Override protected void rewriteSelectStmtHook(SelectStmt stmt, Analyzer analyzer) @@ -952,6 +953,7 @@ public class StmtRewriter { } rewriteWhereClauseSubqueries(stmt, analyzer); } + rewriteSelectListSubqueries(stmt, analyzer); } /** @@ -1096,5 +1098,168 @@ public class StmtRewriter { smap.put(subquery, newSubquery); return expr.substitute(smap, analyzer, false); } + + /** + * Rewrite subqueries of a stmt's SELECT list. Scalar subqueries are the only type + * of subquery supported in the select list. Scalar subqueries return a single column + * and at most 1 row, a runtime error should be thrown if more than one row is + * returned. Generally these subqueries can be evaluated once for every row of the + * outer query however for performance reasons we want to rewrite evaluation to use + * joins where possible. + * + * 1) Uncorrelated Scalar Aggregate Query + * + * SELECT T1.a, (SELECT avg(T2.a) from T2) FROM T1; + * + * This is implemented by flattening into a join. + * + * SELECT T1.a, $a$1.$c$1 FROM T1, (SELECT avg(T2.a) $c$1 FROM T2) $a$1 + * + * Currently we only support very simple subqueries which return a single aggregate + * function with no group by columns unless a LIMIT 1 is given. TODO: IMPALA-1285 + * + * 2) Correlated Scalar Aggregate + * + * TODO: IMPALA-8955 + * SELECT id, (SELECT count(*) FROM T2 WHERE id=a.id ) FROM T1 a + * + * This can be flattened with a LEFT OUTER JOIN + * + * SELECT T1.a, $a$1.$c$1 FROM T1 LEFT OUTER JOIN + * (SELECT id, count(*) $c$1 FROM T2 GROUP BY id) $a$1 ON T1.id = $a$1.id + * + * 3) Correlated Scalar + * + * TODO: IMPALA-6315 + * SELECT id, (SELECT cost FROM T2 WHERE id=a.id ) FROM T1 a + * + * In this case there is no aggregate function to guarantee only a single row is + * returned per group so a run time cardinality check must be applied. An exception + * would be if the correlated predicates had primary key constraints. + * + * 4) Runtime Scalar Subqueries + * + * TODO: IMPALA-5100 + * We do have a {@link CardinalityCheckNode} for runtime checks however queries + * can't always be rewritten into an NLJ without special care. For example with + * conditional expression like below: + * + * SELECT T1.a, + * IF((SELECT max(T2.a) from T2 > 10, + * (SELECT T2.a from T2 WHERE id=T1.id), + * (SELECT T3.a from T2 WHERE if=T1.id) + * FROM T1; + * + * If rewritten to joins with cardinality checks then both legs of the conditional + * expression would be evaluated regardless of the condition. If the false case + * were to return a runtime error while when the true doesn't and the condition + * evaluates to true then we'd have incorrect behavior. + */ + private void rewriteSelectListSubqueries(SelectStmt stmt, Analyzer analyzer) + throws AnalysisException { + Preconditions.checkNotNull(stmt); + Preconditions.checkNotNull(analyzer); + final int numTableRefs = stmt.fromClause_.size(); + final boolean parentHasAgg = stmt.hasMultiAggInfo(); + // Track any new inline views so we later ensure they are rewritten if needed. + // An improvement would be to have a pre/post order abstract rewriter class. + final List newViews = new ArrayList<>(); + for (SelectListItem selectItem : stmt.getSelectList().getItems()) { + if (selectItem.isStar()) { + continue; + } + + final Expr expr = selectItem.getExpr(); + final List subqueries = new ArrayList<>(); + // Use collect as opposed to collectAll in order to allow nested subqueries to be + // rewritten as needed. For example a subquery in the select list which contains + // its own subquery in the where clause. + expr.collect(Predicates.instanceOf(Subquery.class), subqueries); + if (subqueries.size() == 0) { + continue; + } + final ExprSubstitutionMap smap = new ExprSubstitutionMap(); + for (Subquery sq : subqueries) { + final SelectStmt subqueryStmt = (SelectStmt) sq.getStatement(); + // TODO: Handle correlated subqueries IMPALA-8955 + if (isCorrelated(subqueryStmt)) { + throw new AnalysisException("A correlated scalar subquery is not supported " + + "in the expression: " + expr.toSql()); + } + Preconditions.checkState(sq.getType().isScalarType()); + + // Existential subqueries in Impala aren't really execution time expressions, + // they are either checked at plan time or expected to be handled by the + // subquery rewrite into a join. In the case of the select list we will only + // support plan time evaluation. + boolean replacedExists = false; + final List existsPredicates = new ArrayList<>(); + expr.collect(ExistsPredicate.class, existsPredicates); + for (ExistsPredicate ep : existsPredicates) { + // Check to see if the current subquery is the child of an exists predicate. + if (ep.contains(sq)) { + final BoolLiteral boolLiteral = replaceExistsPredicate(ep); + if (boolLiteral != null) { + boolLiteral.analyze(analyzer); + smap.put(ep, boolLiteral); + replacedExists = true; + break; + } else { + throw new AnalysisException( + "Unsupported subquery with runtime scalar check: " + ep.toSql()); + } + } + } + if (replacedExists) { + continue; + } + + List colLabels = new ArrayList<>(); + for (int i = 0; i < subqueryStmt.getColLabels().size(); ++i) { + colLabels.add(subqueryStmt.getColumnAliasGenerator().getNextAlias()); + } + // Create a new inline view from the subquery stmt aliasing the columns. + InlineViewRef inlineView = new InlineViewRef( + stmt.getTableAliasGenerator().getNextAlias(), subqueryStmt, colLabels); + inlineView.reset(); + inlineView.analyze(analyzer); + + // For uncorrelated scalar subqueries we rewrite with a CROSS_JOIN. This makes + // it simpler to further optimize by merging subqueries without worrying about + // join ordering as in IMPALA-9796. For correlated subqueries we'd want to + // rewrite to a LOJ. + inlineView.setJoinOp(JoinOperator.CROSS_JOIN); + stmt.fromClause_.add(inlineView); + newViews.add(inlineView); + + SlotRef slotRef = new SlotRef(Lists.newArrayList( + inlineView.getUniqueAlias(), inlineView.getColLabels().get(0))); + slotRef.analyze(analyzer); + Expr substitute = slotRef; + // Need to wrap the expression with a no-op aggregate function if the stmt does + // any aggregation, using MAX() given no explicit function to return any value + // in a group. + if (parentHasAgg) { + final FunctionCallExpr aggWrapper = + new FunctionCallExpr("max", Lists.newArrayList((Expr) slotRef)); + aggWrapper.analyze(analyzer); + substitute = aggWrapper; + } + // Substitute original subquery expression with a reference to the inline view. + smap.put(sq, substitute); + } + // Update select list with any new slot references. + selectItem.setExpr(expr.substitute(smap, analyzer, false)); + } + // Rewrite any new views + for (InlineViewRef v : newViews) { + rewriteQueryStatement(v.getViewStmt(), v.getAnalyzer()); + } + // Only applies to the original list of TableRefs, not any as a result of the + // rewrite. + if (!newViews.isEmpty()) { + replaceUnqualifiedStarItems(stmt, numTableRefs); + } + } } } diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java index d297c4874..a8457186c 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeSubqueriesTest.java @@ -1207,6 +1207,20 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest { "(select id from functional.alltypes t2 where t1.int_col = t2.int_col)", "Unsupported correlated subquery with runtime scalar check: " + "SELECT id FROM functional.alltypes t2 WHERE t1.int_col = t2.int_col"); + + // Scalar subqueries in the select list + AnalyzesOk("select id, 10 + (select max(int_col) from functional.alltypestiny) " + + "from functional.alltypestiny"); + AnalyzesOk("select id, (select count(*) from functional.alltypestiny where int_col " + + "< (select max(int_col) from functional.alltypes)) from functional.dimtbl"); + AnalysisError("select id, (select id, count(*) from functional.alltypestiny " + + "group by 1) from functional.dimtbl", + "A non-scalar subquery is not supported in the expression: " + + "(SELECT id, count(*) FROM functional.alltypestiny GROUP BY id)"); + AnalysisError("select id, (select count(*) from functional.alltypestiny b where " + + "id=a.id ) from functional.alltypes a", + "A correlated scalar subquery is not supported in the expression: " + + "(SELECT count(*) FROM functional.alltypestiny b WHERE id = a.id)"); } @Test @@ -1339,7 +1353,7 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest { } @Test - public void TestIllegalSubquery() throws AnalysisException { + public void testIllegalSubquery() throws AnalysisException { // Predicate with a child subquery in the HAVING clause AnalysisError("select id, count(*) from functional.alltypestiny t group by " + "id having count(*) > (select count(*) from functional.alltypesagg)", @@ -1349,9 +1363,10 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest { "Subqueries are not supported in the HAVING clause."); // Subquery in the select list - AnalysisError("select id, (select int_col from functional.alltypestiny) " + - "from functional.alltypestiny", - "Subqueries are not supported in the select list."); + AnalysisError("select id, (select int_col from functional.alltypestiny) " + + "from functional.alltypestiny", + "A subquery which may return more than one row is not supported in the " + + "expression: " + "(SELECT int_col FROM functional.alltypestiny)"); // Subquery in the GROUP BY clause AnalysisError("select id, count(*) from functional.alltypestiny " + diff --git a/testdata/datasets/tpcds/tpcds_schema_template.sql b/testdata/datasets/tpcds/tpcds_schema_template.sql index 079e21acd..78f9f7c8e 100644 --- a/testdata/datasets/tpcds/tpcds_schema_template.sql +++ b/testdata/datasets/tpcds/tpcds_schema_template.sql @@ -1138,7 +1138,7 @@ tpcds ---- BASE_TABLE_NAME reason ---- COLUMNS -r_reason_sk bigint +r_reason_sk int r_reason_id string r_reason_desc string ---- ROW_FORMAT diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test index 8335ff32b..bcffd73ed 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test @@ -3296,3 +3296,137 @@ PLAN-ROOT SINK runtime filters: RF000 -> int_col row-size=89B cardinality=7.30K ==== +# Scalar subquery in select list +SELECT alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name, +SUM(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END) / CAST((SELECT COUNT(*) FROM functional.dimtbl WHERE dimtbl.zip < 94613) AS decimal(10, 6)) AS close, +SUM(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END) / CAST((SELECT COUNT(*) FROM functional.dimtbl WHERE dimtbl.zip >= 94613) AS decimal(10, 6)) AS remote +FROM functional.alltypesagg JOIN functional.dimtbl ON (alltypesagg.id = dimtbl.id) +GROUP BY 1, 2, 3 +ORDER BY alltypesagg.string_col DESC +---- PLAN +PLAN-ROOT SINK +| +10:SORT +| order by: alltypesagg.string_col DESC +| row-size=84B cardinality=11 +| +09:AGGREGATE [FINALIZE] +| output: sum(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max(count(*)), sum(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max(count(*)) +| group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name +| row-size=84B cardinality=11 +| +08:NESTED LOOP JOIN [CROSS JOIN] +| row-size=84B cardinality=11 +| +|--05:AGGREGATE [FINALIZE] +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 04:SCAN HDFS [functional.dimtbl] +| HDFS partitions=1/1 files=1 size=171B +| predicates: dimtbl.zip >= 94613 +| row-size=4B cardinality=1 +| +07:NESTED LOOP JOIN [CROSS JOIN] +| row-size=76B cardinality=11 +| +|--03:AGGREGATE [FINALIZE] +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 02:SCAN HDFS [functional.dimtbl] +| HDFS partitions=1/1 files=1 size=171B +| predicates: dimtbl.zip < 94613 +| row-size=4B cardinality=1 +| +06:HASH JOIN [INNER JOIN] +| hash predicates: alltypesagg.id = dimtbl.id +| runtime filters: RF000 <- dimtbl.id +| row-size=68B cardinality=11 +| +|--01:SCAN HDFS [functional.dimtbl] +| HDFS partitions=1/1 files=1 size=171B +| row-size=29B cardinality=10 +| +00:SCAN HDFS [functional.alltypesagg] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> alltypesagg.id + row-size=39B cardinality=11.00K +---- DISTRIBUTEDPLAN +PLAN-ROOT SINK +| +20:MERGING-EXCHANGE [UNPARTITIONED] +| order by: alltypesagg.string_col DESC +| +10:SORT +| order by: alltypesagg.string_col DESC +| row-size=84B cardinality=11 +| +19:AGGREGATE [FINALIZE] +| output: sum:merge(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max:merge(`$a$1`.`$c$1`), sum:merge(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max:merge(`$a$2`.`$c$1`) +| group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name +| row-size=84B cardinality=11 +| +18:EXCHANGE [HASH(alltypesagg.string_col,alltypesagg.date_string_col,dimtbl.name)] +| +09:AGGREGATE [STREAMING] +| output: sum(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max(count(*)), sum(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max(count(*)) +| group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name +| row-size=84B cardinality=11 +| +08:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=84B cardinality=11 +| +|--17:EXCHANGE [BROADCAST] +| | +| 16:AGGREGATE [FINALIZE] +| | output: count:merge(*) +| | row-size=8B cardinality=1 +| | +| 15:EXCHANGE [UNPARTITIONED] +| | +| 05:AGGREGATE +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 04:SCAN HDFS [functional.dimtbl] +| HDFS partitions=1/1 files=1 size=171B +| predicates: dimtbl.zip >= 94613 +| row-size=4B cardinality=1 +| +07:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=76B cardinality=11 +| +|--14:EXCHANGE [BROADCAST] +| | +| 13:AGGREGATE [FINALIZE] +| | output: count:merge(*) +| | row-size=8B cardinality=1 +| | +| 12:EXCHANGE [UNPARTITIONED] +| | +| 03:AGGREGATE +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 02:SCAN HDFS [functional.dimtbl] +| HDFS partitions=1/1 files=1 size=171B +| predicates: dimtbl.zip < 94613 +| row-size=4B cardinality=1 +| +06:HASH JOIN [INNER JOIN, BROADCAST] +| hash predicates: alltypesagg.id = dimtbl.id +| runtime filters: RF000 <- dimtbl.id +| row-size=68B cardinality=11 +| +|--11:EXCHANGE [BROADCAST] +| | +| 01:SCAN HDFS [functional.dimtbl] +| HDFS partitions=1/1 files=1 size=171B +| row-size=29B cardinality=10 +| +00:SCAN HDFS [functional.alltypesagg] + HDFS partitions=11/11 files=11 size=814.73KB + runtime filters: RF000 -> alltypesagg.id + row-size=39B cardinality=11.00K +==== diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test index 8d797571a..9057a3f63 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/tpcds-all.test @@ -8788,3 +8788,919 @@ PLAN-ROOT SINK runtime filters: RF004 -> ss_sold_time_sk, RF002 -> ss_hdemo_sk row-size=12B cardinality=2.88M ==== +# TPCDS-Q9 +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 74129 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 1 and 20) end bucket1, + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 122840 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 56580 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10097 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 165306 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +---- PLAN +Max Per-Host Resource Reservation: Memory=120.01MB Threads=17 +Per-Host Resource Estimates: Memory=2.05GB +PLAN-ROOT SINK +| +45:NESTED LOOP JOIN [CROSS JOIN] +| row-size=124B cardinality=1 +| +|--30:AGGREGATE [FINALIZE] +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 29:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 100, ss_quantity >= 81 +| row-size=8B cardinality=288.04K +| +44:NESTED LOOP JOIN [CROSS JOIN] +| row-size=116B cardinality=1 +| +|--28:AGGREGATE [FINALIZE] +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 27:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 100, ss_quantity >= 81 +| row-size=8B cardinality=288.04K +| +43:NESTED LOOP JOIN [CROSS JOIN] +| row-size=108B cardinality=1 +| +|--26:AGGREGATE [FINALIZE] +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 25:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 100, ss_quantity >= 81 +| row-size=4B cardinality=288.04K +| +42:NESTED LOOP JOIN [CROSS JOIN] +| row-size=100B cardinality=1 +| +|--24:AGGREGATE [FINALIZE] +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 23:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 80, ss_quantity >= 61 +| row-size=8B cardinality=288.04K +| +41:NESTED LOOP JOIN [CROSS JOIN] +| row-size=92B cardinality=1 +| +|--22:AGGREGATE [FINALIZE] +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 21:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 80, ss_quantity >= 61 +| row-size=8B cardinality=288.04K +| +40:NESTED LOOP JOIN [CROSS JOIN] +| row-size=84B cardinality=1 +| +|--20:AGGREGATE [FINALIZE] +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 19:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 80, ss_quantity >= 61 +| row-size=4B cardinality=288.04K +| +39:NESTED LOOP JOIN [CROSS JOIN] +| row-size=76B cardinality=1 +| +|--18:AGGREGATE [FINALIZE] +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 17:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 60, ss_quantity >= 41 +| row-size=8B cardinality=288.04K +| +38:NESTED LOOP JOIN [CROSS JOIN] +| row-size=68B cardinality=1 +| +|--16:AGGREGATE [FINALIZE] +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 15:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 60, ss_quantity >= 41 +| row-size=8B cardinality=288.04K +| +37:NESTED LOOP JOIN [CROSS JOIN] +| row-size=60B cardinality=1 +| +|--14:AGGREGATE [FINALIZE] +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 13:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 60, ss_quantity >= 41 +| row-size=4B cardinality=288.04K +| +36:NESTED LOOP JOIN [CROSS JOIN] +| row-size=52B cardinality=1 +| +|--12:AGGREGATE [FINALIZE] +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 11:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 40, ss_quantity >= 21 +| row-size=8B cardinality=288.04K +| +35:NESTED LOOP JOIN [CROSS JOIN] +| row-size=44B cardinality=1 +| +|--10:AGGREGATE [FINALIZE] +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 09:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 40, ss_quantity >= 21 +| row-size=8B cardinality=288.04K +| +34:NESTED LOOP JOIN [CROSS JOIN] +| row-size=36B cardinality=1 +| +|--08:AGGREGATE [FINALIZE] +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 07:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 40, ss_quantity >= 21 +| row-size=4B cardinality=288.04K +| +33:NESTED LOOP JOIN [CROSS JOIN] +| row-size=28B cardinality=1 +| +|--06:AGGREGATE [FINALIZE] +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 05:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 20, ss_quantity >= 1 +| row-size=8B cardinality=288.04K +| +32:NESTED LOOP JOIN [CROSS JOIN] +| row-size=20B cardinality=1 +| +|--04:AGGREGATE [FINALIZE] +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 03:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 20, ss_quantity >= 1 +| row-size=8B cardinality=288.04K +| +31:NESTED LOOP JOIN [CROSS JOIN] +| row-size=12B cardinality=1 +| +|--00:SCAN HDFS [tpcds.reason] +| HDFS partitions=1/1 files=1 size=1.31KB +| predicates: r_reason_sk = 1 +| row-size=4B cardinality=1 +| +02:AGGREGATE [FINALIZE] +| output: count(*) +| row-size=8B cardinality=1 +| +01:SCAN HDFS [tpcds.store_sales] + HDFS partitions=1824/1824 files=1824 size=346.60MB + predicates: ss_quantity <= 20, ss_quantity >= 1 + row-size=4B cardinality=288.04K +---- DISTRIBUTEDPLAN +Max Per-Host Resource Reservation: Memory=120.01MB Threads=47 +Per-Host Resource Estimates: Memory=2.20GB +PLAN-ROOT SINK +| +45:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=124B cardinality=1 +| +|--90:EXCHANGE [UNPARTITIONED] +| | +| 89:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 88:EXCHANGE [UNPARTITIONED] +| | +| 30:AGGREGATE +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 29:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 100, ss_quantity >= 81 +| row-size=8B cardinality=288.04K +| +44:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=116B cardinality=1 +| +|--87:EXCHANGE [UNPARTITIONED] +| | +| 86:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 85:EXCHANGE [UNPARTITIONED] +| | +| 28:AGGREGATE +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 27:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 100, ss_quantity >= 81 +| row-size=8B cardinality=288.04K +| +43:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=108B cardinality=1 +| +|--84:EXCHANGE [UNPARTITIONED] +| | +| 83:AGGREGATE [FINALIZE] +| | output: count:merge(*) +| | row-size=8B cardinality=1 +| | +| 82:EXCHANGE [UNPARTITIONED] +| | +| 26:AGGREGATE +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 25:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 100, ss_quantity >= 81 +| row-size=4B cardinality=288.04K +| +42:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=100B cardinality=1 +| +|--81:EXCHANGE [UNPARTITIONED] +| | +| 80:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 79:EXCHANGE [UNPARTITIONED] +| | +| 24:AGGREGATE +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 23:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 80, ss_quantity >= 61 +| row-size=8B cardinality=288.04K +| +41:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=92B cardinality=1 +| +|--78:EXCHANGE [UNPARTITIONED] +| | +| 77:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 76:EXCHANGE [UNPARTITIONED] +| | +| 22:AGGREGATE +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 21:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 80, ss_quantity >= 61 +| row-size=8B cardinality=288.04K +| +40:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=84B cardinality=1 +| +|--75:EXCHANGE [UNPARTITIONED] +| | +| 74:AGGREGATE [FINALIZE] +| | output: count:merge(*) +| | row-size=8B cardinality=1 +| | +| 73:EXCHANGE [UNPARTITIONED] +| | +| 20:AGGREGATE +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 19:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 80, ss_quantity >= 61 +| row-size=4B cardinality=288.04K +| +39:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=76B cardinality=1 +| +|--72:EXCHANGE [UNPARTITIONED] +| | +| 71:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 70:EXCHANGE [UNPARTITIONED] +| | +| 18:AGGREGATE +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 17:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 60, ss_quantity >= 41 +| row-size=8B cardinality=288.04K +| +38:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=68B cardinality=1 +| +|--69:EXCHANGE [UNPARTITIONED] +| | +| 68:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 67:EXCHANGE [UNPARTITIONED] +| | +| 16:AGGREGATE +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 15:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 60, ss_quantity >= 41 +| row-size=8B cardinality=288.04K +| +37:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=60B cardinality=1 +| +|--66:EXCHANGE [UNPARTITIONED] +| | +| 65:AGGREGATE [FINALIZE] +| | output: count:merge(*) +| | row-size=8B cardinality=1 +| | +| 64:EXCHANGE [UNPARTITIONED] +| | +| 14:AGGREGATE +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 13:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 60, ss_quantity >= 41 +| row-size=4B cardinality=288.04K +| +36:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=52B cardinality=1 +| +|--63:EXCHANGE [UNPARTITIONED] +| | +| 62:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 61:EXCHANGE [UNPARTITIONED] +| | +| 12:AGGREGATE +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 11:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 40, ss_quantity >= 21 +| row-size=8B cardinality=288.04K +| +35:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=44B cardinality=1 +| +|--60:EXCHANGE [UNPARTITIONED] +| | +| 59:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 58:EXCHANGE [UNPARTITIONED] +| | +| 10:AGGREGATE +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 09:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 40, ss_quantity >= 21 +| row-size=8B cardinality=288.04K +| +34:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=36B cardinality=1 +| +|--57:EXCHANGE [UNPARTITIONED] +| | +| 56:AGGREGATE [FINALIZE] +| | output: count:merge(*) +| | row-size=8B cardinality=1 +| | +| 55:EXCHANGE [UNPARTITIONED] +| | +| 08:AGGREGATE +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 07:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 40, ss_quantity >= 21 +| row-size=4B cardinality=288.04K +| +33:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=28B cardinality=1 +| +|--54:EXCHANGE [UNPARTITIONED] +| | +| 53:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 52:EXCHANGE [UNPARTITIONED] +| | +| 06:AGGREGATE +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 05:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 20, ss_quantity >= 1 +| row-size=8B cardinality=288.04K +| +32:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=20B cardinality=1 +| +|--51:EXCHANGE [UNPARTITIONED] +| | +| 50:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 49:EXCHANGE [UNPARTITIONED] +| | +| 04:AGGREGATE +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 03:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 20, ss_quantity >= 1 +| row-size=8B cardinality=288.04K +| +31:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| row-size=12B cardinality=1 +| +|--48:EXCHANGE [UNPARTITIONED] +| | +| 00:SCAN HDFS [tpcds.reason] +| HDFS partitions=1/1 files=1 size=1.31KB +| predicates: r_reason_sk = 1 +| row-size=4B cardinality=1 +| +47:AGGREGATE [FINALIZE] +| output: count:merge(*) +| row-size=8B cardinality=1 +| +46:EXCHANGE [UNPARTITIONED] +| +02:AGGREGATE +| output: count(*) +| row-size=8B cardinality=1 +| +01:SCAN HDFS [tpcds.store_sales] + HDFS partitions=1824/1824 files=1824 size=346.60MB + predicates: ss_quantity <= 20, ss_quantity >= 1 + row-size=4B cardinality=288.04K +---- PARALLELPLANS +Max Per-Host Resource Reservation: Memory=240.01MB Threads=61 +Per-Host Resource Estimates: Memory=946MB +PLAN-ROOT SINK +| +45:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 00 +| row-size=124B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=00 plan-id=01 cohort-id=01 +| | +| 90:EXCHANGE [UNPARTITIONED] +| | +| 89:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 88:EXCHANGE [UNPARTITIONED] +| | +| 30:AGGREGATE +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 29:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 100, ss_quantity >= 81 +| row-size=8B cardinality=288.04K +| +44:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 01 +| row-size=116B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=01 plan-id=02 cohort-id=01 +| | +| 87:EXCHANGE [UNPARTITIONED] +| | +| 86:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 85:EXCHANGE [UNPARTITIONED] +| | +| 28:AGGREGATE +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 27:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 100, ss_quantity >= 81 +| row-size=8B cardinality=288.04K +| +43:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 02 +| row-size=108B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=02 plan-id=03 cohort-id=01 +| | +| 84:EXCHANGE [UNPARTITIONED] +| | +| 83:AGGREGATE [FINALIZE] +| | output: count:merge(*) +| | row-size=8B cardinality=1 +| | +| 82:EXCHANGE [UNPARTITIONED] +| | +| 26:AGGREGATE +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 25:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 100, ss_quantity >= 81 +| row-size=4B cardinality=288.04K +| +42:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 03 +| row-size=100B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=03 plan-id=04 cohort-id=01 +| | +| 81:EXCHANGE [UNPARTITIONED] +| | +| 80:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 79:EXCHANGE [UNPARTITIONED] +| | +| 24:AGGREGATE +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 23:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 80, ss_quantity >= 61 +| row-size=8B cardinality=288.04K +| +41:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 04 +| row-size=92B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=04 plan-id=05 cohort-id=01 +| | +| 78:EXCHANGE [UNPARTITIONED] +| | +| 77:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 76:EXCHANGE [UNPARTITIONED] +| | +| 22:AGGREGATE +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 21:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 80, ss_quantity >= 61 +| row-size=8B cardinality=288.04K +| +40:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 05 +| row-size=84B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=05 plan-id=06 cohort-id=01 +| | +| 75:EXCHANGE [UNPARTITIONED] +| | +| 74:AGGREGATE [FINALIZE] +| | output: count:merge(*) +| | row-size=8B cardinality=1 +| | +| 73:EXCHANGE [UNPARTITIONED] +| | +| 20:AGGREGATE +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 19:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 80, ss_quantity >= 61 +| row-size=4B cardinality=288.04K +| +39:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 06 +| row-size=76B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=06 plan-id=07 cohort-id=01 +| | +| 72:EXCHANGE [UNPARTITIONED] +| | +| 71:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 70:EXCHANGE [UNPARTITIONED] +| | +| 18:AGGREGATE +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 17:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 60, ss_quantity >= 41 +| row-size=8B cardinality=288.04K +| +38:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 07 +| row-size=68B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=07 plan-id=08 cohort-id=01 +| | +| 69:EXCHANGE [UNPARTITIONED] +| | +| 68:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 67:EXCHANGE [UNPARTITIONED] +| | +| 16:AGGREGATE +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 15:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 60, ss_quantity >= 41 +| row-size=8B cardinality=288.04K +| +37:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 08 +| row-size=60B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=08 plan-id=09 cohort-id=01 +| | +| 66:EXCHANGE [UNPARTITIONED] +| | +| 65:AGGREGATE [FINALIZE] +| | output: count:merge(*) +| | row-size=8B cardinality=1 +| | +| 64:EXCHANGE [UNPARTITIONED] +| | +| 14:AGGREGATE +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 13:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 60, ss_quantity >= 41 +| row-size=4B cardinality=288.04K +| +36:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 09 +| row-size=52B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=09 plan-id=10 cohort-id=01 +| | +| 63:EXCHANGE [UNPARTITIONED] +| | +| 62:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 61:EXCHANGE [UNPARTITIONED] +| | +| 12:AGGREGATE +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 11:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 40, ss_quantity >= 21 +| row-size=8B cardinality=288.04K +| +35:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 10 +| row-size=44B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=10 plan-id=11 cohort-id=01 +| | +| 60:EXCHANGE [UNPARTITIONED] +| | +| 59:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 58:EXCHANGE [UNPARTITIONED] +| | +| 10:AGGREGATE +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 09:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 40, ss_quantity >= 21 +| row-size=8B cardinality=288.04K +| +34:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 11 +| row-size=36B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=11 plan-id=12 cohort-id=01 +| | +| 57:EXCHANGE [UNPARTITIONED] +| | +| 56:AGGREGATE [FINALIZE] +| | output: count:merge(*) +| | row-size=8B cardinality=1 +| | +| 55:EXCHANGE [UNPARTITIONED] +| | +| 08:AGGREGATE +| | output: count(*) +| | row-size=8B cardinality=1 +| | +| 07:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 40, ss_quantity >= 21 +| row-size=4B cardinality=288.04K +| +33:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 12 +| row-size=28B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=12 plan-id=13 cohort-id=01 +| | +| 54:EXCHANGE [UNPARTITIONED] +| | +| 53:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 52:EXCHANGE [UNPARTITIONED] +| | +| 06:AGGREGATE +| | output: avg(ss_net_profit) +| | row-size=8B cardinality=1 +| | +| 05:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 20, ss_quantity >= 1 +| row-size=8B cardinality=288.04K +| +32:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 13 +| row-size=20B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=13 plan-id=14 cohort-id=01 +| | +| 51:EXCHANGE [UNPARTITIONED] +| | +| 50:AGGREGATE [FINALIZE] +| | output: avg:merge(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 49:EXCHANGE [UNPARTITIONED] +| | +| 04:AGGREGATE +| | output: avg(ss_ext_discount_amt) +| | row-size=8B cardinality=1 +| | +| 03:SCAN HDFS [tpcds.store_sales] +| HDFS partitions=1824/1824 files=1824 size=346.60MB +| predicates: ss_quantity <= 20, ss_quantity >= 1 +| row-size=8B cardinality=288.04K +| +31:NESTED LOOP JOIN [CROSS JOIN, BROADCAST] +| join table id: 14 +| row-size=12B cardinality=1 +| +|--JOIN BUILD +| | join-table-id=14 plan-id=15 cohort-id=01 +| | +| 48:EXCHANGE [UNPARTITIONED] +| | +| 00:SCAN HDFS [tpcds.reason] +| HDFS partitions=1/1 files=1 size=1.31KB +| predicates: r_reason_sk = 1 +| row-size=4B cardinality=1 +| +47:AGGREGATE [FINALIZE] +| output: count:merge(*) +| row-size=8B cardinality=1 +| +46:EXCHANGE [UNPARTITIONED] +| +02:AGGREGATE +| output: count(*) +| row-size=8B cardinality=1 +| +01:SCAN HDFS [tpcds.store_sales] + HDFS partitions=1824/1824 files=1824 size=346.60MB + predicates: ss_quantity <= 20, ss_quantity >= 1 + row-size=4B cardinality=288.04K +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/subquery.test b/testdata/workloads/functional-query/queries/QueryTest/subquery.test index c53d29c88..000c8cfec 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/subquery.test +++ b/testdata/workloads/functional-query/queries/QueryTest/subquery.test @@ -1039,3 +1039,176 @@ where int_col = (select f.id from ( ---- TYPES BIGINT ==== +---- QUERY +# Basic scalar in select list +select id, 1+(select min(id) from functional.alltypessmall) + from functional.alltypestiny +order by id +---- RESULTS +0,1 +1,1 +2,1 +3,1 +4,1 +5,1 +6,1 +7,1 +---- TYPES +INT, BIGINT +==== +---- QUERY +# No aggregate however limit 1 +select id, 1+(select id from functional.alltypessmall where id=0 group by id limit 1) + from functional.alltypestiny +order by id +---- RESULTS +0,1 +1,1 +2,1 +3,1 +4,1 +5,1 +6,1 +7,1 +---- TYPES +INT, BIGINT +==== +---- QUERY +# outer join +select s.id, (select count(id) from alltypes) from alltypestiny t + left outer join alltypessmall s on t.id = s.id where s.bool_col +order by id +---- RESULTS +0,7300 +2,7300 +4,7300 +6,7300 +---- TYPES +INT, BIGINT +==== +---- QUERY +# correlated scalar currently unsupported +select id, (select count(id) from functional.alltypessmall where id=t.id) + from functional.alltypestiny t +order by id +---- RESULTS +---- CATCH +A correlated scalar subquery is not supported in the expression: +==== +---- QUERY +# Uncorrelated Scalar Aggregate in select list combined with aggregation in parent query +select +max(a.id), +(select max(id) from functional.alltypestiny), +(select min(id) from functional.alltypestiny) +from functional.alltypessmall a +---- RESULTS +99,7,0 +---- TYPES +INT, INT, INT +==== +---- QUERY +# Empty tables or false predicate should result in NULLs +# Count aggregates on empty return 0 +select id, + (select sum(f2) from functional.emptytable), + (select count(id) from functional.alltypestiny where id > 100) +from functional.alltypestiny +order by id +---- RESULTS +0,NULL,0 +1,NULL,0 +2,NULL,0 +3,NULL,0 +4,NULL,0 +5,NULL,0 +6,NULL,0 +7,NULL,0 +---- TYPES +INT, BIGINT, BIGINT +==== +---- QUERY +# nested subquery over a view with an outer predicate on the subquery column +select count(id) from + (select id, + (select count(*) as cnt from functional.alltypestiny where int_col + <= (select max(int_col) from functional.alltypes_view)) as c from functional.dimtbl) T + where t.c <10 +---- RESULTS +10 +---- TYPES +BIGINT +==== +---- QUERY +# Empty tables should result in NULLs +select id, id in (select min(id) from functional.alltypessmall) + from functional.alltypestiny +order by id +---- RESULTS +0,true +1,false +2,false +3,false +4,false +5,false +6,false +7,false +---- TYPES +INT, BOOLEAN +==== +---- QUERY +# With a join in the subquery +select id, + (select count(*) from functional.alltypessmall join functional.alltypestiny using (id)) + from functional.alltypestiny +order by id +---- RESULTS +0,8 +1,8 +2,8 +3,8 +4,8 +5,8 +6,8 +7,8 +---- TYPES +INT, BIGINT +==== +---- QUERY +# EXISTS predicates are handled at plantime so we need to special case their rewrites +select id, + exists (select max(id) from dimtbl) + from functional.alltypestiny +order by id +---- RESULTS +0,true +1,true +2,true +3,true +4,true +5,true +6,true +7,true +---- TYPES +INT, BOOLEAN +==== +---- QUERY +# Multiple levels of exists +SELECT id, +NOT EXISTS (SELECT MAX(f2) FROM emptytable), +EXISTS (SELECT MAX(id) FROM DIMTBL WHERE EXISTS (SELECT MAX(id) FROM alltypestiny)), +(SELECT 1 +FROM alltypestiny t1 +WHERE EXISTS + (SELECT 1 + FROM alltypestiny t2 + WHERE t1.id = t2.id AND t1.int_col = t2.int_col + GROUP BY t2.id + HAVING count(1) = 1) LIMIT 1) +FROM alltypessmall WHERE id < 2 +---- RESULTS +0,false,true,1 +1,false,true,1 +---- TYPES +INT, BOOLEAN, BOOLEAN, TINYINT +==== diff --git a/testdata/workloads/tpcds/queries/count.test b/testdata/workloads/tpcds/queries/count.test index 213c7a7ad..8ae49d44b 100644 --- a/testdata/workloads/tpcds/queries/count.test +++ b/testdata/workloads/tpcds/queries/count.test @@ -159,4 +159,11 @@ select count(*) from web_site; 30 ---- TYPES BIGINT -==== \ No newline at end of file +==== +---- QUERY: TPCDS-COUNT-REASON +select count(*) from reason; +---- RESULTS +35 +---- TYPES +BIGINT +==== diff --git a/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q9.test b/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q9.test new file mode 100644 index 000000000..82f452709 --- /dev/null +++ b/testdata/workloads/tpcds/queries/tpcds-decimal_v2-q9.test @@ -0,0 +1,54 @@ +==== +---- QUERY: TPCDS-Q9 +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 74129 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 1 and 20) end bucket1 , + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 122840 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 56580 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10097 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 165306 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +---- RESULTS +39.645413,115.898138,191.634713,267.188000,341.986915 +---- TYPES +DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL +==== diff --git a/testdata/workloads/tpcds/queries/tpcds-q9.test b/testdata/workloads/tpcds/queries/tpcds-q9.test new file mode 100644 index 000000000..881cb6885 --- /dev/null +++ b/testdata/workloads/tpcds/queries/tpcds-q9.test @@ -0,0 +1,54 @@ +==== +---- QUERY: TPCDS-Q9 +select case when (select count(*) + from store_sales + where ss_quantity between 1 and 20) > 74129 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 1 and 20) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 1 and 20) end bucket1, + case when (select count(*) + from store_sales + where ss_quantity between 21 and 40) > 122840 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 21 and 40) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 21 and 40) end bucket2, + case when (select count(*) + from store_sales + where ss_quantity between 41 and 60) > 56580 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 41 and 60) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 41 and 60) end bucket3, + case when (select count(*) + from store_sales + where ss_quantity between 61 and 80) > 10097 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 61 and 80) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 61 and 80) end bucket4, + case when (select count(*) + from store_sales + where ss_quantity between 81 and 100) > 165306 + then (select avg(ss_ext_discount_amt) + from store_sales + where ss_quantity between 81 and 100) + else (select avg(ss_net_profit) + from store_sales + where ss_quantity between 81 and 100) end bucket5 +from reason +where r_reason_sk = 1 +---- RESULTS +39.64,115.89,191.63,267.18,341.98 +---- TYPES +DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL +==== diff --git a/tests/query_test/test_tpcds_queries.py b/tests/query_test/test_tpcds_queries.py index 512fbf504..41fa1a7cd 100644 --- a/tests/query_test/test_tpcds_queries.py +++ b/tests/query_test/test_tpcds_queries.py @@ -73,6 +73,9 @@ class TestTpcdsQuery(ImpalaTestSuite): def test_tpcds_q8(self, vector): self.run_test_case(self.get_workload() + '-q8', vector) + def test_tpcds_q9(self, vector): + self.run_test_case(self.get_workload() + '-q9', vector) + def test_tpcds_q10a(self, vector): self.run_test_case(self.get_workload() + '-q10a', vector) @@ -342,6 +345,9 @@ class TestTpcdsDecimalV2Query(ImpalaTestSuite): def test_tpcds_q8(self, vector): self.run_test_case(self.get_workload() + '-decimal_v2-q8', vector) + def test_tpcds_q9(self, vector): + self.run_test_case(self.get_workload() + '-decimal_v2-q9', vector) + def test_tpcds_q10a(self, vector): self.run_test_case(self.get_workload() + '-decimal_v2-q10a', vector) diff --git a/tests/util/parse_util.py b/tests/util/parse_util.py index 4aeb00bcb..448b6ec1c 100644 --- a/tests/util/parse_util.py +++ b/tests/util/parse_util.py @@ -22,7 +22,7 @@ from datetime import datetime # changed, and the stress test loses the ability to run the full set of queries. Set # these constants and assert that when a workload is used, all the queries we expect to # use are there. -EXPECTED_TPCDS_QUERIES_COUNT = 84 +EXPECTED_TPCDS_QUERIES_COUNT = 85 EXPECTED_TPCH_NESTED_QUERIES_COUNT = 22 EXPECTED_TPCH_QUERIES_COUNT = 22 # Add the number of stress test specific queries, i.e. in files like '*-stress-*.test'