IMPALA-8954: Uncorrelated scalar subqueries in the select list

Extend StmtRewriter with the ability to rewrite scalar subqueries in the
select list into cross joins. Currently the subquery must pass plan-time
checks to determine that it returns a single row which may miss cases
that may be valid at runtime or with more complex evaluation of the
predicate expressions in the planner. Support for correlated subqueries
will be a follow on change.

Testing:
  * Added new analyzer tests, updated previous subquery tests
  * test_queries.py::TestQueries::test_subquery
  * Added test_tpcds_q9 to e2e and planner tests

Change-Id: Ibcf55d26889aa01d69bb85f18c9241dda095fb66
Reviewed-on: http://gerrit.cloudera.org:8080/16007
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Tim Armstrong <tarmstrong@cloudera.com>
This commit is contained in:
Shant Hovsepian
2020-05-29 16:26:07 -04:00
committed by Tim Armstrong
parent 49af396c10
commit 388ad555d7
13 changed files with 1572 additions and 18 deletions

View File

@@ -94,9 +94,14 @@ public class SelectList {
public void rewriteExprs(ExprRewriter rewriter, Analyzer analyzer)
throws AnalysisException {
List<Subquery> subqueryExprs = new ArrayList<>();
for (SelectListItem item: items_) {
if (item.isStar()) continue;
item.setExpr(rewriter.rewrite(item.getExpr(), analyzer));
item.getExpr().collect(Subquery.class, subqueryExprs);
}
for (Subquery s : subqueryExprs) {
s.getStatement().rewriteExprs(rewriter);
}
}

View File

@@ -274,9 +274,23 @@ public class SelectStmt extends QueryStmt {
// Analyze the resultExpr before generating a label to ensure enforcement
// of expr child and depth limits (toColumn() label may call toSql()).
item.getExpr().analyze(analyzer_);
if (item.getExpr().contains(Predicates.instanceOf(Subquery.class))) {
throw new AnalysisException(
"Subqueries are not supported in the select list.");
// Check for scalar subquery types which are not supported
List<Subquery> subqueryExprs = new ArrayList<>();
item.getExpr().collect(Subquery.class, subqueryExprs);
for (Subquery s : subqueryExprs) {
Preconditions.checkState(s.getStatement() instanceof SelectStmt);
if (!s.returnsScalarColumn()) {
throw new AnalysisException("A non-scalar subquery is not supported in "
+ "the expression: " + item.getExpr().toSql());
}
if (s.getStatement().isRuntimeScalar()) {
throw new AnalysisException(
"A subquery which may return more than one row is not supported in "
+ "the expression: " + item.getExpr().toSql());
}
Preconditions.checkState(((SelectStmt) s.getStatement()).returnsSingleRow(),
"Invariant violated: Only subqueries that are guaranteed to return a "
+ "single row are supported: " + item.getExpr().toSql());
}
resultExprs_.add(item.getExpr());
String label = item.toColumnLabel(i, analyzer_.useHiveColLabels());
@@ -1197,12 +1211,17 @@ public class SelectStmt extends QueryStmt {
if (fromClauseOnly) {
fromClause_.collectFromClauseTableRefs(tblRefs);
} else {
// Collect TableRefs in all subqueries.
fromClause_.collectTableRefs(tblRefs);
}
if (!fromClauseOnly && whereClause_ != null) {
// Collect TableRefs in WHERE-clause subqueries.
List<Subquery> subqueries = new ArrayList<>();
whereClause_.collect(Subquery.class, subqueries);
if (whereClause_ != null) {
whereClause_.collect(Subquery.class, subqueries);
}
for (SelectListItem item : selectList_.getItems()) {
if (item.isStar()) continue;
item.getExpr().collect(Subquery.class, subqueries);
}
for (Subquery sq: subqueries) {
sq.getStatement().collectTableRefs(tblRefs, fromClauseOnly);
}
@@ -1211,8 +1230,6 @@ public class SelectStmt extends QueryStmt {
@Override
public void collectInlineViews(Set<FeView> inlineViews) {
// Impala currently supports sub queries only in FROM, WHERE & WITH clauses. Hence,
// this function does not carry out any checks on HAVING clause.
super.collectInlineViews(inlineViews);
List<TableRef> fromTblRefs = getTableRefs();
Preconditions.checkNotNull(inlineViews);
@@ -1236,6 +1253,14 @@ public class SelectStmt extends QueryStmt {
whereSubQueries.get(0).getStatement().collectInlineViews(inlineViews);
}
}
List<Subquery> selectListSubQueries = Lists.newArrayList();
for (SelectListItem item : selectList_.getItems()) {
if (item.isStar()) continue;
item.getExpr().collect(Subquery.class, selectListSubQueries);
}
for (Subquery sq : selectListSubQueries) {
sq.getStatement().collectInlineViews(inlineViews);
}
}
@Override

View File

@@ -934,8 +934,9 @@ public class StmtRewriter {
/**
* Rewrite all the subqueries of a SelectStmt in place. Subqueries are currently
* supported in FROM and WHERE clauses. The rewrite is performed in place and not in a
* clone of SelectStmt because it requires the stmt to be analyzed.
* supported in the FROM clause, WHERE clause and SELECT list. The rewrite is
* performed in place and not in a clone of SelectStmt because it requires the stmt to
* be analyzed.
*/
@Override
protected void rewriteSelectStmtHook(SelectStmt stmt, Analyzer analyzer)
@@ -952,6 +953,7 @@ public class StmtRewriter {
}
rewriteWhereClauseSubqueries(stmt, analyzer);
}
rewriteSelectListSubqueries(stmt, analyzer);
}
/**
@@ -1096,5 +1098,168 @@ public class StmtRewriter {
smap.put(subquery, newSubquery);
return expr.substitute(smap, analyzer, false);
}
/**
* Rewrite subqueries of a stmt's SELECT list. Scalar subqueries are the only type
* of subquery supported in the select list. Scalar subqueries return a single column
* and at most 1 row, a runtime error should be thrown if more than one row is
* returned. Generally these subqueries can be evaluated once for every row of the
* outer query however for performance reasons we want to rewrite evaluation to use
* joins where possible.
*
* 1) Uncorrelated Scalar Aggregate Query
*
* SELECT T1.a, (SELECT avg(T2.a) from T2) FROM T1;
*
* This is implemented by flattening into a join.
*
* SELECT T1.a, $a$1.$c$1 FROM T1, (SELECT avg(T2.a) $c$1 FROM T2) $a$1
*
* Currently we only support very simple subqueries which return a single aggregate
* function with no group by columns unless a LIMIT 1 is given. TODO: IMPALA-1285
*
* 2) Correlated Scalar Aggregate
*
* TODO: IMPALA-8955
* SELECT id, (SELECT count(*) FROM T2 WHERE id=a.id ) FROM T1 a
*
* This can be flattened with a LEFT OUTER JOIN
*
* SELECT T1.a, $a$1.$c$1 FROM T1 LEFT OUTER JOIN
* (SELECT id, count(*) $c$1 FROM T2 GROUP BY id) $a$1 ON T1.id = $a$1.id
*
* 3) Correlated Scalar
*
* TODO: IMPALA-6315
* SELECT id, (SELECT cost FROM T2 WHERE id=a.id ) FROM T1 a
*
* In this case there is no aggregate function to guarantee only a single row is
* returned per group so a run time cardinality check must be applied. An exception
* would be if the correlated predicates had primary key constraints.
*
* 4) Runtime Scalar Subqueries
*
* TODO: IMPALA-5100
* We do have a {@link CardinalityCheckNode} for runtime checks however queries
* can't always be rewritten into an NLJ without special care. For example with
* conditional expression like below:
*
* SELECT T1.a,
* IF((SELECT max(T2.a) from T2 > 10,
* (SELECT T2.a from T2 WHERE id=T1.id),
* (SELECT T3.a from T2 WHERE if=T1.id)
* FROM T1;
*
* If rewritten to joins with cardinality checks then both legs of the conditional
* expression would be evaluated regardless of the condition. If the false case
* were to return a runtime error while when the true doesn't and the condition
* evaluates to true then we'd have incorrect behavior.
*/
private void rewriteSelectListSubqueries(SelectStmt stmt, Analyzer analyzer)
throws AnalysisException {
Preconditions.checkNotNull(stmt);
Preconditions.checkNotNull(analyzer);
final int numTableRefs = stmt.fromClause_.size();
final boolean parentHasAgg = stmt.hasMultiAggInfo();
// Track any new inline views so we later ensure they are rewritten if needed.
// An improvement would be to have a pre/post order abstract rewriter class.
final List<InlineViewRef> newViews = new ArrayList<>();
for (SelectListItem selectItem : stmt.getSelectList().getItems()) {
if (selectItem.isStar()) {
continue;
}
final Expr expr = selectItem.getExpr();
final List<Subquery> subqueries = new ArrayList<>();
// Use collect as opposed to collectAll in order to allow nested subqueries to be
// rewritten as needed. For example a subquery in the select list which contains
// its own subquery in the where clause.
expr.collect(Predicates.instanceOf(Subquery.class), subqueries);
if (subqueries.size() == 0) {
continue;
}
final ExprSubstitutionMap smap = new ExprSubstitutionMap();
for (Subquery sq : subqueries) {
final SelectStmt subqueryStmt = (SelectStmt) sq.getStatement();
// TODO: Handle correlated subqueries IMPALA-8955
if (isCorrelated(subqueryStmt)) {
throw new AnalysisException("A correlated scalar subquery is not supported "
+ "in the expression: " + expr.toSql());
}
Preconditions.checkState(sq.getType().isScalarType());
// Existential subqueries in Impala aren't really execution time expressions,
// they are either checked at plan time or expected to be handled by the
// subquery rewrite into a join. In the case of the select list we will only
// support plan time evaluation.
boolean replacedExists = false;
final List<ExistsPredicate> existsPredicates = new ArrayList<>();
expr.collect(ExistsPredicate.class, existsPredicates);
for (ExistsPredicate ep : existsPredicates) {
// Check to see if the current subquery is the child of an exists predicate.
if (ep.contains(sq)) {
final BoolLiteral boolLiteral = replaceExistsPredicate(ep);
if (boolLiteral != null) {
boolLiteral.analyze(analyzer);
smap.put(ep, boolLiteral);
replacedExists = true;
break;
} else {
throw new AnalysisException(
"Unsupported subquery with runtime scalar check: " + ep.toSql());
}
}
}
if (replacedExists) {
continue;
}
List<String> colLabels = new ArrayList<>();
for (int i = 0; i < subqueryStmt.getColLabels().size(); ++i) {
colLabels.add(subqueryStmt.getColumnAliasGenerator().getNextAlias());
}
// Create a new inline view from the subquery stmt aliasing the columns.
InlineViewRef inlineView = new InlineViewRef(
stmt.getTableAliasGenerator().getNextAlias(), subqueryStmt, colLabels);
inlineView.reset();
inlineView.analyze(analyzer);
// For uncorrelated scalar subqueries we rewrite with a CROSS_JOIN. This makes
// it simpler to further optimize by merging subqueries without worrying about
// join ordering as in IMPALA-9796. For correlated subqueries we'd want to
// rewrite to a LOJ.
inlineView.setJoinOp(JoinOperator.CROSS_JOIN);
stmt.fromClause_.add(inlineView);
newViews.add(inlineView);
SlotRef slotRef = new SlotRef(Lists.newArrayList(
inlineView.getUniqueAlias(), inlineView.getColLabels().get(0)));
slotRef.analyze(analyzer);
Expr substitute = slotRef;
// Need to wrap the expression with a no-op aggregate function if the stmt does
// any aggregation, using MAX() given no explicit function to return any value
// in a group.
if (parentHasAgg) {
final FunctionCallExpr aggWrapper =
new FunctionCallExpr("max", Lists.newArrayList((Expr) slotRef));
aggWrapper.analyze(analyzer);
substitute = aggWrapper;
}
// Substitute original subquery expression with a reference to the inline view.
smap.put(sq, substitute);
}
// Update select list with any new slot references.
selectItem.setExpr(expr.substitute(smap, analyzer, false));
}
// Rewrite any new views
for (InlineViewRef v : newViews) {
rewriteQueryStatement(v.getViewStmt(), v.getAnalyzer());
}
// Only applies to the original list of TableRefs, not any as a result of the
// rewrite.
if (!newViews.isEmpty()) {
replaceUnqualifiedStarItems(stmt, numTableRefs);
}
}
}
}

View File

@@ -1207,6 +1207,20 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
"(select id from functional.alltypes t2 where t1.int_col = t2.int_col)",
"Unsupported correlated subquery with runtime scalar check: " +
"SELECT id FROM functional.alltypes t2 WHERE t1.int_col = t2.int_col");
// Scalar subqueries in the select list
AnalyzesOk("select id, 10 + (select max(int_col) from functional.alltypestiny) "
+ "from functional.alltypestiny");
AnalyzesOk("select id, (select count(*) from functional.alltypestiny where int_col "
+ "< (select max(int_col) from functional.alltypes)) from functional.dimtbl");
AnalysisError("select id, (select id, count(*) from functional.alltypestiny "
+ "group by 1) from functional.dimtbl",
"A non-scalar subquery is not supported in the expression: "
+ "(SELECT id, count(*) FROM functional.alltypestiny GROUP BY id)");
AnalysisError("select id, (select count(*) from functional.alltypestiny b where "
+ "id=a.id ) from functional.alltypes a",
"A correlated scalar subquery is not supported in the expression: "
+ "(SELECT count(*) FROM functional.alltypestiny b WHERE id = a.id)");
}
@Test
@@ -1339,7 +1353,7 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
}
@Test
public void TestIllegalSubquery() throws AnalysisException {
public void testIllegalSubquery() throws AnalysisException {
// Predicate with a child subquery in the HAVING clause
AnalysisError("select id, count(*) from functional.alltypestiny t group by " +
"id having count(*) > (select count(*) from functional.alltypesagg)",
@@ -1349,9 +1363,10 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
"Subqueries are not supported in the HAVING clause.");
// Subquery in the select list
AnalysisError("select id, (select int_col from functional.alltypestiny) " +
"from functional.alltypestiny",
"Subqueries are not supported in the select list.");
AnalysisError("select id, (select int_col from functional.alltypestiny) "
+ "from functional.alltypestiny",
"A subquery which may return more than one row is not supported in the " +
"expression: " + "(SELECT int_col FROM functional.alltypestiny)");
// Subquery in the GROUP BY clause
AnalysisError("select id, count(*) from functional.alltypestiny " +

View File

@@ -1138,7 +1138,7 @@ tpcds
---- BASE_TABLE_NAME
reason
---- COLUMNS
r_reason_sk bigint
r_reason_sk int
r_reason_id string
r_reason_desc string
---- ROW_FORMAT

View File

@@ -3296,3 +3296,137 @@ PLAN-ROOT SINK
runtime filters: RF000 -> int_col
row-size=89B cardinality=7.30K
====
# Scalar subquery in select list
SELECT alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name,
SUM(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END) / CAST((SELECT COUNT(*) FROM functional.dimtbl WHERE dimtbl.zip < 94613) AS decimal(10, 6)) AS close,
SUM(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END) / CAST((SELECT COUNT(*) FROM functional.dimtbl WHERE dimtbl.zip >= 94613) AS decimal(10, 6)) AS remote
FROM functional.alltypesagg JOIN functional.dimtbl ON (alltypesagg.id = dimtbl.id)
GROUP BY 1, 2, 3
ORDER BY alltypesagg.string_col DESC
---- PLAN
PLAN-ROOT SINK
|
10:SORT
| order by: alltypesagg.string_col DESC
| row-size=84B cardinality=11
|
09:AGGREGATE [FINALIZE]
| output: sum(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max(count(*)), sum(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max(count(*))
| group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name
| row-size=84B cardinality=11
|
08:NESTED LOOP JOIN [CROSS JOIN]
| row-size=84B cardinality=11
|
|--05:AGGREGATE [FINALIZE]
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 04:SCAN HDFS [functional.dimtbl]
| HDFS partitions=1/1 files=1 size=171B
| predicates: dimtbl.zip >= 94613
| row-size=4B cardinality=1
|
07:NESTED LOOP JOIN [CROSS JOIN]
| row-size=76B cardinality=11
|
|--03:AGGREGATE [FINALIZE]
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 02:SCAN HDFS [functional.dimtbl]
| HDFS partitions=1/1 files=1 size=171B
| predicates: dimtbl.zip < 94613
| row-size=4B cardinality=1
|
06:HASH JOIN [INNER JOIN]
| hash predicates: alltypesagg.id = dimtbl.id
| runtime filters: RF000 <- dimtbl.id
| row-size=68B cardinality=11
|
|--01:SCAN HDFS [functional.dimtbl]
| HDFS partitions=1/1 files=1 size=171B
| row-size=29B cardinality=10
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> alltypesagg.id
row-size=39B cardinality=11.00K
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
20:MERGING-EXCHANGE [UNPARTITIONED]
| order by: alltypesagg.string_col DESC
|
10:SORT
| order by: alltypesagg.string_col DESC
| row-size=84B cardinality=11
|
19:AGGREGATE [FINALIZE]
| output: sum:merge(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max:merge(`$a$1`.`$c$1`), sum:merge(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max:merge(`$a$2`.`$c$1`)
| group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name
| row-size=84B cardinality=11
|
18:EXCHANGE [HASH(alltypesagg.string_col,alltypesagg.date_string_col,dimtbl.name)]
|
09:AGGREGATE [STREAMING]
| output: sum(CASE WHEN dimtbl.zip < 94613 THEN 1 ELSE 0 END), max(count(*)), sum(CASE WHEN dimtbl.zip >= 94613 THEN 1 ELSE 0 END), max(count(*))
| group by: alltypesagg.string_col, alltypesagg.date_string_col, dimtbl.name
| row-size=84B cardinality=11
|
08:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=84B cardinality=11
|
|--17:EXCHANGE [BROADCAST]
| |
| 16:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | row-size=8B cardinality=1
| |
| 15:EXCHANGE [UNPARTITIONED]
| |
| 05:AGGREGATE
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 04:SCAN HDFS [functional.dimtbl]
| HDFS partitions=1/1 files=1 size=171B
| predicates: dimtbl.zip >= 94613
| row-size=4B cardinality=1
|
07:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=76B cardinality=11
|
|--14:EXCHANGE [BROADCAST]
| |
| 13:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | row-size=8B cardinality=1
| |
| 12:EXCHANGE [UNPARTITIONED]
| |
| 03:AGGREGATE
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 02:SCAN HDFS [functional.dimtbl]
| HDFS partitions=1/1 files=1 size=171B
| predicates: dimtbl.zip < 94613
| row-size=4B cardinality=1
|
06:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: alltypesagg.id = dimtbl.id
| runtime filters: RF000 <- dimtbl.id
| row-size=68B cardinality=11
|
|--11:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.dimtbl]
| HDFS partitions=1/1 files=1 size=171B
| row-size=29B cardinality=10
|
00:SCAN HDFS [functional.alltypesagg]
HDFS partitions=11/11 files=11 size=814.73KB
runtime filters: RF000 -> alltypesagg.id
row-size=39B cardinality=11.00K
====

View File

@@ -8788,3 +8788,919 @@ PLAN-ROOT SINK
runtime filters: RF004 -> ss_sold_time_sk, RF002 -> ss_hdemo_sk
row-size=12B cardinality=2.88M
====
# TPCDS-Q9
select case when (select count(*)
from store_sales
where ss_quantity between 1 and 20) > 74129
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 1 and 20)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 1 and 20) end bucket1,
case when (select count(*)
from store_sales
where ss_quantity between 21 and 40) > 122840
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 21 and 40)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 21 and 40) end bucket2,
case when (select count(*)
from store_sales
where ss_quantity between 41 and 60) > 56580
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 41 and 60)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 41 and 60) end bucket3,
case when (select count(*)
from store_sales
where ss_quantity between 61 and 80) > 10097
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 61 and 80)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 61 and 80) end bucket4,
case when (select count(*)
from store_sales
where ss_quantity between 81 and 100) > 165306
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 81 and 100)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 81 and 100) end bucket5
from reason
where r_reason_sk = 1
---- PLAN
Max Per-Host Resource Reservation: Memory=120.01MB Threads=17
Per-Host Resource Estimates: Memory=2.05GB
PLAN-ROOT SINK
|
45:NESTED LOOP JOIN [CROSS JOIN]
| row-size=124B cardinality=1
|
|--30:AGGREGATE [FINALIZE]
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 29:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 100, ss_quantity >= 81
| row-size=8B cardinality=288.04K
|
44:NESTED LOOP JOIN [CROSS JOIN]
| row-size=116B cardinality=1
|
|--28:AGGREGATE [FINALIZE]
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 27:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 100, ss_quantity >= 81
| row-size=8B cardinality=288.04K
|
43:NESTED LOOP JOIN [CROSS JOIN]
| row-size=108B cardinality=1
|
|--26:AGGREGATE [FINALIZE]
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 25:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 100, ss_quantity >= 81
| row-size=4B cardinality=288.04K
|
42:NESTED LOOP JOIN [CROSS JOIN]
| row-size=100B cardinality=1
|
|--24:AGGREGATE [FINALIZE]
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 23:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 80, ss_quantity >= 61
| row-size=8B cardinality=288.04K
|
41:NESTED LOOP JOIN [CROSS JOIN]
| row-size=92B cardinality=1
|
|--22:AGGREGATE [FINALIZE]
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 21:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 80, ss_quantity >= 61
| row-size=8B cardinality=288.04K
|
40:NESTED LOOP JOIN [CROSS JOIN]
| row-size=84B cardinality=1
|
|--20:AGGREGATE [FINALIZE]
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 19:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 80, ss_quantity >= 61
| row-size=4B cardinality=288.04K
|
39:NESTED LOOP JOIN [CROSS JOIN]
| row-size=76B cardinality=1
|
|--18:AGGREGATE [FINALIZE]
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 17:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 60, ss_quantity >= 41
| row-size=8B cardinality=288.04K
|
38:NESTED LOOP JOIN [CROSS JOIN]
| row-size=68B cardinality=1
|
|--16:AGGREGATE [FINALIZE]
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 15:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 60, ss_quantity >= 41
| row-size=8B cardinality=288.04K
|
37:NESTED LOOP JOIN [CROSS JOIN]
| row-size=60B cardinality=1
|
|--14:AGGREGATE [FINALIZE]
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 13:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 60, ss_quantity >= 41
| row-size=4B cardinality=288.04K
|
36:NESTED LOOP JOIN [CROSS JOIN]
| row-size=52B cardinality=1
|
|--12:AGGREGATE [FINALIZE]
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 11:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 40, ss_quantity >= 21
| row-size=8B cardinality=288.04K
|
35:NESTED LOOP JOIN [CROSS JOIN]
| row-size=44B cardinality=1
|
|--10:AGGREGATE [FINALIZE]
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 09:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 40, ss_quantity >= 21
| row-size=8B cardinality=288.04K
|
34:NESTED LOOP JOIN [CROSS JOIN]
| row-size=36B cardinality=1
|
|--08:AGGREGATE [FINALIZE]
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 07:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 40, ss_quantity >= 21
| row-size=4B cardinality=288.04K
|
33:NESTED LOOP JOIN [CROSS JOIN]
| row-size=28B cardinality=1
|
|--06:AGGREGATE [FINALIZE]
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 05:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 20, ss_quantity >= 1
| row-size=8B cardinality=288.04K
|
32:NESTED LOOP JOIN [CROSS JOIN]
| row-size=20B cardinality=1
|
|--04:AGGREGATE [FINALIZE]
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 03:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 20, ss_quantity >= 1
| row-size=8B cardinality=288.04K
|
31:NESTED LOOP JOIN [CROSS JOIN]
| row-size=12B cardinality=1
|
|--00:SCAN HDFS [tpcds.reason]
| HDFS partitions=1/1 files=1 size=1.31KB
| predicates: r_reason_sk = 1
| row-size=4B cardinality=1
|
02:AGGREGATE [FINALIZE]
| output: count(*)
| row-size=8B cardinality=1
|
01:SCAN HDFS [tpcds.store_sales]
HDFS partitions=1824/1824 files=1824 size=346.60MB
predicates: ss_quantity <= 20, ss_quantity >= 1
row-size=4B cardinality=288.04K
---- DISTRIBUTEDPLAN
Max Per-Host Resource Reservation: Memory=120.01MB Threads=47
Per-Host Resource Estimates: Memory=2.20GB
PLAN-ROOT SINK
|
45:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=124B cardinality=1
|
|--90:EXCHANGE [UNPARTITIONED]
| |
| 89:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 88:EXCHANGE [UNPARTITIONED]
| |
| 30:AGGREGATE
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 29:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 100, ss_quantity >= 81
| row-size=8B cardinality=288.04K
|
44:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=116B cardinality=1
|
|--87:EXCHANGE [UNPARTITIONED]
| |
| 86:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 85:EXCHANGE [UNPARTITIONED]
| |
| 28:AGGREGATE
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 27:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 100, ss_quantity >= 81
| row-size=8B cardinality=288.04K
|
43:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=108B cardinality=1
|
|--84:EXCHANGE [UNPARTITIONED]
| |
| 83:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | row-size=8B cardinality=1
| |
| 82:EXCHANGE [UNPARTITIONED]
| |
| 26:AGGREGATE
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 25:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 100, ss_quantity >= 81
| row-size=4B cardinality=288.04K
|
42:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=100B cardinality=1
|
|--81:EXCHANGE [UNPARTITIONED]
| |
| 80:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 79:EXCHANGE [UNPARTITIONED]
| |
| 24:AGGREGATE
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 23:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 80, ss_quantity >= 61
| row-size=8B cardinality=288.04K
|
41:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=92B cardinality=1
|
|--78:EXCHANGE [UNPARTITIONED]
| |
| 77:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 76:EXCHANGE [UNPARTITIONED]
| |
| 22:AGGREGATE
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 21:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 80, ss_quantity >= 61
| row-size=8B cardinality=288.04K
|
40:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=84B cardinality=1
|
|--75:EXCHANGE [UNPARTITIONED]
| |
| 74:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | row-size=8B cardinality=1
| |
| 73:EXCHANGE [UNPARTITIONED]
| |
| 20:AGGREGATE
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 19:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 80, ss_quantity >= 61
| row-size=4B cardinality=288.04K
|
39:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=76B cardinality=1
|
|--72:EXCHANGE [UNPARTITIONED]
| |
| 71:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 70:EXCHANGE [UNPARTITIONED]
| |
| 18:AGGREGATE
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 17:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 60, ss_quantity >= 41
| row-size=8B cardinality=288.04K
|
38:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=68B cardinality=1
|
|--69:EXCHANGE [UNPARTITIONED]
| |
| 68:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 67:EXCHANGE [UNPARTITIONED]
| |
| 16:AGGREGATE
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 15:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 60, ss_quantity >= 41
| row-size=8B cardinality=288.04K
|
37:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=60B cardinality=1
|
|--66:EXCHANGE [UNPARTITIONED]
| |
| 65:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | row-size=8B cardinality=1
| |
| 64:EXCHANGE [UNPARTITIONED]
| |
| 14:AGGREGATE
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 13:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 60, ss_quantity >= 41
| row-size=4B cardinality=288.04K
|
36:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=52B cardinality=1
|
|--63:EXCHANGE [UNPARTITIONED]
| |
| 62:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 61:EXCHANGE [UNPARTITIONED]
| |
| 12:AGGREGATE
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 11:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 40, ss_quantity >= 21
| row-size=8B cardinality=288.04K
|
35:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=44B cardinality=1
|
|--60:EXCHANGE [UNPARTITIONED]
| |
| 59:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 58:EXCHANGE [UNPARTITIONED]
| |
| 10:AGGREGATE
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 09:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 40, ss_quantity >= 21
| row-size=8B cardinality=288.04K
|
34:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=36B cardinality=1
|
|--57:EXCHANGE [UNPARTITIONED]
| |
| 56:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | row-size=8B cardinality=1
| |
| 55:EXCHANGE [UNPARTITIONED]
| |
| 08:AGGREGATE
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 07:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 40, ss_quantity >= 21
| row-size=4B cardinality=288.04K
|
33:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=28B cardinality=1
|
|--54:EXCHANGE [UNPARTITIONED]
| |
| 53:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 52:EXCHANGE [UNPARTITIONED]
| |
| 06:AGGREGATE
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 05:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 20, ss_quantity >= 1
| row-size=8B cardinality=288.04K
|
32:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=20B cardinality=1
|
|--51:EXCHANGE [UNPARTITIONED]
| |
| 50:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 49:EXCHANGE [UNPARTITIONED]
| |
| 04:AGGREGATE
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 03:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 20, ss_quantity >= 1
| row-size=8B cardinality=288.04K
|
31:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| row-size=12B cardinality=1
|
|--48:EXCHANGE [UNPARTITIONED]
| |
| 00:SCAN HDFS [tpcds.reason]
| HDFS partitions=1/1 files=1 size=1.31KB
| predicates: r_reason_sk = 1
| row-size=4B cardinality=1
|
47:AGGREGATE [FINALIZE]
| output: count:merge(*)
| row-size=8B cardinality=1
|
46:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(*)
| row-size=8B cardinality=1
|
01:SCAN HDFS [tpcds.store_sales]
HDFS partitions=1824/1824 files=1824 size=346.60MB
predicates: ss_quantity <= 20, ss_quantity >= 1
row-size=4B cardinality=288.04K
---- PARALLELPLANS
Max Per-Host Resource Reservation: Memory=240.01MB Threads=61
Per-Host Resource Estimates: Memory=946MB
PLAN-ROOT SINK
|
45:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 00
| row-size=124B cardinality=1
|
|--JOIN BUILD
| | join-table-id=00 plan-id=01 cohort-id=01
| |
| 90:EXCHANGE [UNPARTITIONED]
| |
| 89:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 88:EXCHANGE [UNPARTITIONED]
| |
| 30:AGGREGATE
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 29:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 100, ss_quantity >= 81
| row-size=8B cardinality=288.04K
|
44:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 01
| row-size=116B cardinality=1
|
|--JOIN BUILD
| | join-table-id=01 plan-id=02 cohort-id=01
| |
| 87:EXCHANGE [UNPARTITIONED]
| |
| 86:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 85:EXCHANGE [UNPARTITIONED]
| |
| 28:AGGREGATE
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 27:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 100, ss_quantity >= 81
| row-size=8B cardinality=288.04K
|
43:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 02
| row-size=108B cardinality=1
|
|--JOIN BUILD
| | join-table-id=02 plan-id=03 cohort-id=01
| |
| 84:EXCHANGE [UNPARTITIONED]
| |
| 83:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | row-size=8B cardinality=1
| |
| 82:EXCHANGE [UNPARTITIONED]
| |
| 26:AGGREGATE
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 25:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 100, ss_quantity >= 81
| row-size=4B cardinality=288.04K
|
42:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 03
| row-size=100B cardinality=1
|
|--JOIN BUILD
| | join-table-id=03 plan-id=04 cohort-id=01
| |
| 81:EXCHANGE [UNPARTITIONED]
| |
| 80:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 79:EXCHANGE [UNPARTITIONED]
| |
| 24:AGGREGATE
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 23:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 80, ss_quantity >= 61
| row-size=8B cardinality=288.04K
|
41:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 04
| row-size=92B cardinality=1
|
|--JOIN BUILD
| | join-table-id=04 plan-id=05 cohort-id=01
| |
| 78:EXCHANGE [UNPARTITIONED]
| |
| 77:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 76:EXCHANGE [UNPARTITIONED]
| |
| 22:AGGREGATE
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 21:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 80, ss_quantity >= 61
| row-size=8B cardinality=288.04K
|
40:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 05
| row-size=84B cardinality=1
|
|--JOIN BUILD
| | join-table-id=05 plan-id=06 cohort-id=01
| |
| 75:EXCHANGE [UNPARTITIONED]
| |
| 74:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | row-size=8B cardinality=1
| |
| 73:EXCHANGE [UNPARTITIONED]
| |
| 20:AGGREGATE
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 19:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 80, ss_quantity >= 61
| row-size=4B cardinality=288.04K
|
39:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 06
| row-size=76B cardinality=1
|
|--JOIN BUILD
| | join-table-id=06 plan-id=07 cohort-id=01
| |
| 72:EXCHANGE [UNPARTITIONED]
| |
| 71:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 70:EXCHANGE [UNPARTITIONED]
| |
| 18:AGGREGATE
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 17:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 60, ss_quantity >= 41
| row-size=8B cardinality=288.04K
|
38:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 07
| row-size=68B cardinality=1
|
|--JOIN BUILD
| | join-table-id=07 plan-id=08 cohort-id=01
| |
| 69:EXCHANGE [UNPARTITIONED]
| |
| 68:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 67:EXCHANGE [UNPARTITIONED]
| |
| 16:AGGREGATE
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 15:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 60, ss_quantity >= 41
| row-size=8B cardinality=288.04K
|
37:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 08
| row-size=60B cardinality=1
|
|--JOIN BUILD
| | join-table-id=08 plan-id=09 cohort-id=01
| |
| 66:EXCHANGE [UNPARTITIONED]
| |
| 65:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | row-size=8B cardinality=1
| |
| 64:EXCHANGE [UNPARTITIONED]
| |
| 14:AGGREGATE
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 13:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 60, ss_quantity >= 41
| row-size=4B cardinality=288.04K
|
36:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 09
| row-size=52B cardinality=1
|
|--JOIN BUILD
| | join-table-id=09 plan-id=10 cohort-id=01
| |
| 63:EXCHANGE [UNPARTITIONED]
| |
| 62:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 61:EXCHANGE [UNPARTITIONED]
| |
| 12:AGGREGATE
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 11:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 40, ss_quantity >= 21
| row-size=8B cardinality=288.04K
|
35:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 10
| row-size=44B cardinality=1
|
|--JOIN BUILD
| | join-table-id=10 plan-id=11 cohort-id=01
| |
| 60:EXCHANGE [UNPARTITIONED]
| |
| 59:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 58:EXCHANGE [UNPARTITIONED]
| |
| 10:AGGREGATE
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 09:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 40, ss_quantity >= 21
| row-size=8B cardinality=288.04K
|
34:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 11
| row-size=36B cardinality=1
|
|--JOIN BUILD
| | join-table-id=11 plan-id=12 cohort-id=01
| |
| 57:EXCHANGE [UNPARTITIONED]
| |
| 56:AGGREGATE [FINALIZE]
| | output: count:merge(*)
| | row-size=8B cardinality=1
| |
| 55:EXCHANGE [UNPARTITIONED]
| |
| 08:AGGREGATE
| | output: count(*)
| | row-size=8B cardinality=1
| |
| 07:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 40, ss_quantity >= 21
| row-size=4B cardinality=288.04K
|
33:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 12
| row-size=28B cardinality=1
|
|--JOIN BUILD
| | join-table-id=12 plan-id=13 cohort-id=01
| |
| 54:EXCHANGE [UNPARTITIONED]
| |
| 53:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 52:EXCHANGE [UNPARTITIONED]
| |
| 06:AGGREGATE
| | output: avg(ss_net_profit)
| | row-size=8B cardinality=1
| |
| 05:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 20, ss_quantity >= 1
| row-size=8B cardinality=288.04K
|
32:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 13
| row-size=20B cardinality=1
|
|--JOIN BUILD
| | join-table-id=13 plan-id=14 cohort-id=01
| |
| 51:EXCHANGE [UNPARTITIONED]
| |
| 50:AGGREGATE [FINALIZE]
| | output: avg:merge(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 49:EXCHANGE [UNPARTITIONED]
| |
| 04:AGGREGATE
| | output: avg(ss_ext_discount_amt)
| | row-size=8B cardinality=1
| |
| 03:SCAN HDFS [tpcds.store_sales]
| HDFS partitions=1824/1824 files=1824 size=346.60MB
| predicates: ss_quantity <= 20, ss_quantity >= 1
| row-size=8B cardinality=288.04K
|
31:NESTED LOOP JOIN [CROSS JOIN, BROADCAST]
| join table id: 14
| row-size=12B cardinality=1
|
|--JOIN BUILD
| | join-table-id=14 plan-id=15 cohort-id=01
| |
| 48:EXCHANGE [UNPARTITIONED]
| |
| 00:SCAN HDFS [tpcds.reason]
| HDFS partitions=1/1 files=1 size=1.31KB
| predicates: r_reason_sk = 1
| row-size=4B cardinality=1
|
47:AGGREGATE [FINALIZE]
| output: count:merge(*)
| row-size=8B cardinality=1
|
46:EXCHANGE [UNPARTITIONED]
|
02:AGGREGATE
| output: count(*)
| row-size=8B cardinality=1
|
01:SCAN HDFS [tpcds.store_sales]
HDFS partitions=1824/1824 files=1824 size=346.60MB
predicates: ss_quantity <= 20, ss_quantity >= 1
row-size=4B cardinality=288.04K
====

View File

@@ -1039,3 +1039,176 @@ where int_col = (select f.id from (
---- TYPES
BIGINT
====
---- QUERY
# Basic scalar in select list
select id, 1+(select min(id) from functional.alltypessmall)
from functional.alltypestiny
order by id
---- RESULTS
0,1
1,1
2,1
3,1
4,1
5,1
6,1
7,1
---- TYPES
INT, BIGINT
====
---- QUERY
# No aggregate however limit 1
select id, 1+(select id from functional.alltypessmall where id=0 group by id limit 1)
from functional.alltypestiny
order by id
---- RESULTS
0,1
1,1
2,1
3,1
4,1
5,1
6,1
7,1
---- TYPES
INT, BIGINT
====
---- QUERY
# outer join
select s.id, (select count(id) from alltypes) from alltypestiny t
left outer join alltypessmall s on t.id = s.id where s.bool_col
order by id
---- RESULTS
0,7300
2,7300
4,7300
6,7300
---- TYPES
INT, BIGINT
====
---- QUERY
# correlated scalar currently unsupported
select id, (select count(id) from functional.alltypessmall where id=t.id)
from functional.alltypestiny t
order by id
---- RESULTS
---- CATCH
A correlated scalar subquery is not supported in the expression:
====
---- QUERY
# Uncorrelated Scalar Aggregate in select list combined with aggregation in parent query
select
max(a.id),
(select max(id) from functional.alltypestiny),
(select min(id) from functional.alltypestiny)
from functional.alltypessmall a
---- RESULTS
99,7,0
---- TYPES
INT, INT, INT
====
---- QUERY
# Empty tables or false predicate should result in NULLs
# Count aggregates on empty return 0
select id,
(select sum(f2) from functional.emptytable),
(select count(id) from functional.alltypestiny where id > 100)
from functional.alltypestiny
order by id
---- RESULTS
0,NULL,0
1,NULL,0
2,NULL,0
3,NULL,0
4,NULL,0
5,NULL,0
6,NULL,0
7,NULL,0
---- TYPES
INT, BIGINT, BIGINT
====
---- QUERY
# nested subquery over a view with an outer predicate on the subquery column
select count(id) from
(select id,
(select count(*) as cnt from functional.alltypestiny where int_col
<= (select max(int_col) from functional.alltypes_view)) as c from functional.dimtbl) T
where t.c <10
---- RESULTS
10
---- TYPES
BIGINT
====
---- QUERY
# Empty tables should result in NULLs
select id, id in (select min(id) from functional.alltypessmall)
from functional.alltypestiny
order by id
---- RESULTS
0,true
1,false
2,false
3,false
4,false
5,false
6,false
7,false
---- TYPES
INT, BOOLEAN
====
---- QUERY
# With a join in the subquery
select id,
(select count(*) from functional.alltypessmall join functional.alltypestiny using (id))
from functional.alltypestiny
order by id
---- RESULTS
0,8
1,8
2,8
3,8
4,8
5,8
6,8
7,8
---- TYPES
INT, BIGINT
====
---- QUERY
# EXISTS predicates are handled at plantime so we need to special case their rewrites
select id,
exists (select max(id) from dimtbl)
from functional.alltypestiny
order by id
---- RESULTS
0,true
1,true
2,true
3,true
4,true
5,true
6,true
7,true
---- TYPES
INT, BOOLEAN
====
---- QUERY
# Multiple levels of exists
SELECT id,
NOT EXISTS (SELECT MAX(f2) FROM emptytable),
EXISTS (SELECT MAX(id) FROM DIMTBL WHERE EXISTS (SELECT MAX(id) FROM alltypestiny)),
(SELECT 1
FROM alltypestiny t1
WHERE EXISTS
(SELECT 1
FROM alltypestiny t2
WHERE t1.id = t2.id AND t1.int_col = t2.int_col
GROUP BY t2.id
HAVING count(1) = 1) LIMIT 1)
FROM alltypessmall WHERE id < 2
---- RESULTS
0,false,true,1
1,false,true,1
---- TYPES
INT, BOOLEAN, BOOLEAN, TINYINT
====

View File

@@ -159,4 +159,11 @@ select count(*) from web_site;
30
---- TYPES
BIGINT
====
====
---- QUERY: TPCDS-COUNT-REASON
select count(*) from reason;
---- RESULTS
35
---- TYPES
BIGINT
====

View File

@@ -0,0 +1,54 @@
====
---- QUERY: TPCDS-Q9
select case when (select count(*)
from store_sales
where ss_quantity between 1 and 20) > 74129
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 1 and 20)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 1 and 20) end bucket1 ,
case when (select count(*)
from store_sales
where ss_quantity between 21 and 40) > 122840
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 21 and 40)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 21 and 40) end bucket2,
case when (select count(*)
from store_sales
where ss_quantity between 41 and 60) > 56580
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 41 and 60)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 41 and 60) end bucket3,
case when (select count(*)
from store_sales
where ss_quantity between 61 and 80) > 10097
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 61 and 80)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 61 and 80) end bucket4,
case when (select count(*)
from store_sales
where ss_quantity between 81 and 100) > 165306
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 81 and 100)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 81 and 100) end bucket5
from reason
where r_reason_sk = 1
---- RESULTS
39.645413,115.898138,191.634713,267.188000,341.986915
---- TYPES
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
====

View File

@@ -0,0 +1,54 @@
====
---- QUERY: TPCDS-Q9
select case when (select count(*)
from store_sales
where ss_quantity between 1 and 20) > 74129
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 1 and 20)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 1 and 20) end bucket1,
case when (select count(*)
from store_sales
where ss_quantity between 21 and 40) > 122840
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 21 and 40)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 21 and 40) end bucket2,
case when (select count(*)
from store_sales
where ss_quantity between 41 and 60) > 56580
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 41 and 60)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 41 and 60) end bucket3,
case when (select count(*)
from store_sales
where ss_quantity between 61 and 80) > 10097
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 61 and 80)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 61 and 80) end bucket4,
case when (select count(*)
from store_sales
where ss_quantity between 81 and 100) > 165306
then (select avg(ss_ext_discount_amt)
from store_sales
where ss_quantity between 81 and 100)
else (select avg(ss_net_profit)
from store_sales
where ss_quantity between 81 and 100) end bucket5
from reason
where r_reason_sk = 1
---- RESULTS
39.64,115.89,191.63,267.18,341.98
---- TYPES
DECIMAL, DECIMAL, DECIMAL, DECIMAL, DECIMAL
====

View File

@@ -73,6 +73,9 @@ class TestTpcdsQuery(ImpalaTestSuite):
def test_tpcds_q8(self, vector):
self.run_test_case(self.get_workload() + '-q8', vector)
def test_tpcds_q9(self, vector):
self.run_test_case(self.get_workload() + '-q9', vector)
def test_tpcds_q10a(self, vector):
self.run_test_case(self.get_workload() + '-q10a', vector)
@@ -342,6 +345,9 @@ class TestTpcdsDecimalV2Query(ImpalaTestSuite):
def test_tpcds_q8(self, vector):
self.run_test_case(self.get_workload() + '-decimal_v2-q8', vector)
def test_tpcds_q9(self, vector):
self.run_test_case(self.get_workload() + '-decimal_v2-q9', vector)
def test_tpcds_q10a(self, vector):
self.run_test_case(self.get_workload() + '-decimal_v2-q10a', vector)

View File

@@ -22,7 +22,7 @@ from datetime import datetime
# changed, and the stress test loses the ability to run the full set of queries. Set
# these constants and assert that when a workload is used, all the queries we expect to
# use are there.
EXPECTED_TPCDS_QUERIES_COUNT = 84
EXPECTED_TPCDS_QUERIES_COUNT = 85
EXPECTED_TPCH_NESTED_QUERIES_COUNT = 22
EXPECTED_TPCH_QUERIES_COUNT = 22
# Add the number of stress test specific queries, i.e. in files like '*-stress-*.test'