IMPALA-3065/IMPALA-3062: Restrict !empty() predicates to scan nodes.

The bug:
Evaluating !empty() predicates at non-scan nodes interacts
poorly with our BE projection of collection slots. For example,
rows could incorrectly be filtered if a !empty() predicate is
assigned to a plan node that comes after the unnest of the
collection that also performs the projection.

The fix:
This patch reworks the generation of !empty() predicates
introduced in IMPALA-2663 for correctness purposes.
The predicates are generated in cases where we can ensure that
they will be assigned only by the parent scan, and no other
plan node.

The conditions are as follows:
- collection table ref is relative and non-correlated
- collection table ref represents the rhs of an inner/cross/semi join
- collection table ref's parent tuple is not outer joined

Change-Id: Ie975ce139a103285c4e9f93c59ce1f1d2aa71767
Reviewed-on: http://gerrit.cloudera.org:8080/2399
Reviewed-by: Alex Behm <alex.behm@cloudera.com>
Reviewed-by: Silvius Rus <srus@cloudera.com>
Tested-by: Internal Jenkins
This commit is contained in:
Alex Behm
2016-02-23 23:54:16 -08:00
committed by Harrison Sheinblatt
parent 6cdcdb12ff
commit 54a46e9459
6 changed files with 197 additions and 88 deletions

View File

@@ -34,7 +34,6 @@ import com.cloudera.impala.catalog.ImpaladCatalog;
import com.cloudera.impala.common.AnalysisException;
import com.cloudera.impala.common.Pair;
import com.cloudera.impala.thrift.TAccessEvent;
import com.cloudera.impala.thrift.TDescribeOutputStyle;
import com.cloudera.impala.thrift.TLineageGraph;
import com.cloudera.impala.thrift.TQueryCtx;
import com.google.common.base.Preconditions;

View File

@@ -94,16 +94,6 @@ public class CollectionTableRef extends TableRef {
// InlineViews are currently not supported as a parent ref.
Preconditions.checkState(!(parentRef instanceof InlineViewRef));
correlatedTupleIds_.add(parentRef.getId());
} else if (getJoinOp().isCrossJoin() || getJoinOp().isInnerJoin()
|| getJoinOp() == JoinOperator.LEFT_SEMI_JOIN) {
// Generate a predicate to filter out empty collections directly
// in the parent scan. This is a performance optimization to avoid
// processing empty collections inside a subplan that would yield
// an empty result set.
IsNotEmptyPredicate isNotEmptyPred =
new IsNotEmptyPredicate(collectionExpr_.clone());
isNotEmptyPred.analyze(analyzer);
analyzer.registerConjuncts(isNotEmptyPred, false);
}
}
if (!isRelative()) {
@@ -115,6 +105,11 @@ public class CollectionTableRef extends TableRef {
}
isAnalyzed_ = true;
analyzeHints(analyzer);
// TODO: For joins on nested collections some join ops can be simplified
// due to the containment relationship of the parent and child. For example,
// a FULL OUTER JOIN would become a LEFT OUTER JOIN, or a RIGHT SEMI JOIN
// would become an INNER or CROSS JOIN.
analyzeJoin(analyzer);
}

View File

@@ -172,6 +172,12 @@ public class SelectStmt extends QueryStmt {
throw new AnalysisException("Found missing tables. Aborting analysis.");
}
// Generate !empty() predicates to filter out empty collections.
// Skip this step when analyzing a WITH-clause because CollectionTableRefs
// do not register collection slots in their parent in that context
// (see CollectionTableRef.analyze()).
if (!analyzer.isWithClause()) registerIsNotEmptyPredicates(analyzer);
// analyze plan hints from select list
selectList_.analyzePlanHints(analyzer);
@@ -284,6 +290,50 @@ public class SelectStmt extends QueryStmt {
if (aggInfo_ != null) LOG.debug("post-analysis " + aggInfo_.debugString());
}
/**
* Generates and registers !empty() predicates to filter out empty collections directly
* in the parent scan of collection table refs. This is a performance optimization to
* avoid the expensive processing of empty collections inside a subplan that would
* yield an empty result set.
*
* For correctness purposes, the predicates are generated in cases where we can ensure
* that they will be assigned only to the parent scan, and no other plan node.
*
* The conditions are as follows:
* - collection table ref is relative and non-correlated
* - collection table ref represents the rhs of an inner/cross/semi join
* - collection table ref's parent tuple is not outer joined
*
* TODO: In some cases, it is possible to generate !empty() predicates for a correlated
* table ref, but in general, that is not correct for non-trivial query blocks.
* For example, if the block with the correlated ref has an aggregation then adding a
* !empty() predicate would incorrectly discard rows from the final result set.
* TODO: Evaluating !empty() predicates at non-scan nodes interacts poorly with our BE
* projection of collection slots. For example, rows could incorrectly be filtered if
* a !empty() predicate is assigned to a plan node that comes after the unnest of the
* collection that also performs the projection.
*/
private void registerIsNotEmptyPredicates(Analyzer analyzer) throws AnalysisException {
for (TableRef tblRef: tableRefs_) {
Preconditions.checkState(tblRef.isResolved());
if (!(tblRef instanceof CollectionTableRef)) continue;
CollectionTableRef ref = (CollectionTableRef) tblRef;
// Skip non-relative and correlated refs.
if (!ref.isRelative() || ref.isCorrelated()) continue;
// Skip outer and anti joins.
if (ref.getJoinOp().isOuterJoin() || ref.getJoinOp().isAntiJoin()) continue;
// Do not generate a predicate if the parent tuple is outer joined.
if (analyzer.isOuterJoined(ref.getResolvedPath().getRootDesc().getId())) continue;
IsNotEmptyPredicate isNotEmptyPred =
new IsNotEmptyPredicate(ref.getCollectionExpr().clone());
isNotEmptyPred.analyze(analyzer);
// Register the predicate as an On-clause conjunct because it should only
// affect the result of this join and not the whole FROM clause.
analyzer.registerOnClauseConjuncts(
Lists.<Expr>newArrayList(isNotEmptyPred), ref);
}
}
/**
* Marks all unassigned join predicates as well as exprs in aggInfo and sortInfo.
*/

View File

@@ -82,7 +82,7 @@ where c_nationkey = n_nationkey and s_nationkey = n_nationkey
| partitions=1/1 files=1 size=111.08MB
|
05:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
runtime filters: RF000 -> c_nationkey, RF001 -> c.c_comment, RF002 -> c_comment, RF003 -> c.c_nationkey
====
# Test subplans: Cross join of parent and relative ref.
@@ -132,6 +132,7 @@ where b.item % 2 = 0
|
00:SCAN HDFS [functional.allcomplextypes a]
partitions=0/0 files=0 size=0B
predicates: !empty(a.int_array_col)
predicates on b: b.item % 2 = 0
====
# Test subplans: Left anti join of parent and relative ref without On-clause.
@@ -168,8 +169,6 @@ where a.id < 10
predicates: a.id < 10
====
# Test subplans: Right anti join of parent and relative ref without On-clause.
# TODO: Transform the join op into a CROSS JOIN.
# in this context?
select b.item from functional.allcomplextypes a right anti join a.int_array_col b
where b.item % 2 = 0
---- PLAN
@@ -219,8 +218,6 @@ where a.id < 10
predicates: a.id < 10
====
# Test subplans: Right outer join of parent and relative ref without On-clause.
# TODO: Transform the join op into a CROSS JOIN.
# in this context?
select a.id, b.item from functional.allcomplextypes a right outer join a.int_array_col b
where b.item % 2 = 0
---- PLAN
@@ -237,7 +234,6 @@ where b.item % 2 = 0
predicates on b: b.item % 2 = 0
====
# Test subplans: Full outer join of parent and relative ref without On-clause.
# TODO: Transform the join op into an INNER JOIN.
select a.id, b.item from functional.allcomplextypes a full outer join a.int_array_col b
where b.item % 2 = 0 and a.id < 10
---- PLAN
@@ -293,7 +289,6 @@ where a.id < 10
predicates on b: b.item % 2 = 0
====
# Test subplans: Non-equi right semi join of parent and relative ref.
# TODO: Transform the join op into an INNER JOIN.
select b.item from functional.allcomplextypes a
right semi join a.int_array_col b on (a.id < b.item and a.id < 10)
where b.item % 2 = 0
@@ -309,7 +304,7 @@ where b.item % 2 = 0
|
00:SCAN HDFS [functional.allcomplextypes a]
partitions=0/0 files=0 size=0B
predicates: a.id < 10
predicates: a.id < 10, !empty(a.int_array_col)
predicates on b: b.item % 2 = 0
====
# Test subplans: Non-equi left anti join of parent and relative ref.
@@ -332,7 +327,6 @@ where a.id < 10
predicates on b: b.item % 2 = 0
====
# Test subplans: Non-equi right anti join of parent and relative ref.
# TODO: Transform the join op into an INNER JOIN.
select b.item from functional.allcomplextypes a
right anti join a.int_array_col b on (a.id < b.item and a.id < 10)
where b.item % 2 = 0
@@ -371,7 +365,6 @@ where a.id < 10
predicates on b: b.item % 2 = 0
====
# Test subplans: Non-equi right outer join of parent and relative ref.
# TODO: Transform the join op into an INNER JOIN.
select a.id, b.item from functional.allcomplextypes a
right outer join a.int_array_col b on (a.id < b.item and a.id < 10)
where b.item % 2 = 0
@@ -391,7 +384,6 @@ where b.item % 2 = 0
predicates on b: b.item % 2 = 0
====
# Test subplans: Non-equi full outer join of parent and relative ref.
# TODO: Transform the join op into LEFT OUTER JOIN.
select a.id, b.item from functional.allcomplextypes a
full outer join a.int_array_col b on (a.id < b.item and a.id < 10)
where b.item % 2 = 0
@@ -447,11 +439,10 @@ where a.id < 10
|
00:SCAN HDFS [functional.allcomplextypes a]
partitions=0/0 files=0 size=0B
predicates: !empty(a.struct_array_col), a.id % 2 = 0, a.id < 10
predicates: a.id % 2 = 0, !empty(a.struct_array_col), a.id < 10
predicates on b: b.f1 % 2 = 0, b.f1 < 10
====
# Test subplans: Right-semi equi-join of parent and relative ref.
# TODO: Transform the join op into an INNER JOIN.
select b.f1, b.f2 from functional.allcomplextypes a
right semi join a.struct_array_col b
on (a.id < 10 and b.f1 = a.id and b.f1 < a.year)
@@ -468,7 +459,7 @@ where b.f1 % 2 = 0
|
00:SCAN HDFS [functional.allcomplextypes a]
partitions=0/0 files=0 size=0B
predicates: a.id < 10, a.id % 2 = 0
predicates: a.id < 10, !empty(a.struct_array_col), a.id % 2 = 0
predicates on b: b.f1 < 10, b.f1 % 2 = 0
====
# Test subplans: Left-anti equi-join of parent and relative ref.
@@ -492,7 +483,6 @@ where a.id < 10
predicates on b: b.f1 % 2 = 0, b.f1 < 10
====
# Test subplans: Right-anti equi-join of parent and relative ref.
# TODO: Transform the join op into an INNER JOIN.
select b.f1, b.f2 from functional.allcomplextypes a
right anti join a.struct_array_col b
on (a.id < 10 and b.f1 = a.id and b.f1 < a.year)
@@ -533,7 +523,6 @@ where a.id < 10
predicates on b: b.f1 % 2 = 0, b.f1 < 10
====
# Test subplans: Right-outer equi-join of parent and relative ref.
# TODO: Transform the join op into an INNER JOIN.
select b.f1, b.f2 from functional.allcomplextypes a
right outer join a.struct_array_col b
on (a.id < 10 and b.f1 = a.id and b.f1 < a.year)
@@ -554,7 +543,6 @@ where b.f1 % 2 = 0
predicates on b: b.f1 % 2 = 0
====
# Test subplans: Full-outer equi-join of parent and relative ref.
# TODO: Transform the join op into an LEFT OUTER JOIN.
select b.f1, b.f2 from functional.allcomplextypes a
full outer join a.struct_array_col b
on (b.f1 = a.id and b.f1 < a.year)
@@ -689,11 +677,9 @@ inner join functional.alltypes d on (b.id = d.id)
|
08:HASH JOIN [FULL OUTER JOIN]
| hash predicates: b.id = a.id
| other predicates: !empty(a.struct_array_col)
|
|--00:SCAN HDFS [functional.allcomplextypes a]
| partitions=0/0 files=0 size=0B
| predicates: !empty(a.struct_array_col)
| predicates on e: e.f1 < 10
|
01:SCAN HDFS [functional.alltypestiny b]
@@ -775,7 +761,7 @@ where b.item < 10 and c.int_col > 30
|
10:HASH JOIN [FULL OUTER JOIN]
| hash predicates: c.id = b.item
| other predicates: !empty(a.int_array_col), !empty(a.int_map_col), b.item < 10, c.int_col > 30
| other predicates: b.item < 10, c.int_col > 30
|
|--01:SUBPLAN
| |
@@ -787,7 +773,6 @@ where b.item < 10 and c.int_col > 30
| |
| 00:SCAN HDFS [functional.allcomplextypes a]
| partitions=0/0 files=0 size=0B
| predicates: !empty(a.int_array_col), !empty(a.int_map_col)
| predicates on b: b.item < 10
|
05:SCAN HDFS [functional.alltypessmall c]
@@ -1225,7 +1210,7 @@ limit 10
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders), c_custkey < 10
predicates on o: !empty(o.o_lineitems), o_orderkey < 5
predicates on o_lineitems: l_linenumber < 3
@@ -1322,7 +1307,7 @@ where c.c_custkey = o.o_orderkey and c.c_custkey = o.o_shippriority
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders), c.c_custkey = c.c_nationkey
predicates on o: !empty(o.o_lineitems), o.o_orderkey = o.o_shippriority
predicates on l: l.l_partkey = l.l_linenumber, l.l_partkey = l.l_suppkey
@@ -1429,11 +1414,9 @@ inner join t2.int_array_col
|
05:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.id = t2.id
| other predicates: !empty(t2.int_array_col)
|
|--01:SCAN HDFS [functional.allcomplextypes t2]
| partitions=0/0 files=0 size=0B
| predicates: !empty(t2.int_array_col)
|
00:SCAN HDFS [functional.allcomplextypes t1]
partitions=0/0 files=0 size=0B
@@ -1539,7 +1522,7 @@ where c.c_custkey in
| 04:UNNEST [c.c_orders o2]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
====
# IMPALA-2412: Test join ordering in nested subplans. Same as above
# but with a few inner joins.
@@ -1583,7 +1566,7 @@ where c.c_custkey in
| 04:UNNEST [c.c_orders o2]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
====
# IMPALA-2412: Test join ordering in nested subplans.
@@ -1625,7 +1608,7 @@ where c.c_custkey in
| 04:UNNEST [c.c_orders o2]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
====
# IMPALA-2446: Test predicate assignment when outer join has no conjuncts in
# the ON clause and there are predicates in the WHERE clause that can be assigned to
@@ -1726,13 +1709,66 @@ inner join o.o_lineitems
| | 05:UNNEST [o.o_lineitems]
| |
| 07:NESTED LOOP JOIN [RIGHT OUTER JOIN]
| | predicates: !empty(o.o_lineitems)
| |
| |--02:SINGULAR ROW SRC
| |
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
predicates on o: !empty(o.o_lineitems)
partitions=1/1 files=4 size=554.13MB
====
# IMPALA-3065/IMPALA-3062: Test correct assignment of !empty() predicates.
# Predicates should not be generated if the parent tuple is outer joined.
select 1 from tpch_nested_parquet.customer c1
inner join c1.c_orders
right outer join tpch_nested_parquet.customer c2
on c1.c_custkey = c2.c_custkey
---- PLAN
06:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: c1.c_custkey = c2.c_custkey
| runtime filters: RF000 <- c2.c_custkey
|
|--05:SCAN HDFS [tpch_nested_parquet.customer c2]
| partitions=1/1 files=4 size=554.13MB
|
01:SUBPLAN
|
|--04:NESTED LOOP JOIN [CROSS JOIN]
| |
| |--02:SINGULAR ROW SRC
| |
| 03:UNNEST [c1.c_orders]
|
00:SCAN HDFS [tpch_nested_parquet.customer c1]
partitions=1/1 files=4 size=554.13MB
runtime filters: RF000 -> c1.c_custkey
====
# IMPALA-3065/IMPALA-3062: Test correct assignment of !empty() predicates.
# Predicates should not be generated if the parent tuple is outer joined.
select 1 from tpch_nested_parquet.customer c1
full outer join tpch_nested_parquet.customer c2
on c1.c_custkey = c2.c_custkey
inner join c1.c_orders o1
left semi join c2.c_orders o2
---- PLAN
08:SUBPLAN
|
|--06:NESTED LOOP JOIN [LEFT SEMI JOIN]
| |
| |--04:UNNEST [c2.c_orders o2]
| |
| 05:NESTED LOOP JOIN [CROSS JOIN]
| |
| |--02:SINGULAR ROW SRC
| |
| 03:UNNEST [c1.c_orders o1]
|
07:HASH JOIN [FULL OUTER JOIN]
| hash predicates: c1.c_custkey = c2.c_custkey
|
|--01:SCAN HDFS [tpch_nested_parquet.customer c2]
| partitions=1/1 files=4 size=554.13MB
|
00:SCAN HDFS [tpch_nested_parquet.customer c1]
partitions=1/1 files=4 size=554.13MB
====

View File

@@ -30,7 +30,7 @@ order by
| group by: l_returnflag, l_linestatus
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: l_shipdate <= '1998-09-02'
---- DISTRIBUTEDPLAN
05:MERGING-EXCHANGE [UNPARTITIONED]
@@ -50,7 +50,7 @@ order by
| group by: l_returnflag, l_linestatus
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: l_shipdate <= '1998-09-02'
====
# TPCH-Q2
@@ -316,7 +316,7 @@ limit 10
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders), c_mktsegment = 'BUILDING'
predicates on o: !empty(o.o_lineitems), o_orderdate < '1995-03-15'
predicates on l: l_shipdate > '1995-03-15'
@@ -355,7 +355,7 @@ limit 10
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders), c_mktsegment = 'BUILDING'
predicates on o: !empty(o.o_lineitems), o_orderdate < '1995-03-15'
predicates on l: l_shipdate > '1995-03-15'
@@ -408,7 +408,7 @@ order by
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: o_orderdate >= '1993-07-01', o_orderdate < '1993-10-01'
predicates on o_lineitems: l_commitdate < l_receiptdate
@@ -446,7 +446,7 @@ order by
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: o_orderdate >= '1993-07-01', o_orderdate < '1993-10-01'
predicates on o_lineitems: l_commitdate < l_receiptdate
@@ -522,7 +522,7 @@ order by
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
runtime filters: RF000 -> c.c_nationkey, RF002 -> c_nationkey
@@ -587,7 +587,7 @@ order by
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate >= '1994-01-01', o_orderdate < '1995-01-01'
runtime filters: RF000 -> c.c_nationkey, RF002 -> c_nationkey
@@ -608,7 +608,7 @@ where
| output: sum(l_extendedprice * l_discount)
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01', l_discount >= 0.05, l_discount <= 0.07, l_quantity < 24
---- DISTRIBUTEDPLAN
03:AGGREGATE [FINALIZE]
@@ -620,7 +620,7 @@ where
| output: sum(l_extendedprice * l_discount)
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01', l_discount >= 0.05, l_discount <= 0.07, l_quantity < 24
====
# TPCH-Q7
@@ -708,7 +708,7 @@ order by
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems)
predicates on l: l_shipdate >= '1995-01-01', l_shipdate <= '1996-12-31'
@@ -775,7 +775,7 @@ order by
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems)
predicates on l: l_shipdate >= '1995-01-01', l_shipdate <= '1996-12-31'
@@ -879,7 +879,7 @@ order by
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate >= '1995-01-01', o_orderdate <= '1996-12-31'
runtime filters: RF001 -> c_nationkey
@@ -962,7 +962,7 @@ order by
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate >= '1995-01-01', o_orderdate <= '1996-12-31'
runtime filters: RF001 -> c_nationkey
@@ -1045,7 +1045,7 @@ order by
| 03:UNNEST [o.o_lineitems l]
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders o]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(o.o_lineitems)
---- DISTRIBUTEDPLAN
22:MERGING-EXCHANGE [UNPARTITIONED]
@@ -1109,7 +1109,7 @@ order by
| 03:UNNEST [o.o_lineitems l]
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders o]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(o.o_lineitems)
====
# TPCH-Q10
@@ -1177,7 +1177,7 @@ limit 20
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate >= '1993-10-01', o_orderdate < '1994-01-01'
predicates on l: l_returnflag = 'R'
@@ -1226,7 +1226,7 @@ limit 20
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderdate >= '1993-10-01', o_orderdate < '1994-01-01'
predicates on l: l_returnflag = 'R'
@@ -1442,7 +1442,7 @@ order by
| 03:UNNEST [o.o_lineitems l]
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders o]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(o.o_lineitems)
predicates on l: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate, l_receiptdate >= '1994-01-01', l_receiptdate < '1995-01-01'
---- DISTRIBUTEDPLAN
@@ -1471,7 +1471,7 @@ order by
| 03:UNNEST [o.o_lineitems l]
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders o]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(o.o_lineitems)
predicates on l: l_shipmode IN ('MAIL', 'SHIP'), l_commitdate < l_receiptdate, l_shipdate < l_commitdate, l_receiptdate >= '1994-01-01', l_receiptdate < '1995-01-01'
====
@@ -1517,7 +1517,7 @@ order by
| 03:UNNEST [c.c_orders]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates on c_orders: (NOT o_comment LIKE '%special%requests%')
---- DISTRIBUTEDPLAN
12:MERGING-EXCHANGE [UNPARTITIONED]
@@ -1555,7 +1555,7 @@ order by
| 03:UNNEST [c.c_orders]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates on c_orders: (NOT o_comment LIKE '%special%requests%')
====
# TPCH-Q14
@@ -1585,7 +1585,7 @@ where
| partitions=1/1 files=1 size=6.30MB
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: l_shipdate >= '1995-09-01', l_shipdate < '1995-10-01'
runtime filters: RF000 -> l_partkey
---- DISTRIBUTEDPLAN
@@ -1607,7 +1607,7 @@ where
| partitions=1/1 files=1 size=6.30MB
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: l_shipdate >= '1995-09-01', l_shipdate < '1995-10-01'
runtime filters: RF000 -> l_partkey
====
@@ -1658,7 +1658,7 @@ order by
| | group by: l_suppkey
| |
| 03:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
| partitions=1/1 files=4 size=577.87MB
| partitions=1/1 files=4 size=554.13MB
| predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
|
06:HASH JOIN [INNER JOIN]
@@ -1673,7 +1673,7 @@ order by
| group by: l_suppkey
|
01:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
runtime filters: RF000 -> l.l_suppkey
---- DISTRIBUTEDPLAN
@@ -1707,7 +1707,7 @@ order by
| | group by: l_suppkey
| |
| 03:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
| partitions=1/1 files=4 size=577.87MB
| partitions=1/1 files=4 size=554.13MB
| predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
|
06:HASH JOIN [INNER JOIN, PARTITIONED]
@@ -1730,7 +1730,7 @@ order by
| group by: l_suppkey
|
01:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: l_shipdate >= '1996-01-01', l_shipdate < '1996-04-01'
runtime filters: RF000 -> l.l_suppkey
====
@@ -1862,7 +1862,7 @@ where
| | group by: l_partkey
| |
| 02:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
| partitions=1/1 files=4 size=577.87MB
| partitions=1/1 files=4 size=554.13MB
|
04:HASH JOIN [INNER JOIN]
| hash predicates: l_partkey = p_partkey
@@ -1873,7 +1873,7 @@ where
| predicates: p_brand = 'Brand#23', p_container = 'MED BOX'
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
runtime filters: RF000 -> l.l_partkey, RF001 -> l_partkey
---- DISTRIBUTEDPLAN
12:AGGREGATE [FINALIZE]
@@ -1902,7 +1902,7 @@ where
| | group by: l_partkey
| |
| 02:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
| partitions=1/1 files=4 size=577.87MB
| partitions=1/1 files=4 size=554.13MB
|
04:HASH JOIN [INNER JOIN, BROADCAST]
| hash predicates: l_partkey = p_partkey
@@ -1915,7 +1915,7 @@ where
| predicates: p_brand = 'Brand#23', p_container = 'MED BOX'
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
runtime filters: RF000 -> l.l_partkey, RF001 -> l_partkey
====
# TPCH-Q18
@@ -1976,7 +1976,7 @@ limit 100
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems)
---- DISTRIBUTEDPLAN
@@ -2022,7 +2022,7 @@ limit 100
| 03:UNNEST [c.c_orders o]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems)
====
@@ -2076,7 +2076,7 @@ where
| partitions=1/1 files=1 size=6.30MB
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
runtime filters: RF000 -> l_partkey
---- DISTRIBUTEDPLAN
06:AGGREGATE [FINALIZE]
@@ -2098,7 +2098,7 @@ where
| partitions=1/1 files=1 size=6.30MB
|
00:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
runtime filters: RF000 -> l_partkey
====
# TPCH-Q20
@@ -2180,7 +2180,7 @@ order by
| group by: l_partkey, l_suppkey
|
07:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
runtime filters: RF000 -> l.l_suppkey, RF001 -> l.l_partkey
---- DISTRIBUTEDPLAN
@@ -2248,7 +2248,7 @@ order by
| group by: l_partkey, l_suppkey
|
07:SCAN HDFS [tpch_nested_parquet.customer.c_orders.o_lineitems l]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: l_shipdate >= '1994-01-01', l_shipdate < '1995-01-01'
runtime filters: RF000 -> l.l_suppkey, RF001 -> l.l_partkey
====
@@ -2346,9 +2346,9 @@ limit 100
| 04:UNNEST [c.c_orders o]
|
01:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderstatus = 'F'
predicates on o: o_orderstatus = 'F', !empty(o.o_lineitems)
predicates on l1: l1.l_receiptdate > l1.l_commitdate
predicates on l3: l3.l_receiptdate > l3.l_commitdate
---- DISTRIBUTEDPLAN
@@ -2419,9 +2419,9 @@ limit 100
| 04:UNNEST [c.c_orders o]
|
01:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: !empty(c.c_orders)
predicates on o: !empty(o.o_lineitems), o_orderstatus = 'F'
predicates on o: o_orderstatus = 'F', !empty(o.o_lineitems)
predicates on l1: l1.l_receiptdate > l1.l_commitdate
predicates on l3: l3.l_receiptdate > l3.l_commitdate
====
@@ -2474,7 +2474,7 @@ order by
| | output: avg(c_acctbal)
| |
| 05:SCAN HDFS [tpch_nested_parquet.customer c]
| partitions=1/1 files=4 size=577.87MB
| partitions=1/1 files=4 size=554.13MB
| predicates: c_acctbal > 0.00, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
|
01:SUBPLAN
@@ -2486,7 +2486,7 @@ order by
| 03:UNNEST [c.c_orders]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
---- DISTRIBUTEDPLAN
15:MERGING-EXCHANGE [UNPARTITIONED]
@@ -2519,7 +2519,7 @@ order by
| | output: avg(c_acctbal)
| |
| 05:SCAN HDFS [tpch_nested_parquet.customer c]
| partitions=1/1 files=4 size=577.87MB
| partitions=1/1 files=4 size=554.13MB
| predicates: c_acctbal > 0.00, substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
|
01:SUBPLAN
@@ -2531,6 +2531,6 @@ order by
| 03:UNNEST [c.c_orders]
|
00:SCAN HDFS [tpch_nested_parquet.customer c]
partitions=1/1 files=4 size=577.87MB
partitions=1/1 files=4 size=554.13MB
predicates: substr(c_phone, 1, 2) IN ('13', '31', '23', '29', '30', '18', '17')
====

View File

@@ -421,3 +421,32 @@ select id, pos from complextypestbl t1 full outer join t1.int_array t2
---- TYPES
bigint,bigint
====
---- QUERY
# IMPALA-3065/IMPALA-3062: Test a join on a nested collection whose
# parent tuple is outer joined. This test covers the case where the
# outer joined collection is on the probe side of the outer join.
# To reliably reproduce one of the problematic cases, we need
# > batch_size matches for at least one probe row.
select straight_join count(o.pos) from tpch_nested_parquet.customer c1
right outer join tpch_nested_parquet.customer c2
on c1.c_custkey % 2 = c2.c_custkey % 2
inner join c1.c_orders o
where c1.c_custkey < 10 and c2.c_custkey < 10000
---- RESULTS
329960
---- TYPES
bigint
====
---- QUERY
# IMPALA-3065/IMPALA-3062: Test a join on a nested collection whose
# parent tuple is outer joined. This test covers the case where the
# outer joined collection is on the build side of the outer join.
select count(a.pos) from complextypestbl t1
full outer join complextypestbl t2
on t1.id = t2.id
inner join t2.int_array a
---- RESULTS
10
---- TYPES
bigint
====