Files
impala/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test
Dimitris Tsirogiannis 5a6f53db16 Add partition pruning tests
The following changes are included in this commit:
1. Modified the alltypesagg table to include an additional partition key
that has nulls.
2. Added a number of tests in hdfs.test that exercise the partition
pruning logic (see IMPALA-887).
3. Modified all the tests that are affected by the change in alltypesagg.

Change-Id: I1a769375aaa71273341522eb94490ba5e4c6f00d
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2874
Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
Tested-by: jenkins
Reviewed-on: http://gerrit.ent.cloudera.com:8080/3236
2014-06-24 02:14:27 -07:00

373 lines
12 KiB
Plaintext

# correct placement of predicates with left outer joins; t2 and t3 are nullable
# (t1 is the preserved side of both joins). The inline comments state where the
# expected plans place each conjunct: in a scan's "predicates", in a join's
# "other join predicates", or in a join's "other predicates".
select *
from functional.testtbl t1 left outer join functional.testtbl t2 on (
# equi-join condition; shown as the hash predicate of the t1/t2 join
t1.id - 1 = t2.id + 1
# lhs predicate is an "other join predicate" of the t1/t2 join; not applied by the t1 scan
and t1.zip = 94611
# rhs predicate applied by rhs (t2) scan
and t2.zip = 94104)
left outer join functional.testtbl t3 on (
# equi-join condition; shown as the hash predicate of the last join
t1.id = t3.id
# predicate on t2 is an "other join predicate" of the last join, not applied by t2 scan
and t2.id = 15
# predicate on t1 and t2 is an "other join predicate" of the last join
and t1.id - t2.id = 0
# rhs predicate applied by rhs (t3) scan
and t3.zip = 94720
)
where
# t1 predicate in where clause is applied by scans (t1; also propagated to t3 as t3.id > 0)
t1.id > 0
# t2 predicate in where clause is applied only by the t1/t2 join ("other predicates");
# the expected t2 scan does not apply it
and t2.id is null
# t3 predicate in where clause is applied by the last join and by the t3 scan
and t3.id is not null
# predicate over t1 and t2 is applied after the t1/t2 join ("other predicates")
and t1.zip + t2.zip = 10
# predicate over t1, t2 and t3 is applied after the last join ("other predicates")
and t1.zip + t2.zip + t3.zip= 20
---- PLAN
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.id = t3.id
| other join predicates: t2.id = 15, t1.id - t2.id = 0
| other predicates: t3.id IS NOT NULL, t1.zip + t2.zip + t3.zip = 20
|
|--02:SCAN HDFS [functional.testtbl t3]
| partitions=1/1 size=0B compact
| predicates: t3.zip = 94720, t3.id > 0, t3.id IS NOT NULL
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.id - 1 = t2.id + 1
| other join predicates: t1.zip = 94611
| other predicates: t2.id IS NULL, t1.zip + t2.zip = 10
|
|--01:SCAN HDFS [functional.testtbl t2]
| partitions=1/1 size=0B compact
| predicates: t2.zip = 94104
|
00:SCAN HDFS [functional.testtbl t1]
partitions=1/1 size=0B
predicates: t1.id > 0
---- DISTRIBUTEDPLAN
07:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: t1.id = t3.id
| other join predicates: t2.id = 15, t1.id - t2.id = 0
| other predicates: t3.id IS NOT NULL, t1.zip + t2.zip + t3.zip = 20
|
|--06:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [functional.testtbl t3]
| partitions=1/1 size=0B
| predicates: t3.zip = 94720, t3.id > 0, t3.id IS NOT NULL
|
03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: t1.id - 1 = t2.id + 1
| other join predicates: t1.zip = 94611
| other predicates: t2.id IS NULL, t1.zip + t2.zip = 10
|
|--05:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl t2]
| partitions=1/1 size=0B
| predicates: t2.zip = 94104
|
00:SCAN HDFS [functional.testtbl t1]
partitions=1/1 size=0B
predicates: t1.id > 0
====
# the same thing with subqueries; should produce the same result
# Each table of the preceding left-outer-join query is wrapped in an inline view
# (select * from functional.testtbl aN). The expected plans keep the same join
# order and predicate placement, but predicates are printed against the base-table
# aliases a1/a2/a3, and the a3 scan lists its predicates in a different order.
select *
from (select * from functional.testtbl a1) t1
left outer join (select * from functional.testtbl a2) t2 on (
t1.id - 1 = t2.id + 1 and t1.zip = 94611 and t2.zip = 94104)
left outer join (select * from functional.testtbl a3) t3 on (
t1.id = t3.id and t2.id = 15 and t1.id - t2.id = 0 and t3.zip = 94720)
where t1.id > 0 and t2.id is null and t3.id is not null
and t1.zip + t2.zip = 10 and t1.zip + t2.zip + t3.zip= 20
---- PLAN
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a1.id = a3.id
| other join predicates: a2.id = 15, a1.id - a2.id = 0
| other predicates: a3.id IS NOT NULL, a1.zip + a2.zip + a3.zip = 20
|
|--02:SCAN HDFS [functional.testtbl a3]
| partitions=1/1 size=0B compact
| predicates: a3.id > 0, a3.id IS NOT NULL, a3.zip = 94720
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a1.id - 1 = a2.id + 1
| other join predicates: a1.zip = 94611
| other predicates: a2.id IS NULL, a1.zip + a2.zip = 10
|
|--01:SCAN HDFS [functional.testtbl a2]
| partitions=1/1 size=0B compact
| predicates: a2.zip = 94104
|
00:SCAN HDFS [functional.testtbl a1]
partitions=1/1 size=0B
predicates: a1.id > 0
---- DISTRIBUTEDPLAN
07:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: a1.id = a3.id
| other join predicates: a2.id = 15, a1.id - a2.id = 0
| other predicates: a3.id IS NOT NULL, a1.zip + a2.zip + a3.zip = 20
|
|--06:EXCHANGE [BROADCAST]
| |
| 02:SCAN HDFS [functional.testtbl a3]
| partitions=1/1 size=0B
| predicates: a3.id > 0, a3.id IS NOT NULL, a3.zip = 94720
|
03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
| hash predicates: a1.id - 1 = a2.id + 1
| other join predicates: a1.zip = 94611
| other predicates: a2.id IS NULL, a1.zip + a2.zip = 10
|
|--05:EXCHANGE [BROADCAST]
| |
| 01:SCAN HDFS [functional.testtbl a2]
| partitions=1/1 size=0B
| predicates: a2.zip = 94104
|
00:SCAN HDFS [functional.testtbl a1]
partitions=1/1 size=0B
predicates: a1.id > 0
====
# correct propagation of scan predicates in OJ On clauses:
# id = 17 must not be applied by the t1 and t3 scans
# In the expected plan, t1.id = 17 stays an "other join predicate" of the left outer
# join and is propagated only to the nullable side as the scan predicate t2.id = 17;
# the t1 and t3 scans carry no predicates. Only a single-node PLAN section is given
# for this test.
select *
from functional.testtbl t1
left outer join functional.testtbl t2 on (
t1.id = t2.id and t1.id = 17)
join functional.testtbl t3 on (t1.id = t3.id)
---- PLAN
04:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t3.id
|
|--02:SCAN HDFS [functional.testtbl t3]
| partitions=1/1 size=0B compact
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t1.id = t2.id
| other join predicates: t1.id = 17
|
|--01:SCAN HDFS [functional.testtbl t2]
| partitions=1/1 size=0B compact
| predicates: t2.id = 17
|
00:SCAN HDFS [functional.testtbl t1]
partitions=1/1 size=0B
====
# correct placement of predicates with right outer joins; t1 and t2 are nullable
# (t3 is the preserved side of the last join). The inline comments state where the
# expected plans place each conjunct: in a scan's "predicates", in a join's
# "other join predicates", or in a join's "other predicates".
select *
from functional.testtbl t1 right outer join functional.testtbl t2 on (
# equi-join condition; shown as the hash predicate of the t1/t2 join
t1.id - 1 = t2.id + 1
# lhs predicate: the expected plan lists it as an "other join predicate" of the
# t1/t2 join, not as a t1 scan predicate
# NOTE(review): this comment previously said "scan predicate"; presumably the
# predicate could be pushed to the t1 scan since t1 is nullable here - confirm
# which behavior is intended
and t1.zip = 94611
# rhs predicate is join predicate
and t2.zip = 94104)
right outer join functional.testtbl t3 on (
# equi-join condition; shown as the hash predicate of the last join
t1.id = t3.id
# predicate on t2 is scan predicate (applied by the t2 scan)
and t2.id = 15
# predicate on t1 and t2 could be applied by previous join
# but the planner doesn't recognize that case and makes it a join predicate
and t1.id - t2.id = 0
# rhs predicate is join predicate
and t3.zip = 94720
)
where
# t1 predicate in where clause is applied by scan and last join
t1.id > 0
# t2 predicate in where clause is applied by last join
and t2.id is null
# t3 predicate in where clause is applied by scan; the expected t1 scan also shows
# t1.id IS NOT NULL (presumably propagated through t1.id = t3.id - confirm)
and t3.id is not null
# predicate over t1 and t2 is applied after the last join ("other predicates")
and t1.zip + t2.zip = 10
# predicate over t1, t2 and t3 is applied after the last join ("other predicates")
and t1.zip + t2.zip + t3.zip= 20
---- PLAN
04:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t1.id = t3.id
| other join predicates: t1.id - t2.id = 0, t3.zip = 94720
| other predicates: t1.id > 0, t2.id IS NULL, t1.zip + t2.zip = 10, t1.zip + t2.zip + t3.zip = 20
|
|--02:SCAN HDFS [functional.testtbl t3]
| partitions=1/1 size=0B compact
| predicates: t3.id IS NOT NULL
|
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t1.id - 1 = t2.id + 1
| other join predicates: t1.zip = 94611, t2.zip = 94104
|
|--01:SCAN HDFS [functional.testtbl t2]
| partitions=1/1 size=0B compact
| predicates: t2.id = 15
|
00:SCAN HDFS [functional.testtbl t1]
partitions=1/1 size=0B
predicates: t1.id > 0, t1.id IS NOT NULL
---- DISTRIBUTEDPLAN
09:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: t1.id = t3.id
| other join predicates: t1.id - t2.id = 0, t3.zip = 94720
| other predicates: t1.id > 0, t2.id IS NULL, t1.zip + t2.zip = 10, t1.zip + t2.zip + t3.zip = 20
|
|--08:EXCHANGE [HASH(t3.id)]
| |
| 02:SCAN HDFS [functional.testtbl t3]
| partitions=1/1 size=0B
| predicates: t3.id IS NOT NULL
|
07:EXCHANGE [HASH(t1.id)]
|
03:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: t1.id - 1 = t2.id + 1
| other join predicates: t1.zip = 94611, t2.zip = 94104
|
|--06:EXCHANGE [HASH(t2.id + 1)]
| |
| 01:SCAN HDFS [functional.testtbl t2]
| partitions=1/1 size=0B
| predicates: t2.id = 15
|
05:EXCHANGE [HASH(t1.id - 1)]
|
00:SCAN HDFS [functional.testtbl t1]
partitions=1/1 size=0B
predicates: t1.id > 0, t1.id IS NOT NULL
====
# the same thing with subqueries; should produce the same result
# Each table of the preceding right-outer-join query is wrapped in an inline view
# (select * from functional.testtbl aN). The expected plans keep the same join
# order and predicate placement, but predicates are printed against the base-table
# aliases a1/a2/a3, and the last join lists a2.id IS NULL before a1.id > 0 in its
# "other predicates".
select *
from (select * from functional.testtbl a1) t1 right outer join (select * from functional.testtbl a2) t2 on (
t1.id - 1 = t2.id + 1 and t1.zip = 94611 and t2.zip = 94104)
right outer join (select * from functional.testtbl a3) t3 on (
t1.id = t3.id and t2.id = 15 and t1.id - t2.id = 0 and t3.zip = 94720 )
where t1.id > 0 and t2.id is null and t3.id is not null
and t1.zip + t2.zip = 10 and t1.zip + t2.zip + t3.zip= 20
---- PLAN
04:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: a1.id = a3.id
| other join predicates: a1.id - a2.id = 0, a3.zip = 94720
| other predicates: a2.id IS NULL, a1.id > 0, a1.zip + a2.zip = 10, a1.zip + a2.zip + a3.zip = 20
|
|--02:SCAN HDFS [functional.testtbl a3]
| partitions=1/1 size=0B compact
| predicates: a3.id IS NOT NULL
|
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: a1.id - 1 = a2.id + 1
| other join predicates: a1.zip = 94611, a2.zip = 94104
|
|--01:SCAN HDFS [functional.testtbl a2]
| partitions=1/1 size=0B compact
| predicates: a2.id = 15
|
00:SCAN HDFS [functional.testtbl a1]
partitions=1/1 size=0B
predicates: a1.id > 0, a1.id IS NOT NULL
---- DISTRIBUTEDPLAN
09:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: a1.id = a3.id
| other join predicates: a1.id - a2.id = 0, a3.zip = 94720
| other predicates: a2.id IS NULL, a1.id > 0, a1.zip + a2.zip = 10, a1.zip + a2.zip + a3.zip = 20
|
|--08:EXCHANGE [HASH(a3.id)]
| |
| 02:SCAN HDFS [functional.testtbl a3]
| partitions=1/1 size=0B
| predicates: a3.id IS NOT NULL
|
07:EXCHANGE [HASH(a1.id)]
|
03:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: a1.id - 1 = a2.id + 1
| other join predicates: a1.zip = 94611, a2.zip = 94104
|
|--06:EXCHANGE [HASH(a2.id + 1)]
| |
| 01:SCAN HDFS [functional.testtbl a2]
| partitions=1/1 size=0B
| predicates: a2.id = 15
|
05:EXCHANGE [HASH(a1.id - 1)]
|
00:SCAN HDFS [functional.testtbl a1]
partitions=1/1 size=0B
predicates: a1.id > 0, a1.id IS NOT NULL
====
# right outer join requires the join op to be partitioned, otherwise non-matches cause
# duplicates
# The expected distributed plan therefore uses a PARTITIONED hash join with HASH
# exchanges on both inputs (a.tinyint_col and b.id) rather than a BROADCAST join.
# The where-clause predicate on the nullable side (a.tinyint_col is null) is applied
# by the join as an "other predicate"; the scan of a carries no predicates.
select a.tinyint_col, b.id
from functional.alltypesagg a
right outer join functional.alltypestiny b on (a.tinyint_col = b.id)
where a.tinyint_col is null
---- PLAN
02:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: a.tinyint_col = b.id
| other predicates: a.tinyint_col IS NULL
|
|--01:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 size=460B compact
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
| hash predicates: a.tinyint_col = b.id
| other predicates: a.tinyint_col IS NULL
|
|--04:EXCHANGE [HASH(b.id)]
| |
| 01:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 size=460B
|
03:EXCHANGE [HASH(a.tinyint_col)]
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 size=814.73KB
====
# same for full outer joins
# The query matches the preceding right-outer-join test except for the join type.
# The expected distributed plan again uses a PARTITIONED hash join with HASH exchanges
# on both inputs, and a.tinyint_col IS NULL is applied by the join as an
# "other predicate" rather than by either scan.
select a.tinyint_col, b.id
from functional.alltypesagg a
full outer join functional.alltypestiny b on (a.tinyint_col = b.id)
where a.tinyint_col is null
---- PLAN
02:HASH JOIN [FULL OUTER JOIN]
| hash predicates: a.tinyint_col = b.id
| other predicates: a.tinyint_col IS NULL
|
|--01:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 size=460B compact
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 size=814.73KB
---- DISTRIBUTEDPLAN
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
| hash predicates: a.tinyint_col = b.id
| other predicates: a.tinyint_col IS NULL
|
|--04:EXCHANGE [HASH(b.id)]
| |
| 01:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 size=460B
|
03:EXCHANGE [HASH(a.tinyint_col)]
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 size=814.73KB
====