mirror of
https://github.com/apache/impala.git
synced 2026-01-08 03:02:48 -05:00
Equivalence classes are used to get the equivalences between slots. The concept is ill-defined and the current implementation is inefficient. This patch removes equivalence classes and directly uses the information from the value transfer graph instead. The value transfer graph is reimplemented using Tarjan's strongly connected components algorithm and BFS with adjacency lists to speed it up on both condensed and sparse graphs. Testing: It passes the existing tests. In planner tests, the equivalence between the SCC-condensed graph and the uncondensed graph is checked. A test case is added for the helper class IntArrayList. An outer-join edge case is added to the planner tests. On a query with 1800 union operations, the equivalence class computation time is reduced from 7m57s to 65ms and the planning time is reduced from 8m5s to 13s. Change-Id: If4cb1d8be46efa8fd61a97048cc79dabe2ffa51a Reviewed-on: http://gerrit.cloudera.org:8080/8317 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: Impala Public Jenkins
1157 lines
36 KiB
Plaintext
1157 lines
36 KiB
Plaintext
# correct placement of predicates with left outer joins; t2 and t3 are nullable
|
|
select *
|
|
from functional.testtbl t1 left outer join functional.testtbl t2 on (
|
|
t1.id - 1 = t2.id + 1
|
|
# lhs predicate is join predicate
|
|
and t1.zip = 94611
|
|
# rhs predicate applied by rhs scan
|
|
and t2.zip = 94104)
|
|
left outer join functional.testtbl t3 on (
|
|
t1.id = t3.id
|
|
# predicate on t2 is join predicate, not applied by t2 scan
|
|
and t2.id = 15
|
|
# predicate on t1 and t2 is join predicate
|
|
and t1.id - t2.id = 0
|
|
# rhs predicate applied by rhs scan
|
|
and t3.zip = 94720
|
|
)
|
|
where
|
|
# t1 predicate in where clause is applied by scans (t1; also propagated to t3)
|
|
t1.id > 0
|
|
# t2 predicate in where clause is applied by the t2 join only (t2 is nullable, so it is not applied by the t2 scan)
|
|
and t2.id is null
|
|
# t3 predicate in where clause is applied by join and scan
|
|
and t3.id is not null
|
|
# join predicate between t1 and t2 applied after t2 join
|
|
and t1.zip + t2.zip = 10
|
|
# join predicate between t1, t2 and t3 applied after last join
|
|
and t1.zip + t2.zip + t3.zip= 20
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
04:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: t1.id = t3.id
|
|
| other join predicates: t2.id = 15, t1.id - t2.id = 0
|
|
| other predicates: t3.id IS NOT NULL, t1.zip + t2.zip + t3.zip = 20
|
|
|
|
|
|--02:SCAN HDFS [functional.testtbl t3]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: t3.id IS NOT NULL, t3.id > 0, t3.zip = 94720
|
|
|
|
|
03:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: t1.id - 1 = t2.id + 1
|
|
| other join predicates: t1.zip = 94611
|
|
| other predicates: t2.id IS NULL, t1.zip + t2.zip = 10
|
|
|
|
|
|--01:SCAN HDFS [functional.testtbl t2]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: t2.zip = 94104
|
|
|
|
|
00:SCAN HDFS [functional.testtbl t1]
|
|
partitions=1/1 files=0 size=0B
|
|
predicates: t1.id > 0
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
07:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
|
|
| hash predicates: t1.id = t3.id
|
|
| other join predicates: t2.id = 15, t1.id - t2.id = 0
|
|
| other predicates: t3.id IS NOT NULL, t1.zip + t2.zip + t3.zip = 20
|
|
|
|
|
|--06:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 02:SCAN HDFS [functional.testtbl t3]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: t3.id IS NOT NULL, t3.id > 0, t3.zip = 94720
|
|
|
|
|
03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
|
|
| hash predicates: t1.id - 1 = t2.id + 1
|
|
| other join predicates: t1.zip = 94611
|
|
| other predicates: t2.id IS NULL, t1.zip + t2.zip = 10
|
|
|
|
|
|--05:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 01:SCAN HDFS [functional.testtbl t2]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: t2.zip = 94104
|
|
|
|
|
00:SCAN HDFS [functional.testtbl t1]
|
|
partitions=1/1 files=0 size=0B
|
|
predicates: t1.id > 0
|
|
====
|
|
# the same thing with subqueries; should produce the same result
|
|
select *
|
|
from (select * from functional.testtbl a1) t1
|
|
left outer join (select * from functional.testtbl a2) t2 on (
|
|
t1.id - 1 = t2.id + 1 and t1.zip = 94611 and t2.zip = 94104)
|
|
left outer join (select * from functional.testtbl a3) t3 on (
|
|
t1.id = t3.id and t2.id = 15 and t1.id - t2.id = 0 and t3.zip = 94720)
|
|
where t1.id > 0 and t2.id is null and t3.id is not null
|
|
and t1.zip + t2.zip = 10 and t1.zip + t2.zip + t3.zip= 20
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
04:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: a1.id = a3.id
|
|
| other join predicates: a2.id = 15, a1.id - a2.id = 0
|
|
| other predicates: a3.id IS NOT NULL, a1.zip + a2.zip + a3.zip = 20
|
|
|
|
|
|--02:SCAN HDFS [functional.testtbl a3]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: a3.id IS NOT NULL, a3.id > 0, a3.zip = 94720
|
|
|
|
|
03:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: a1.id - 1 = a2.id + 1
|
|
| other join predicates: a1.zip = 94611
|
|
| other predicates: a2.id IS NULL, a1.zip + a2.zip = 10
|
|
|
|
|
|--01:SCAN HDFS [functional.testtbl a2]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: a2.zip = 94104
|
|
|
|
|
00:SCAN HDFS [functional.testtbl a1]
|
|
partitions=1/1 files=0 size=0B
|
|
predicates: a1.id > 0
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
07:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
|
|
| hash predicates: a1.id = a3.id
|
|
| other join predicates: a2.id = 15, a1.id - a2.id = 0
|
|
| other predicates: a3.id IS NOT NULL, a1.zip + a2.zip + a3.zip = 20
|
|
|
|
|
|--06:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 02:SCAN HDFS [functional.testtbl a3]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: a3.id IS NOT NULL, a3.id > 0, a3.zip = 94720
|
|
|
|
|
03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
|
|
| hash predicates: a1.id - 1 = a2.id + 1
|
|
| other join predicates: a1.zip = 94611
|
|
| other predicates: a2.id IS NULL, a1.zip + a2.zip = 10
|
|
|
|
|
|--05:EXCHANGE [BROADCAST]
|
|
| |
|
|
| 01:SCAN HDFS [functional.testtbl a2]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: a2.zip = 94104
|
|
|
|
|
00:SCAN HDFS [functional.testtbl a1]
|
|
partitions=1/1 files=0 size=0B
|
|
predicates: a1.id > 0
|
|
====
|
|
# correct propagation of scan predicates in OJ On clauses:
|
|
# id = 17 must not be applied by the t1 and t3 scans
|
|
select *
|
|
from functional.testtbl t1
|
|
left outer join functional.testtbl t2 on (
|
|
t1.id = t2.id and t1.id = 17)
|
|
join functional.testtbl t3 on (t1.id = t3.id)
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: t1.id = t3.id
|
|
| runtime filters: RF000 <- t3.id
|
|
|
|
|
|--02:SCAN HDFS [functional.testtbl t3]
|
|
| partitions=1/1 files=0 size=0B
|
|
|
|
|
03:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: t1.id = t2.id
|
|
| other join predicates: t1.id = 17
|
|
|
|
|
|--01:SCAN HDFS [functional.testtbl t2]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: t2.id = 17
|
|
| runtime filters: RF000 -> t2.id
|
|
|
|
|
00:SCAN HDFS [functional.testtbl t1]
|
|
partitions=1/1 files=0 size=0B
|
|
runtime filters: RF000 -> t1.id
|
|
====
|
|
# correct placement of predicates with right outer joins; t1 and t2 are nullable
|
|
select *
|
|
from functional.testtbl t1 right outer join functional.testtbl t2 on (
|
|
t1.id - 1 = t2.id + 1
|
|
# lhs predicate is join predicate (not applied by the t1 scan)
|
|
and t1.zip = 94611
|
|
# rhs predicate is join predicate
|
|
and t2.zip = 94104)
|
|
right outer join functional.testtbl t3 on (
|
|
t1.id = t3.id
|
|
# predicate on t2 is scan predicate
|
|
and t2.id = 15
|
|
# predicate on t1 and t2 could be applied by previous join
|
|
# but the planner doesn't recognize that case and makes it a join predicate
|
|
and t1.id - t2.id = 0
|
|
# rhs predicate is join predicate
|
|
and t3.zip = 94720
|
|
)
|
|
where
|
|
# t1 predicate in where clause is applied by scan and last join
|
|
t1.id > 0
|
|
# t2 predicate in where clause is applied by last join
|
|
and t2.id is null
|
|
# t3 predicate in where clause is applied by scan
|
|
and t3.id is not null
|
|
# join predicate between t1 and t2 applied after last join
|
|
and t1.zip + t2.zip = 10
|
|
# join predicate between t1, t2 and t3 applied after last join
|
|
and t1.zip + t2.zip + t3.zip= 20
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
04:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: t1.id = t3.id
|
|
| other join predicates: t3.zip = 94720, t1.id - t2.id = 0
|
|
| other predicates: t2.id IS NULL, t1.id > 0, t1.zip + t2.zip = 10, t1.zip + t2.zip + t3.zip = 20
|
|
| runtime filters: RF000 <- t3.id
|
|
|
|
|
|--02:SCAN HDFS [functional.testtbl t3]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: t3.id IS NOT NULL
|
|
|
|
|
03:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: t1.id - 1 = t2.id + 1
|
|
| other join predicates: t1.zip = 94611, t2.zip = 94104
|
|
| runtime filters: RF002 <- t2.id + 1
|
|
|
|
|
|--01:SCAN HDFS [functional.testtbl t2]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: t2.id = 15
|
|
|
|
|
00:SCAN HDFS [functional.testtbl t1]
|
|
partitions=1/1 files=0 size=0B
|
|
predicates: t1.id IS NOT NULL, t1.id > 0
|
|
runtime filters: RF000 -> t1.id, RF002 -> t1.id - 1
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
09:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: t1.id = t3.id
|
|
| other join predicates: t3.zip = 94720, t1.id - t2.id = 0
|
|
| other predicates: t2.id IS NULL, t1.id > 0, t1.zip + t2.zip = 10, t1.zip + t2.zip + t3.zip = 20
|
|
| runtime filters: RF000 <- t3.id
|
|
|
|
|
|--08:EXCHANGE [HASH(t3.id)]
|
|
| |
|
|
| 02:SCAN HDFS [functional.testtbl t3]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: t3.id IS NOT NULL
|
|
|
|
|
07:EXCHANGE [HASH(t1.id)]
|
|
|
|
|
03:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: t1.id - 1 = t2.id + 1
|
|
| other join predicates: t1.zip = 94611, t2.zip = 94104
|
|
| runtime filters: RF002 <- t2.id + 1
|
|
|
|
|
|--06:EXCHANGE [HASH(t2.id + 1)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.testtbl t2]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: t2.id = 15
|
|
|
|
|
05:EXCHANGE [HASH(t1.id - 1)]
|
|
|
|
|
00:SCAN HDFS [functional.testtbl t1]
|
|
partitions=1/1 files=0 size=0B
|
|
predicates: t1.id IS NOT NULL, t1.id > 0
|
|
runtime filters: RF000 -> t1.id, RF002 -> t1.id - 1
|
|
====
|
|
# the same thing with subqueries; should produce the same result
|
|
select *
|
|
from (select * from functional.testtbl a1) t1 right outer join (select * from functional.testtbl a2) t2 on (
|
|
t1.id - 1 = t2.id + 1 and t1.zip = 94611 and t2.zip = 94104)
|
|
right outer join (select * from functional.testtbl a3) t3 on (
|
|
t1.id = t3.id and t2.id = 15 and t1.id - t2.id = 0 and t3.zip = 94720 )
|
|
where t1.id > 0 and t2.id is null and t3.id is not null
|
|
and t1.zip + t2.zip = 10 and t1.zip + t2.zip + t3.zip= 20
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
04:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: a1.id = a3.id
|
|
| other join predicates: a3.zip = 94720, a1.id - a2.id = 0
|
|
| other predicates: a2.id IS NULL, a1.id > 0, a1.zip + a2.zip = 10, a1.zip + a2.zip + a3.zip = 20
|
|
| runtime filters: RF000 <- a3.id
|
|
|
|
|
|--02:SCAN HDFS [functional.testtbl a3]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: a3.id IS NOT NULL
|
|
|
|
|
03:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: a1.id - 1 = a2.id + 1
|
|
| other join predicates: a1.zip = 94611, a2.zip = 94104
|
|
| runtime filters: RF002 <- a2.id + 1
|
|
|
|
|
|--01:SCAN HDFS [functional.testtbl a2]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: a2.id = 15
|
|
|
|
|
00:SCAN HDFS [functional.testtbl a1]
|
|
partitions=1/1 files=0 size=0B
|
|
predicates: a1.id IS NOT NULL, a1.id > 0
|
|
runtime filters: RF000 -> a1.id, RF002 -> a1.id - 1
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
09:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: a1.id = a3.id
|
|
| other join predicates: a3.zip = 94720, a1.id - a2.id = 0
|
|
| other predicates: a2.id IS NULL, a1.id > 0, a1.zip + a2.zip = 10, a1.zip + a2.zip + a3.zip = 20
|
|
| runtime filters: RF000 <- a3.id
|
|
|
|
|
|--08:EXCHANGE [HASH(a3.id)]
|
|
| |
|
|
| 02:SCAN HDFS [functional.testtbl a3]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: a3.id IS NOT NULL
|
|
|
|
|
07:EXCHANGE [HASH(a1.id)]
|
|
|
|
|
03:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: a1.id - 1 = a2.id + 1
|
|
| other join predicates: a1.zip = 94611, a2.zip = 94104
|
|
| runtime filters: RF002 <- a2.id + 1
|
|
|
|
|
|--06:EXCHANGE [HASH(a2.id + 1)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.testtbl a2]
|
|
| partitions=1/1 files=0 size=0B
|
|
| predicates: a2.id = 15
|
|
|
|
|
05:EXCHANGE [HASH(a1.id - 1)]
|
|
|
|
|
00:SCAN HDFS [functional.testtbl a1]
|
|
partitions=1/1 files=0 size=0B
|
|
predicates: a1.id IS NOT NULL, a1.id > 0
|
|
runtime filters: RF000 -> a1.id, RF002 -> a1.id - 1
|
|
====
|
|
# right outer join requires the join op to be partitioned, otherwise non-matches cause
|
|
# duplicates
|
|
select a.tinyint_col, b.id
|
|
from functional.alltypesagg a
|
|
right outer join functional.alltypestiny b on (a.tinyint_col = b.id)
|
|
where a.tinyint_col is null
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
02:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: a.tinyint_col = b.id
|
|
| other predicates: a.tinyint_col IS NULL
|
|
| runtime filters: RF000 <- b.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny b]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
runtime filters: RF000 -> a.tinyint_col
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: a.tinyint_col = b.id
|
|
| other predicates: a.tinyint_col IS NULL
|
|
| runtime filters: RF000 <- b.id
|
|
|
|
|
|--04:EXCHANGE [HASH(b.id)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypestiny b]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
03:EXCHANGE [HASH(a.tinyint_col)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
runtime filters: RF000 -> a.tinyint_col
|
|
====
|
|
# same for full outer joins
|
|
select a.tinyint_col, b.id
|
|
from functional.alltypesagg a
|
|
full outer join functional.alltypestiny b on (a.tinyint_col = b.id)
|
|
where a.tinyint_col is null
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
02:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: a.tinyint_col = b.id
|
|
| other predicates: a.tinyint_col IS NULL
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny b]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
05:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
02:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
|
|
| hash predicates: a.tinyint_col = b.id
|
|
| other predicates: a.tinyint_col IS NULL
|
|
|
|
|
|--04:EXCHANGE [HASH(b.id)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypestiny b]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
03:EXCHANGE [HASH(a.tinyint_col)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
====
|
|
# Predicate assignment when query has a full outer join (IMPALA-1371)
|
|
select *
|
|
from functional.alltypes a full outer join functional.alltypes b
|
|
# predicate on a from the on clause is not assigned to the scan node
|
|
on (a.id = b.id and a.int_col < 10 and b.tinyint_col != 5)
|
|
# predicate on b from the where clause is assigned to the scan and
|
|
# also to the full outer join
|
|
where b.bigint_col > 10
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
02:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: a.id = b.id
|
|
| other join predicates: a.int_col < 10, b.tinyint_col != 5
|
|
| other predicates: b.bigint_col > 10
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes b]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: b.bigint_col > 10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Predicate assignment when query has a full outer followed by an inner join
|
|
select *
|
|
from functional.alltypes a full outer join functional.alltypes b
|
|
on (a.id = b.id)
|
|
inner join functional.alltypes c
|
|
on (a.int_col = c.int_col and a.tinyint_col < 10)
|
|
# predicate on b from the where clause is assigned to the
|
|
# first full outer join
|
|
where b.tinyint_col > 20
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
05:HASH JOIN [INNER JOIN]
|
|
| hash predicates: c.int_col = a.int_col
|
|
| other predicates: a.tinyint_col < 10
|
|
| runtime filters: RF000 <- a.int_col
|
|
|
|
|
|--04:HASH JOIN [FULL OUTER JOIN]
|
|
| | hash predicates: a.id = b.id
|
|
| | other predicates: b.tinyint_col > 20
|
|
| |
|
|
| |--01:SCAN HDFS [functional.alltypes b]
|
|
| | partitions=24/24 files=24 size=478.45KB
|
|
| | predicates: b.tinyint_col > 20
|
|
| |
|
|
| 00:SCAN HDFS [functional.alltypes a]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: a.tinyint_col < 10
|
|
|
|
|
02:SCAN HDFS [functional.alltypes c]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
runtime filters: RF000 -> c.int_col
|
|
====
|
|
# Predicate assignment when query has an inner join followed by a full outer join
|
|
select *
|
|
from functional.alltypes a inner join functional.alltypes b
|
|
on (a.id = b.id)
|
|
full outer join functional.alltypes c
|
|
on (a.int_col = c.int_col and a.tinyint_col < b.tinyint_col and a.bigint_col < 10)
|
|
# predicate on a is assigned to the scan and the full outer join, predicate on a and b
|
|
# is assigned to the full outer join
|
|
# TODO Ideally, the predicate on a and b should be assigned to the inner join and then
|
|
# re-assigned to the full outer join.
|
|
where a.smallint_col = 100 and a.float_col > b.float_col
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
04:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: c.int_col = a.int_col
|
|
| other join predicates: a.bigint_col < 10, a.tinyint_col < b.tinyint_col
|
|
| other predicates: a.smallint_col = 100, a.float_col > b.float_col
|
|
|
|
|
|--03:HASH JOIN [INNER JOIN]
|
|
| | hash predicates: b.id = a.id
|
|
| | runtime filters: RF000 <- a.id
|
|
| |
|
|
| |--00:SCAN HDFS [functional.alltypes a]
|
|
| | partitions=24/24 files=24 size=478.45KB
|
|
| | predicates: a.smallint_col = 100
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypes b]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| runtime filters: RF000 -> b.id
|
|
|
|
|
02:SCAN HDFS [functional.alltypes c]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Predicate assignment when query has a sequence of full outer joins interleaved with
|
|
# an inner join
|
|
select *
|
|
from functional.alltypes a full outer join functional.alltypes b
|
|
on (a.id = b.id and a.int_col < 10)
|
|
inner join functional.alltypes c
|
|
on (a.tinyint_col = c.tinyint_col and b.int_col > 10)
|
|
full outer join functional.alltypes d
|
|
# predicate on b from the on clause is assigned to the scan and is also re-applied
|
|
# in the full outer join
|
|
on (a.tinyint_col = d.tinyint_col and b.int_col < 20)
|
|
# predicates on a and b from the where clause are assigned to the last full outer join (the predicate on a is also applied by the scan)
|
|
where a.bool_col = false and a.float_col < b.float_col
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
07:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: a.tinyint_col = d.tinyint_col
|
|
| other join predicates: b.int_col < 20
|
|
| other predicates: a.float_col < b.float_col, a.bool_col = FALSE
|
|
|
|
|
|--03:SCAN HDFS [functional.alltypes d]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
06:HASH JOIN [INNER JOIN]
|
|
| hash predicates: a.tinyint_col = c.tinyint_col
|
|
| other predicates: b.int_col > 10
|
|
| runtime filters: RF000 <- c.tinyint_col
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes c]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
05:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: a.id = b.id
|
|
| other join predicates: a.int_col < 10
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes b]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: b.int_col > 10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: a.bool_col = FALSE
|
|
runtime filters: RF000 -> a.tinyint_col
|
|
====
|
|
# Predicate assignment through an inline-view that has a full outer join
|
|
select 1
|
|
from (
|
|
select a.tinyint_col x, b.tinyint_col y
|
|
from functional.alltypes a
|
|
full outer join functional.alltypes b
|
|
on (a.id = b.id)
|
|
inner join functional.alltypes c
|
|
# predicate on a and b from the on clause of the inner join is assigned to
|
|
# the first full outer join
|
|
on (b.smallint_col = c.smallint_col and a.int_col < b.int_col and b.id < 10)
|
|
full outer join functional.alltypes d
|
|
# predicate on a and b from the on clause of the full outer join is assigned
|
|
# to the full outer join
|
|
on (c.id = d.id and a.bigint_col > b.bigint_col)) v
|
|
# predicate not specified in the scope of the inline view is assigned to the last
|
|
# full outer join of the inline view that materializes the corresponding
|
|
# tuple ids
|
|
where x != y
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
06:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: c.id = d.id
|
|
| other join predicates: a.bigint_col > b.bigint_col
|
|
| other predicates: a.tinyint_col != b.tinyint_col
|
|
|
|
|
|--03:SCAN HDFS [functional.alltypes d]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
05:HASH JOIN [INNER JOIN]
|
|
| hash predicates: b.smallint_col = c.smallint_col
|
|
| other predicates: a.int_col < b.int_col, b.id < 10
|
|
| runtime filters: RF000 <- c.smallint_col
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes c]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
04:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: a.id = b.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes b]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: b.id < 10
|
|
| runtime filters: RF000 -> b.smallint_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Predicate assignment when query has an inner join followed by full outer join
|
|
select 1
|
|
from functional.alltypes a
|
|
inner join functional.alltypes b
|
|
on (a.id = b.id)
|
|
full outer join functional.alltypes c
|
|
on (b.int_col = c.int_col and c.int_col < 10)
|
|
# predicates on a and b from the where clause are assigned to the last full outer join
|
|
# that materializes the corresponding tuple ids
|
|
where a.bigint_col = b.bigint_col and a.tinyint_col < b.tinyint_col
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
04:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: b.int_col = c.int_col
|
|
| other join predicates: c.int_col < 10
|
|
| other predicates: a.bigint_col = b.bigint_col, a.tinyint_col < b.tinyint_col
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes c]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: b.id = a.id
|
|
| runtime filters: RF000 <- a.id
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypes a]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes b]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
runtime filters: RF000 -> b.id
|
|
====
|
|
# Predicate assignment when query has a sequence of inner, outer and full
|
|
# outer joins
|
|
select 1
|
|
from functional.alltypes a
|
|
inner join functional.alltypes b
|
|
on (a.id = b.id)
|
|
left outer join functional.alltypes c
|
|
on (a.int_col = c.int_col and c.tinyint_col = 10)
|
|
full outer join functional.alltypes d
|
|
on (b.string_col = d.string_col and a.tinyint_col < b.tinyint_col)
|
|
where a.float_col = b.float_col and b.smallint_col = 1 and d.tinyint_col < 10
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
06:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: b.string_col = d.string_col
|
|
| other join predicates: a.tinyint_col < b.tinyint_col
|
|
| other predicates: b.smallint_col = 1, a.float_col = b.float_col, d.tinyint_col < 10
|
|
|
|
|
|--03:SCAN HDFS [functional.alltypes d]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: d.tinyint_col < 10
|
|
|
|
|
05:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: c.int_col = a.int_col
|
|
| other join predicates: c.tinyint_col = 10
|
|
| runtime filters: RF000 <- a.int_col
|
|
|
|
|
|--04:HASH JOIN [INNER JOIN]
|
|
| | hash predicates: a.id = b.id
|
|
| | runtime filters: RF002 <- b.id
|
|
| |
|
|
| |--01:SCAN HDFS [functional.alltypes b]
|
|
| | partitions=24/24 files=24 size=478.45KB
|
|
| | predicates: b.smallint_col = 1
|
|
| |
|
|
| 00:SCAN HDFS [functional.alltypes a]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| runtime filters: RF002 -> a.id
|
|
|
|
|
02:SCAN HDFS [functional.alltypes c]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
runtime filters: RF000 -> c.int_col
|
|
====
|
|
# IMPALA-2144: Test correct assignment of Having-clause predicates
|
|
# referencing a grouping column coming from an outer-joined table ref.
|
|
select b.int_col, sum(b.double_col)
|
|
from functional.alltypes a left outer join functional.alltypestiny b
|
|
on (a.id = b.id)
|
|
group by a.bool_col, a.int_col, b.bool_col, b.int_col
|
|
having a.bool_col is null and a.int_col is not null
|
|
and b.bool_col is null and b.int_col is not null
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: sum(b.double_col)
|
|
| group by: a.bool_col, a.int_col, b.bool_col, b.int_col
|
|
| having: b.bool_col IS NULL, b.int_col IS NOT NULL
|
|
|
|
|
02:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: a.id = b.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny b]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: b.int_col IS NOT NULL
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: a.bool_col IS NULL, a.int_col IS NOT NULL
|
|
====
|
|
# IMPALA-2144: Same as above but with a full outer join.
|
|
select b.int_col, sum(b.double_col)
|
|
from functional.alltypes a full outer join functional.alltypestiny b
|
|
on (a.id = b.id)
|
|
group by a.bool_col, a.int_col, b.bool_col, b.int_col
|
|
having a.bool_col is null and a.int_col is not null
|
|
and b.bool_col is null and b.int_col is not null
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: sum(b.double_col)
|
|
| group by: a.bool_col, a.int_col, b.bool_col, b.int_col
|
|
| having: a.bool_col IS NULL, a.int_col IS NOT NULL, b.bool_col IS NULL, b.int_col IS NOT NULL
|
|
|
|
|
02:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: a.id = b.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny b]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: b.int_col IS NOT NULL
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: a.int_col IS NOT NULL
|
|
====
|
|
# IMPALA-2765: Predicate assignment when outer joins have case expressions as predicates
|
|
select *
|
|
from functional.alltypes a
|
|
left outer join functional.alltypes b
|
|
on (a.id = b.id)
|
|
where (
|
|
case when a.id > 100 then
|
|
a.timestamp_col
|
|
else
|
|
a.timestamp_col
|
|
end) >= cast('2001-01-01 00:00:00' as timestamp);
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
02:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: b.id = a.id
|
|
| runtime filters: RF000 <- a.id
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypes a]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: (CASE WHEN a.id > 100 THEN a.timestamp_col ELSE a.timestamp_col END) >= TIMESTAMP '2001-01-01 00:00:00'
|
|
|
|
|
01:SCAN HDFS [functional.alltypes b]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
runtime filters: RF000 -> b.id
|
|
====
|
|
# IMPALA-3071: Test correct assignment of non-join conjuncts belonging to the On-clause
|
|
# of an inner join that follows an outer join.
|
|
select 1 from functional.alltypestiny a
|
|
left outer join functional.alltypestiny b
|
|
on a.id = b.id
|
|
inner join functional.alltypestiny c
|
|
on b.id = c.id and b.int_col < 0 and a.int_col > 10
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
05:HASH JOIN [INNER JOIN]
|
|
| hash predicates: c.id = b.id
|
|
| other predicates: b.int_col < 0
|
|
| runtime filters: RF000 <- b.id
|
|
|
|
|
|--04:HASH JOIN [LEFT OUTER JOIN]
|
|
| | hash predicates: a.id = b.id
|
|
| |
|
|
| |--01:SCAN HDFS [functional.alltypestiny b]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| | predicates: b.int_col < 0
|
|
| |
|
|
| 00:SCAN HDFS [functional.alltypestiny a]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: a.int_col > 10
|
|
|
|
|
02:SCAN HDFS [functional.alltypestiny c]
|
|
partitions=4/4 files=4 size=460B
|
|
runtime filters: RF000 -> c.id
|
|
====
|
|
# IMPALA-3071: Same as above but with a right outer join.
|
|
select 1 from functional.alltypestiny a
|
|
right outer join functional.alltypestiny b
|
|
on a.id = b.id
|
|
inner join functional.alltypestiny c
|
|
on b.id = c.id and b.int_col < 0 and a.int_col > 10
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
05:HASH JOIN [INNER JOIN]
|
|
| hash predicates: c.id = b.id
|
|
| other predicates: a.int_col > 10
|
|
| runtime filters: RF000 <- b.id
|
|
|
|
|
|--04:HASH JOIN [RIGHT OUTER JOIN]
|
|
| | hash predicates: a.id = b.id
|
|
| | runtime filters: RF002 <- b.id
|
|
| |
|
|
| |--01:SCAN HDFS [functional.alltypestiny b]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| | predicates: b.int_col < 0
|
|
| |
|
|
| 00:SCAN HDFS [functional.alltypestiny a]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: a.int_col > 10
|
|
| runtime filters: RF002 -> a.id
|
|
|
|
|
02:SCAN HDFS [functional.alltypestiny c]
|
|
partitions=4/4 files=4 size=460B
|
|
runtime filters: RF000 -> c.id
|
|
====
|
|
# IMPALA-3071: Same as above but with a full outer join.
|
|
select 1 from functional.alltypestiny a
|
|
full outer join functional.alltypestiny b
|
|
on a.id = b.id
|
|
inner join functional.alltypestiny c
|
|
on b.id = c.id and b.int_col < 0 and a.int_col > 10
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
05:HASH JOIN [INNER JOIN]
|
|
| hash predicates: b.id = c.id
|
|
| other predicates: a.int_col > 10, b.int_col < 0
|
|
| runtime filters: RF000 <- c.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypestiny c]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
04:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: a.id = b.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny b]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: b.int_col < 0
|
|
| runtime filters: RF000 -> b.id
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny a]
|
|
partitions=4/4 files=4 size=460B
|
|
predicates: a.int_col > 10
|
|
====
|
|
# IMPALA-3071: Test that the inner join On-clause predicate is placed
|
|
# precisely at the correct outer join (not before or after).
|
|
select 1 from functional.alltypestiny a
|
|
left outer join functional.alltypestiny b
|
|
on a.id = b.id
|
|
left outer join functional.alltypestiny c
|
|
on b.id = c.id
|
|
inner join functional.alltypestiny d
|
|
on b.int_col < 0 and a.int_col > 10
|
|
full outer join functional.alltypestiny e
|
|
on d.id = e.id
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
09:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: e.id = d.id
|
|
|
|
|
|--08:NESTED LOOP JOIN [INNER JOIN]
|
|
| | predicates: a.int_col > 10, b.int_col < 0
|
|
| |
|
|
| |--07:HASH JOIN [RIGHT OUTER JOIN]
|
|
| | | hash predicates: c.id = b.id
|
|
| | | runtime filters: RF000 <- b.id
|
|
| | |
|
|
| | |--06:HASH JOIN [LEFT OUTER JOIN]
|
|
| | | | hash predicates: a.id = b.id
|
|
| | | |
|
|
| | | |--01:SCAN HDFS [functional.alltypestiny b]
|
|
| | | | partitions=4/4 files=4 size=460B
|
|
| | | | predicates: b.int_col < 0
|
|
| | | |
|
|
| | | 00:SCAN HDFS [functional.alltypestiny a]
|
|
| | | partitions=4/4 files=4 size=460B
|
|
| | | predicates: a.int_col > 10
|
|
| | |
|
|
| | 02:SCAN HDFS [functional.alltypestiny c]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| | runtime filters: RF000 -> c.id
|
|
| |
|
|
| 03:SCAN HDFS [functional.alltypestiny d]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
04:SCAN HDFS [functional.alltypestiny e]
|
|
partitions=4/4 files=4 size=460B
|
|
====
|
|
# IMPALA-3125: Test that the On-clause predicates from an outer join are assigned to the
|
|
# corresponding outer-join node, even if the predicates do not reference the join rhs.
|
|
select a.id aid, b.id bid, a.int_col aint, b.int_col bint
|
|
from functional.alltypes a
|
|
inner join functional.alltypes b
|
|
on a.int_col = b.int_col
|
|
left outer join functional.alltypes c
|
|
on a.id = b.id and b.bigint_col = c.bigint_col
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
04:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: b.bigint_col = c.bigint_col
|
|
| other join predicates: a.id = b.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes c]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: b.int_col = a.int_col
|
|
| runtime filters: RF000 <- a.int_col
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypes a]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes b]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
runtime filters: RF000 -> b.int_col
|
|
====
|
|
# IMPALA-3167: Test correct assignment of a WHERE-clause predicate through an inline view
|
|
# that has a grouping aggregation and an outer join. The predicate can be assigned at the
|
|
# scan on the nullable side of the outer join, but it must also be evaluated after the join.
|
|
select v2.id, v2.s
|
|
from (select v1.id, sum(bigint_col) s
|
|
from functional.alltypes t1
|
|
left outer join (select t2.int_col, t2.id
|
|
from functional.alltypessmall t2) v1
|
|
on t1.int_col = v1.int_col
|
|
group by v1.id) v2
|
|
where v2.id < 10
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: sum(bigint_col)
|
|
| group by: t2.id
|
|
| having: v1.id < 10
|
|
|
|
|
02:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: t1.int_col = t2.int_col
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypessmall t2]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
| predicates: t2.id < 10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t1]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# IMPALA-3126: Test assignment of an inner join On-clause predicate. The predicate
|
|
# may not be assigned below the join materializing 'd'.
|
|
select 1 from functional.alltypes a
|
|
left outer join functional.alltypes b
|
|
on a.id = b.id
|
|
right outer join functional.alltypes c
|
|
on b.id = c.id
|
|
inner join functional.alltypes d
|
|
on a.int_col = b.int_col
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
06:NESTED LOOP JOIN [INNER JOIN]
|
|
| predicates: a.int_col = b.int_col
|
|
|
|
|
|--03:SCAN HDFS [functional.alltypes d]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
05:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: b.id = c.id
|
|
| runtime filters: RF000 <- c.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes c]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
04:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: a.id = b.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes b]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| runtime filters: RF000 -> b.id
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# IMPALA-3126: Same as above but with a semi join at the end.
|
|
select 1 from functional.alltypes a
|
|
left outer join functional.alltypes b
|
|
on a.id = b.id
|
|
right outer join functional.alltypes c
|
|
on b.id = c.id
|
|
left semi join functional.alltypes d
|
|
on a.int_col = b.int_col
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
06:NESTED LOOP JOIN [LEFT SEMI JOIN]
|
|
| join predicates: a.int_col = b.int_col
|
|
|
|
|
|--03:SCAN HDFS [functional.alltypes d]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
05:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: b.id = c.id
|
|
| runtime filters: RF000 <- c.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes c]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
04:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: a.id = b.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes b]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| runtime filters: RF000 -> b.id
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# IMPALA-5856: Test correct assignment of a join predicate with the following properties:
|
|
# - from the On-clause of a left outer join
|
|
# - only references the left-hand side tuples (not the right-hand side tuple)
|
|
# - references full-outer joined tuples; the full outer join appears on the left
|
|
select * from functional.alltypes t1
|
|
full outer join functional.alltypessmall t2
|
|
on t1.id = t2.id
|
|
left outer join functional.alltypestiny t3
|
|
on coalesce(t1.id, t2.id) = t3.id and coalesce(t1.int_col, t2.int_col) = 2
|
|
and t1.bigint_col > 10 and t2.bigint_col > 30
|
|
where concat(t1.string_col, t2.string_col) = 'test1' and t3.string_col = 'test2'
|
|
---- PLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
04:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: coalesce(t1.id, t2.id) = t3.id
|
|
| other join predicates: t1.bigint_col > 10, t2.bigint_col > 30, coalesce(t1.int_col, t2.int_col) = 2
|
|
| other predicates: t3.string_col = 'test2'
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypestiny t3]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: t3.string_col = 'test2'
|
|
|
|
|
03:HASH JOIN [FULL OUTER JOIN]
|
|
| hash predicates: t1.id = t2.id
|
|
| other predicates: concat(t1.string_col, t2.string_col) = 'test1'
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypessmall t2]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t1]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# IMPALA-6123: Test the partition compatibility check with consecutive outer joins.
|
|
# In this query the output partition of the right outer join should be the rhs partition
|
|
# t3.id, which is not compatible with the aggregation partition t2.id. There should be
|
|
# an exchange node between the streaming pre-aggregation and the merge aggregation.
|
|
# The first left outer join is used to ensure 't2' is made nullable before the fragment
|
|
# executing the right outer join.
|
|
select /* +straight_join */ t2.id, count(*)
|
|
from functional.alltypes t1
|
|
left outer join /* +shuffle */ functional.alltypessmall t2
|
|
on t1.int_col = t2.int_col
|
|
right outer join /* +shuffle */ functional.alltypestiny t3
|
|
on t2.id = t3.id
|
|
group by t2.id
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
12:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
11:AGGREGATE [FINALIZE]
|
|
| output: count:merge(*)
|
|
| group by: t2.id
|
|
|
|
|
10:EXCHANGE [HASH(t2.id)]
|
|
|
|
|
05:AGGREGATE [STREAMING]
|
|
| output: count(*)
|
|
| group by: t2.id
|
|
|
|
|
04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: t2.id = t3.id
|
|
| runtime filters: RF000 <- t3.id
|
|
|
|
|
|--09:EXCHANGE [HASH(t3.id)]
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypestiny t3]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
08:EXCHANGE [HASH(t2.id)]
|
|
|
|
|
03:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: t1.int_col = t2.int_col
|
|
|
|
|
|--07:EXCHANGE [HASH(t2.int_col)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypessmall t2]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
| runtime filters: RF000 -> t2.id
|
|
|
|
|
06:EXCHANGE [HASH(t1.int_col)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t1]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# IMPALA-6123: Test the partition compatibility check with consecutive outer joins.
|
|
# In this query the output partition of the full outer join should be random. There should
|
|
# be an exchange node between the streaming pre-aggregation and the merge aggregation.
|
|
# The first right outer join is used to ensure 't1' is made nullable before the fragment
|
|
# executing the full outer join.
|
|
select /* +straight_join */ t1.int_col, count(*)
|
|
from functional.alltypes t1
|
|
right outer join /* +shuffle */ functional.alltypes t2
|
|
on t1.smallint_col = t2.smallint_col
|
|
full outer join /* +shuffle */ functional.alltypes t3
|
|
on t1.int_col = t3.int_col
|
|
group by t1.int_col
|
|
---- DISTRIBUTEDPLAN
|
|
PLAN-ROOT SINK
|
|
|
|
|
12:EXCHANGE [UNPARTITIONED]
|
|
|
|
|
11:AGGREGATE [FINALIZE]
|
|
| output: count:merge(*)
|
|
| group by: t1.int_col
|
|
|
|
|
10:EXCHANGE [HASH(t1.int_col)]
|
|
|
|
|
05:AGGREGATE [STREAMING]
|
|
| output: count(*)
|
|
| group by: t1.int_col
|
|
|
|
|
04:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
|
|
| hash predicates: t1.int_col = t3.int_col
|
|
|
|
|
|--09:EXCHANGE [HASH(t3.int_col)]
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypes t3]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
08:EXCHANGE [HASH(t1.int_col)]
|
|
|
|
|
03:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|
|
| hash predicates: t1.smallint_col = t2.smallint_col
|
|
| runtime filters: RF000 <- t2.smallint_col
|
|
|
|
|
|--07:EXCHANGE [HASH(t2.smallint_col)]
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypes t2]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
06:EXCHANGE [HASH(t1.smallint_col)]
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t1]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
runtime filters: RF000 -> t1.smallint_col
|
|
====
|