impala/testdata/workloads/functional-planner/queries/PlannerTest/outer-joins.test

# correct placement of predicates with left outer joins; t2 and t3 are nullable
select *
from functional.testtbl t1 left outer join functional.testtbl t2 on (
    t1.id - 1 = t2.id + 1
# lhs predicate is join predicate
    and t1.zip = 94611
# rhs predicate applied by rhs scan
    and t2.zip = 94104)
  left outer join functional.testtbl t3 on (
    t1.id = t3.id
# predicate on t2 is join predicate, not applied by t2 scan
    and t2.id = 15
# predicate on t1 and t2 is join predicate
    and t1.id - t2.id = 0
# rhs predicate applied by rhs scan
    and t3.zip = 94720
  )
where
# t1 predicate in where clause is applied by scans (t1; also propagated to t3)
t1.id > 0
# t2 predicate in where clause is applied by join and scan
and t2.id is null
# t3 predicate in where clause is applied by join and scan
and t3.id is not null
# join predicate between t1 and t2 applied after t2 join
and t1.zip + t2.zip = 10
# join predicate between t1, t2 and t3 applied after last join
and t1.zip + t2.zip + t3.zip= 20
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT OUTER JOIN]
|  hash predicates: t1.id = t3.id
|  other join predicates: t2.id = 15, t1.id - t2.id = 0
|  other predicates: t3.id IS NOT NULL, t1.zip + t2.zip + t3.zip = 20
|
|--02:SCAN HDFS [functional.testtbl t3]
|     partitions=1/1 files=0 size=0B
|     predicates: t3.id IS NOT NULL, t3.id > 0, t3.zip = 94720
|
03:HASH JOIN [LEFT OUTER JOIN]
|  hash predicates: t1.id - 1 = t2.id + 1
|  other join predicates: t1.zip = 94611
|  other predicates: t2.id IS NULL, t1.zip + t2.zip = 10
|
|--01:SCAN HDFS [functional.testtbl t2]
|     partitions=1/1 files=0 size=0B
|     predicates: t2.zip = 94104
|
00:SCAN HDFS [functional.testtbl t1]
   partitions=1/1 files=0 size=0B
   predicates: t1.id > 0
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
|  hash predicates: t1.id = t3.id
|  other join predicates: t2.id = 15, t1.id - t2.id = 0
|  other predicates: t3.id IS NOT NULL, t1.zip + t2.zip + t3.zip = 20
|
|--06:EXCHANGE [BROADCAST]
|  |
|  02:SCAN HDFS [functional.testtbl t3]
|     partitions=1/1 files=0 size=0B
|     predicates: t3.id IS NOT NULL, t3.id > 0, t3.zip = 94720
|
03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
|  hash predicates: t1.id - 1 = t2.id + 1
|  other join predicates: t1.zip = 94611
|  other predicates: t2.id IS NULL, t1.zip + t2.zip = 10
|
|--05:EXCHANGE [BROADCAST]
|  |
|  01:SCAN HDFS [functional.testtbl t2]
|     partitions=1/1 files=0 size=0B
|     predicates: t2.zip = 94104
|
00:SCAN HDFS [functional.testtbl t1]
   partitions=1/1 files=0 size=0B
   predicates: t1.id > 0
====
# the same thing with subqueries; should produce the same result
select *
from (select * from functional.testtbl a1) t1
  left outer join (select * from functional.testtbl a2) t2 on (
    t1.id - 1 = t2.id + 1 and t1.zip = 94611 and t2.zip = 94104)
  left outer join (select * from functional.testtbl a3) t3 on (
    t1.id = t3.id and t2.id = 15 and t1.id - t2.id = 0 and t3.zip = 94720)
where t1.id > 0 and t2.id is null and t3.id is not null
and t1.zip + t2.zip = 10 and t1.zip + t2.zip + t3.zip= 20
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT OUTER JOIN]
|  hash predicates: a1.id = a3.id
|  other join predicates: a2.id = 15, a1.id - a2.id = 0
|  other predicates: a3.id IS NOT NULL, a1.zip + a2.zip + a3.zip = 20
|
|--02:SCAN HDFS [functional.testtbl a3]
|     partitions=1/1 files=0 size=0B
|     predicates: a3.id IS NOT NULL, a3.id > 0, a3.zip = 94720
|
03:HASH JOIN [LEFT OUTER JOIN]
|  hash predicates: a1.id - 1 = a2.id + 1
|  other join predicates: a1.zip = 94611
|  other predicates: a2.id IS NULL, a1.zip + a2.zip = 10
|
|--01:SCAN HDFS [functional.testtbl a2]
|     partitions=1/1 files=0 size=0B
|     predicates: a2.zip = 94104
|
00:SCAN HDFS [functional.testtbl a1]
   partitions=1/1 files=0 size=0B
   predicates: a1.id > 0
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
07:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
|  hash predicates: a1.id = a3.id
|  other join predicates: a2.id = 15, a1.id - a2.id = 0
|  other predicates: a3.id IS NOT NULL, a1.zip + a2.zip + a3.zip = 20
|
|--06:EXCHANGE [BROADCAST]
|  |
|  02:SCAN HDFS [functional.testtbl a3]
|     partitions=1/1 files=0 size=0B
|     predicates: a3.id IS NOT NULL, a3.id > 0, a3.zip = 94720
|
03:HASH JOIN [LEFT OUTER JOIN, BROADCAST]
|  hash predicates: a1.id - 1 = a2.id + 1
|  other join predicates: a1.zip = 94611
|  other predicates: a2.id IS NULL, a1.zip + a2.zip = 10
|
|--05:EXCHANGE [BROADCAST]
|  |
|  01:SCAN HDFS [functional.testtbl a2]
|     partitions=1/1 files=0 size=0B
|     predicates: a2.zip = 94104
|
00:SCAN HDFS [functional.testtbl a1]
   partitions=1/1 files=0 size=0B
   predicates: a1.id > 0
====
# correct propagation of scan predicates in OJ On clauses:
# id = 17 must not be applied by the t1 and t3 scans
select *
from functional.testtbl t1
  left outer join functional.testtbl t2 on (
    t1.id = t2.id and t1.id = 17)
  join functional.testtbl t3 on (t1.id = t3.id)
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [INNER JOIN]
|  hash predicates: t1.id = t3.id
|  runtime filters: RF000 <- t3.id
|
|--02:SCAN HDFS [functional.testtbl t3]
|     partitions=1/1 files=0 size=0B
|
03:HASH JOIN [LEFT OUTER JOIN]
|  hash predicates: t1.id = t2.id
|  other join predicates: t1.id = 17
|
|--01:SCAN HDFS [functional.testtbl t2]
|     partitions=1/1 files=0 size=0B
|     predicates: t2.id = 17
|     runtime filters: RF000 -> t2.id
|
00:SCAN HDFS [functional.testtbl t1]
   partitions=1/1 files=0 size=0B
   runtime filters: RF000 -> t1.id
====
# correct placement of predicates with right outer joins; t1 and t2 are nullable
select *
from functional.testtbl t1 right outer join functional.testtbl t2 on (
    t1.id - 1 = t2.id + 1
# lhs predicate is scan predicate
    and t1.zip = 94611
# rhs predicate is join predicate
    and t2.zip = 94104)
  right outer join functional.testtbl t3 on (
    t1.id = t3.id
# predicate on t2 is scan predicate
    and t2.id = 15
# predicate on t1 and t2 could be applied by previous join
# but the planner doesn't recognize that case and makes it a join predicate
    and t1.id - t2.id = 0
# rhs predicate is join predicate
    and t3.zip = 94720
  )
where
# t1 predicate in where clause is applied by scan and last join
t1.id > 0
# t2 predicate in where clause is applied by last join
and t2.id is null
# t3 predicate in where clause is applied by scan
and t3.id is not null
# join predicate between t1 and t2 applied after last join
and t1.zip + t2.zip = 10
# join predicate between t1, t2 and t3 applied after last join
and t1.zip + t2.zip + t3.zip= 20
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [RIGHT OUTER JOIN]
|  hash predicates: t1.id = t3.id
|  other join predicates: t3.zip = 94720, t1.id - t2.id = 0
|  other predicates: t2.id IS NULL, t1.id > 0, t1.zip + t2.zip = 10, t1.zip + t2.zip + t3.zip = 20
|  runtime filters: RF000 <- t3.id
|
|--02:SCAN HDFS [functional.testtbl t3]
|     partitions=1/1 files=0 size=0B
|     predicates: t3.id IS NOT NULL
|
03:HASH JOIN [RIGHT OUTER JOIN]
|  hash predicates: t1.id - 1 = t2.id + 1
|  other join predicates: t1.zip = 94611, t2.zip = 94104
|  runtime filters: RF002 <- t2.id + 1
|
|--01:SCAN HDFS [functional.testtbl t2]
|     partitions=1/1 files=0 size=0B
|     predicates: t2.id = 15
|
00:SCAN HDFS [functional.testtbl t1]
   partitions=1/1 files=0 size=0B
   predicates: t1.id IS NOT NULL, t1.id > 0
   runtime filters: RF000 -> t1.id, RF002 -> t1.id - 1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|  hash predicates: t1.id = t3.id
|  other join predicates: t3.zip = 94720, t1.id - t2.id = 0
|  other predicates: t2.id IS NULL, t1.id > 0, t1.zip + t2.zip = 10, t1.zip + t2.zip + t3.zip = 20
|  runtime filters: RF000 <- t3.id
|
|--08:EXCHANGE [HASH(t3.id)]
|  |
|  02:SCAN HDFS [functional.testtbl t3]
|     partitions=1/1 files=0 size=0B
|     predicates: t3.id IS NOT NULL
|
07:EXCHANGE [HASH(t1.id)]
|
03:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|  hash predicates: t1.id - 1 = t2.id + 1
|  other join predicates: t1.zip = 94611, t2.zip = 94104
|  runtime filters: RF002 <- t2.id + 1
|
|--06:EXCHANGE [HASH(t2.id + 1)]
|  |
|  01:SCAN HDFS [functional.testtbl t2]
|     partitions=1/1 files=0 size=0B
|     predicates: t2.id = 15
|
05:EXCHANGE [HASH(t1.id - 1)]
|
00:SCAN HDFS [functional.testtbl t1]
   partitions=1/1 files=0 size=0B
   predicates: t1.id IS NOT NULL, t1.id > 0
   runtime filters: RF000 -> t1.id, RF002 -> t1.id - 1
====
# the same thing with subqueries; should produce the same result
select *
from (select * from functional.testtbl a1) t1 right outer join (select * from functional.testtbl a2) t2 on (
    t1.id - 1 = t2.id + 1 and t1.zip = 94611 and t2.zip = 94104)
  right outer join (select * from functional.testtbl a3) t3 on (
    t1.id = t3.id and t2.id = 15 and t1.id - t2.id = 0 and t3.zip = 94720 )
where t1.id > 0 and t2.id is null and t3.id is not null
and t1.zip + t2.zip = 10 and t1.zip + t2.zip + t3.zip= 20
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [RIGHT OUTER JOIN]
|  hash predicates: a1.id = a3.id
|  other join predicates: a3.zip = 94720, a1.id - a2.id = 0
|  other predicates: a2.id IS NULL, a1.id > 0, a1.zip + a2.zip = 10, a1.zip + a2.zip + a3.zip = 20
|  runtime filters: RF000 <- a3.id
|
|--02:SCAN HDFS [functional.testtbl a3]
|     partitions=1/1 files=0 size=0B
|     predicates: a3.id IS NOT NULL
|
03:HASH JOIN [RIGHT OUTER JOIN]
|  hash predicates: a1.id - 1 = a2.id + 1
|  other join predicates: a1.zip = 94611, a2.zip = 94104
|  runtime filters: RF002 <- a2.id + 1
|
|--01:SCAN HDFS [functional.testtbl a2]
|     partitions=1/1 files=0 size=0B
|     predicates: a2.id = 15
|
00:SCAN HDFS [functional.testtbl a1]
   partitions=1/1 files=0 size=0B
   predicates: a1.id IS NOT NULL, a1.id > 0
   runtime filters: RF000 -> a1.id, RF002 -> a1.id - 1
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
09:EXCHANGE [UNPARTITIONED]
|
04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|  hash predicates: a1.id = a3.id
|  other join predicates: a3.zip = 94720, a1.id - a2.id = 0
|  other predicates: a2.id IS NULL, a1.id > 0, a1.zip + a2.zip = 10, a1.zip + a2.zip + a3.zip = 20
|  runtime filters: RF000 <- a3.id
|
|--08:EXCHANGE [HASH(a3.id)]
|  |
|  02:SCAN HDFS [functional.testtbl a3]
|     partitions=1/1 files=0 size=0B
|     predicates: a3.id IS NOT NULL
|
07:EXCHANGE [HASH(a1.id)]
|
03:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|  hash predicates: a1.id - 1 = a2.id + 1
|  other join predicates: a1.zip = 94611, a2.zip = 94104
|  runtime filters: RF002 <- a2.id + 1
|
|--06:EXCHANGE [HASH(a2.id + 1)]
|  |
|  01:SCAN HDFS [functional.testtbl a2]
|     partitions=1/1 files=0 size=0B
|     predicates: a2.id = 15
|
05:EXCHANGE [HASH(a1.id - 1)]
|
00:SCAN HDFS [functional.testtbl a1]
   partitions=1/1 files=0 size=0B
   predicates: a1.id IS NOT NULL, a1.id > 0
   runtime filters: RF000 -> a1.id, RF002 -> a1.id - 1
====
# right outer join requires the join op to be partitioned, otherwise non-matches cause
# duplicates
select a.tinyint_col, b.id
from functional.alltypesagg a
right outer join functional.alltypestiny b on (a.tinyint_col = b.id)
where a.tinyint_col is null
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
|  hash predicates: a.tinyint_col = b.id
|  other predicates: a.tinyint_col IS NULL
|  runtime filters: RF000 <- b.id
|
|--01:SCAN HDFS [functional.alltypestiny b]
|     partitions=4/4 files=4 size=460B
|
00:SCAN HDFS [functional.alltypesagg a]
   partitions=11/11 files=11 size=814.73KB
   runtime filters: RF000 -> a.tinyint_col
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|  hash predicates: a.tinyint_col = b.id
|  other predicates: a.tinyint_col IS NULL
|  runtime filters: RF000 <- b.id
|
|--04:EXCHANGE [HASH(b.id)]
|  |
|  01:SCAN HDFS [functional.alltypestiny b]
|     partitions=4/4 files=4 size=460B
|
03:EXCHANGE [HASH(a.tinyint_col)]
|
00:SCAN HDFS [functional.alltypesagg a]
   partitions=11/11 files=11 size=814.73KB
   runtime filters: RF000 -> a.tinyint_col
====
# same for full outer joins
select a.tinyint_col, b.id
from functional.alltypesagg a
full outer join functional.alltypestiny b on (a.tinyint_col = b.id)
where a.tinyint_col is null
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: a.tinyint_col = b.id
|  other predicates: a.tinyint_col IS NULL
|
|--01:SCAN HDFS [functional.alltypestiny b]
|     partitions=4/4 files=4 size=460B
|
00:SCAN HDFS [functional.alltypesagg a]
   partitions=11/11 files=11 size=814.73KB
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
05:EXCHANGE [UNPARTITIONED]
|
02:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
|  hash predicates: a.tinyint_col = b.id
|  other predicates: a.tinyint_col IS NULL
|
|--04:EXCHANGE [HASH(b.id)]
|  |
|  01:SCAN HDFS [functional.alltypestiny b]
|     partitions=4/4 files=4 size=460B
|
03:EXCHANGE [HASH(a.tinyint_col)]
|
00:SCAN HDFS [functional.alltypesagg a]
   partitions=11/11 files=11 size=814.73KB
====
# Predicate assignment when query has a full outer join (IMPALA-1371)
select *
from functional.alltypes a full outer join functional.alltypes b
# predicate on a from the on clause is not assigned to the scan node
  on (a.id = b.id and a.int_col < 10 and b.tinyint_col != 5)
# predicate on b from the where clause is assigned to the scan and
# also to the full outer join
where b.bigint_col > 10
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: a.id = b.id
|  other join predicates: a.int_col < 10, b.tinyint_col != 5
|  other predicates: b.bigint_col > 10
|
|--01:SCAN HDFS [functional.alltypes b]
|     partitions=24/24 files=24 size=478.45KB
|     predicates: b.bigint_col > 10
|
00:SCAN HDFS [functional.alltypes a]
   partitions=24/24 files=24 size=478.45KB
====
# Predicate assignment when query has a full outer followed by an inner join
select *
from functional.alltypes a full outer join functional.alltypes b
  on (a.id = b.id)
inner join functional.alltypes c
  on (a.int_col = c.int_col and a.tinyint_col < 10)
# predicate on b from the where clause is assigned to the
# first full outer join
where b.tinyint_col > 20
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [INNER JOIN]
|  hash predicates: c.int_col = a.int_col
|  other predicates: a.tinyint_col < 10
|  runtime filters: RF000 <- a.int_col
|
|--04:HASH JOIN [FULL OUTER JOIN]
|  |  hash predicates: a.id = b.id
|  |  other predicates: b.tinyint_col > 20
|  |
|  |--01:SCAN HDFS [functional.alltypes b]
|  |     partitions=24/24 files=24 size=478.45KB
|  |     predicates: b.tinyint_col > 20
|  |
|  00:SCAN HDFS [functional.alltypes a]
|     partitions=24/24 files=24 size=478.45KB
|     predicates: a.tinyint_col < 10
|
02:SCAN HDFS [functional.alltypes c]
   partitions=24/24 files=24 size=478.45KB
   runtime filters: RF000 -> c.int_col
====
# Predicate assignment when query has an inner join followed by a full outer join
select *
from functional.alltypes a inner join functional.alltypes b
  on (a.id = b.id)
full outer join functional.alltypes c
  on (a.int_col = c.int_col and a.tinyint_col < b.tinyint_col and a.bigint_col < 10)
# predicate on a is assigned to the scan and the full outer join, predicate on a and b
# is assigned to the full outer join
# TODO Ideally, the predicate on a and b should be assigned to the inner join and then
# re-assigned to the full outer join.
where a.smallint_col = 100 and a.float_col > b.float_col
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: c.int_col = a.int_col
|  other join predicates: a.bigint_col < 10, a.tinyint_col < b.tinyint_col
|  other predicates: a.smallint_col = 100, a.float_col > b.float_col
|
|--03:HASH JOIN [INNER JOIN]
|  |  hash predicates: b.id = a.id
|  |  runtime filters: RF000 <- a.id
|  |
|  |--00:SCAN HDFS [functional.alltypes a]
|  |     partitions=24/24 files=24 size=478.45KB
|  |     predicates: a.smallint_col = 100
|  |
|  01:SCAN HDFS [functional.alltypes b]
|     partitions=24/24 files=24 size=478.45KB
|     runtime filters: RF000 -> b.id
|
02:SCAN HDFS [functional.alltypes c]
   partitions=24/24 files=24 size=478.45KB
====
# Predicate assignment when query has a sequence of full outer joins interleaved with
# an inner join
select *
from functional.alltypes a full outer join functional.alltypes b
  on (a.id = b.id and a.int_col < 10)
inner join functional.alltypes c
  on (a.tinyint_col = c.tinyint_col and b.int_col > 10)
full outer join functional.alltypes d
# predicate on b from the on clause is assigned to the scan and is also re-applied
# in the full outer join
  on (a.tinyint_col = d.tinyint_col and b.int_col < 20)
# predicate on b from the where clause is assigned to the first full outer join
where a.bool_col = false and a.float_col < b.float_col
---- PLAN
PLAN-ROOT SINK
|
07:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: a.tinyint_col = d.tinyint_col
|  other join predicates: b.int_col < 20
|  other predicates: a.float_col < b.float_col, a.bool_col = FALSE
|
|--03:SCAN HDFS [functional.alltypes d]
|     partitions=24/24 files=24 size=478.45KB
|
06:HASH JOIN [INNER JOIN]
|  hash predicates: a.tinyint_col = c.tinyint_col
|  other predicates: b.int_col > 10
|  runtime filters: RF000 <- c.tinyint_col
|
|--02:SCAN HDFS [functional.alltypes c]
|     partitions=24/24 files=24 size=478.45KB
|
05:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: a.id = b.id
|  other join predicates: a.int_col < 10
|
|--01:SCAN HDFS [functional.alltypes b]
|     partitions=24/24 files=24 size=478.45KB
|     predicates: b.int_col > 10
|
00:SCAN HDFS [functional.alltypes a]
   partitions=24/24 files=24 size=478.45KB
   predicates: a.bool_col = FALSE
   runtime filters: RF000 -> a.tinyint_col
====
# Predicate assignment through an inline-view that has a full outer join
select 1
from (
  select a.tinyint_col x, b.tinyint_col y
  from functional.alltypes a
  full outer join functional.alltypes b
    on (a.id = b.id)
  inner join functional.alltypes c
# predicate on a and b from the on clause of the inner join is assigned to
# the first full outer join
    on (b.smallint_col = c.smallint_col and a.int_col < b.int_col and b.id < 10)
  full outer join functional.alltypes d
# predicate on a and b from the on clause of the full outer join is assigned
# to the full outer join
    on (c.id = d.id and a.bigint_col > b.bigint_col)) v
# predicate not specified in the scope of the inline view is assigned to the last
# full outer join of the inline view that materializes the corresponding
# tuple ids
where x != y
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: c.id = d.id
|  other join predicates: a.bigint_col > b.bigint_col
|  other predicates: a.tinyint_col != b.tinyint_col
|
|--03:SCAN HDFS [functional.alltypes d]
|     partitions=24/24 files=24 size=478.45KB
|
05:HASH JOIN [INNER JOIN]
|  hash predicates: b.smallint_col = c.smallint_col
|  other predicates: a.int_col < b.int_col, b.id < 10
|  runtime filters: RF000 <- c.smallint_col
|
|--02:SCAN HDFS [functional.alltypes c]
|     partitions=24/24 files=24 size=478.45KB
|
04:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: a.id = b.id
|
|--01:SCAN HDFS [functional.alltypes b]
|     partitions=24/24 files=24 size=478.45KB
|     predicates: b.id < 10
|     runtime filters: RF000 -> b.smallint_col
|
00:SCAN HDFS [functional.alltypes a]
   partitions=24/24 files=24 size=478.45KB
====
# Predicate assignment when query has an inner join followed by full outer join
select 1
from functional.alltypes a
inner join functional.alltypes b
  on (a.id = b.id)
full outer join functional.alltypes c
  on (b.int_col = c.int_col and c.int_col < 10)
# predicates on a and b from the where clause are assigned to the last full outer join
# that materializes the corresponding tuple ids
where a.bigint_col = b.bigint_col and a.tinyint_col < b.tinyint_col
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: b.int_col = c.int_col
|  other join predicates: c.int_col < 10
|  other predicates: a.bigint_col = b.bigint_col, a.tinyint_col < b.tinyint_col
|
|--02:SCAN HDFS [functional.alltypes c]
|     partitions=24/24 files=24 size=478.45KB
|
03:HASH JOIN [INNER JOIN]
|  hash predicates: b.id = a.id
|  runtime filters: RF000 <- a.id
|
|--00:SCAN HDFS [functional.alltypes a]
|     partitions=24/24 files=24 size=478.45KB
|
01:SCAN HDFS [functional.alltypes b]
   partitions=24/24 files=24 size=478.45KB
   runtime filters: RF000 -> b.id
====
# Predicate assignment when query has a sequence of inner, outer and full
# outer joins
select 1
from functional.alltypes a
inner join functional.alltypes b
  on (a.id = b.id)
left outer join functional.alltypes c
  on (a.int_col = c.int_col and c.tinyint_col = 10)
full outer join functional.alltypes d
  on (b.string_col = d.string_col and a.tinyint_col < b.tinyint_col)
where a.float_col = b.float_col and b.smallint_col = 1 and d.tinyint_col < 10
---- PLAN
PLAN-ROOT SINK
|
06:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: b.string_col = d.string_col
|  other join predicates: a.tinyint_col < b.tinyint_col
|  other predicates: b.smallint_col = 1, a.float_col = b.float_col, d.tinyint_col < 10
|
|--03:SCAN HDFS [functional.alltypes d]
|     partitions=24/24 files=24 size=478.45KB
|     predicates: d.tinyint_col < 10
|
05:HASH JOIN [RIGHT OUTER JOIN]
|  hash predicates: c.int_col = a.int_col
|  other join predicates: c.tinyint_col = 10
|  runtime filters: RF000 <- a.int_col
|
|--04:HASH JOIN [INNER JOIN]
|  |  hash predicates: a.id = b.id
|  |  runtime filters: RF002 <- b.id
|  |
|  |--01:SCAN HDFS [functional.alltypes b]
|  |     partitions=24/24 files=24 size=478.45KB
|  |     predicates: b.smallint_col = 1
|  |
|  00:SCAN HDFS [functional.alltypes a]
|     partitions=24/24 files=24 size=478.45KB
|     runtime filters: RF002 -> a.id
|
02:SCAN HDFS [functional.alltypes c]
   partitions=24/24 files=24 size=478.45KB
   runtime filters: RF000 -> c.int_col
====
# IMPALA-2144: Test correct assignment of Having-clause predicates
# referencing a grouping column coming from an outer-joined table ref.
select b.int_col, sum(b.double_col)
from functional.alltypes a left outer join functional.alltypestiny b
  on (a.id = b.id)
group by a.bool_col, a.int_col, b.bool_col, b.int_col
having a.bool_col is null and a.int_col is not null
  and b.bool_col is null and b.int_col is not null
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
|  output: sum(b.double_col)
|  group by: a.bool_col, a.int_col, b.bool_col, b.int_col
|  having: b.bool_col IS NULL, b.int_col IS NOT NULL
|
02:HASH JOIN [LEFT OUTER JOIN]
|  hash predicates: a.id = b.id
|
|--01:SCAN HDFS [functional.alltypestiny b]
|     partitions=4/4 files=4 size=460B
|     predicates: b.int_col IS NOT NULL
|
00:SCAN HDFS [functional.alltypes a]
   partitions=24/24 files=24 size=478.45KB
   predicates: a.bool_col IS NULL, a.int_col IS NOT NULL
====
# IMPALA-2144: Same as above but with a full outer join.
select b.int_col, sum(b.double_col)
from functional.alltypes a full outer join functional.alltypestiny b
  on (a.id = b.id)
group by a.bool_col, a.int_col, b.bool_col, b.int_col
having a.bool_col is null and a.int_col is not null
  and b.bool_col is null and b.int_col is not null
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
|  output: sum(b.double_col)
|  group by: a.bool_col, a.int_col, b.bool_col, b.int_col
|  having: a.bool_col IS NULL, a.int_col IS NOT NULL, b.bool_col IS NULL, b.int_col IS NOT NULL
|
02:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: a.id = b.id
|
|--01:SCAN HDFS [functional.alltypestiny b]
|     partitions=4/4 files=4 size=460B
|     predicates: b.int_col IS NOT NULL
|
00:SCAN HDFS [functional.alltypes a]
   partitions=24/24 files=24 size=478.45KB
   predicates: a.int_col IS NOT NULL
====
# IMPALA-2765: Predicate assignment when outer joins have case expressions as predicates
select *
from functional.alltypes a
left outer join functional.alltypes b
  on (a.id = b.id)
where (
  case when a.id > 100 then
    a.timestamp_col
  else
    a.timestamp_col
  end) >= cast('2001-01-01 00:00:00' as timestamp);
---- PLAN
PLAN-ROOT SINK
|
02:HASH JOIN [RIGHT OUTER JOIN]
|  hash predicates: b.id = a.id
|  runtime filters: RF000 <- a.id
|
|--00:SCAN HDFS [functional.alltypes a]
|     partitions=24/24 files=24 size=478.45KB
|     predicates: (CASE WHEN a.id > 100 THEN a.timestamp_col ELSE a.timestamp_col END) >= TIMESTAMP '2001-01-01 00:00:00'
|
01:SCAN HDFS [functional.alltypes b]
   partitions=24/24 files=24 size=478.45KB
   runtime filters: RF000 -> b.id
====
# IMPALA-3071: Test correct assignment of non-join conjuncts belonging to the On-clause
# of an inner join that follows an outer join.
select 1 from functional.alltypestiny a
left outer join functional.alltypestiny b
  on a.id = b.id
inner join functional.alltypestiny c
  on b.id = c.id and b.int_col < 0 and a.int_col > 10
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [INNER JOIN]
|  hash predicates: c.id = b.id
|  other predicates: b.int_col < 0
|  runtime filters: RF000 <- b.id
|
|--04:HASH JOIN [LEFT OUTER JOIN]
|  |  hash predicates: a.id = b.id
|  |
|  |--01:SCAN HDFS [functional.alltypestiny b]
|  |     partitions=4/4 files=4 size=460B
|  |     predicates: b.int_col < 0
|  |
|  00:SCAN HDFS [functional.alltypestiny a]
|     partitions=4/4 files=4 size=460B
|     predicates: a.int_col > 10
|
02:SCAN HDFS [functional.alltypestiny c]
   partitions=4/4 files=4 size=460B
   runtime filters: RF000 -> c.id
====
# IMPALA-3071: Same as above but with a right outer join.
select 1 from functional.alltypestiny a
right outer join functional.alltypestiny b
  on a.id = b.id
inner join functional.alltypestiny c
  on b.id = c.id and b.int_col < 0 and a.int_col > 10
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [INNER JOIN]
|  hash predicates: c.id = b.id
|  other predicates: a.int_col > 10
|  runtime filters: RF000 <- b.id
|
|--04:HASH JOIN [RIGHT OUTER JOIN]
|  |  hash predicates: a.id = b.id
|  |  runtime filters: RF002 <- b.id
|  |
|  |--01:SCAN HDFS [functional.alltypestiny b]
|  |     partitions=4/4 files=4 size=460B
|  |     predicates: b.int_col < 0
|  |
|  00:SCAN HDFS [functional.alltypestiny a]
|     partitions=4/4 files=4 size=460B
|     predicates: a.int_col > 10
|     runtime filters: RF002 -> a.id
|
02:SCAN HDFS [functional.alltypestiny c]
   partitions=4/4 files=4 size=460B
   runtime filters: RF000 -> c.id
====
# IMPALA-3071: Same as above but with a full outer join.
select 1 from functional.alltypestiny a
full outer join functional.alltypestiny b
  on a.id = b.id
inner join functional.alltypestiny c
  on b.id = c.id and b.int_col < 0 and a.int_col > 10
---- PLAN
PLAN-ROOT SINK
|
05:HASH JOIN [INNER JOIN]
|  hash predicates: b.id = c.id
|  other predicates: a.int_col > 10, b.int_col < 0
|  runtime filters: RF000 <- c.id
|
|--02:SCAN HDFS [functional.alltypestiny c]
|     partitions=4/4 files=4 size=460B
|
04:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: a.id = b.id
|
|--01:SCAN HDFS [functional.alltypestiny b]
|     partitions=4/4 files=4 size=460B
|     predicates: b.int_col < 0
|     runtime filters: RF000 -> b.id
|
00:SCAN HDFS [functional.alltypestiny a]
   partitions=4/4 files=4 size=460B
   predicates: a.int_col > 10
====
# IMPALA-3071: Test that the inner join On-clause predicate is placed
# precisely at the correct outer join (not before or after).
select 1 from functional.alltypestiny a
left outer join functional.alltypestiny b
  on a.id = b.id
left outer join functional.alltypestiny c
  on b.id = c.id
inner join functional.alltypestiny d
  on b.int_col < 0 and a.int_col > 10
full outer join functional.alltypestiny e
  on d.id = e.id
---- PLAN
PLAN-ROOT SINK
|
09:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: e.id = d.id
|
|--08:NESTED LOOP JOIN [INNER JOIN]
|  |  predicates: a.int_col > 10, b.int_col < 0
|  |
|  |--07:HASH JOIN [RIGHT OUTER JOIN]
|  |  |  hash predicates: c.id = b.id
|  |  |  runtime filters: RF000 <- b.id
|  |  |
|  |  |--06:HASH JOIN [LEFT OUTER JOIN]
|  |  |  |  hash predicates: a.id = b.id
|  |  |  |
|  |  |  |--01:SCAN HDFS [functional.alltypestiny b]
|  |  |  |     partitions=4/4 files=4 size=460B
|  |  |  |     predicates: b.int_col < 0
|  |  |  |
|  |  |  00:SCAN HDFS [functional.alltypestiny a]
|  |  |     partitions=4/4 files=4 size=460B
|  |  |     predicates: a.int_col > 10
|  |  |
|  |  02:SCAN HDFS [functional.alltypestiny c]
|  |     partitions=4/4 files=4 size=460B
|  |     runtime filters: RF000 -> c.id
|  |
|  03:SCAN HDFS [functional.alltypestiny d]
|     partitions=4/4 files=4 size=460B
|
04:SCAN HDFS [functional.alltypestiny e]
   partitions=4/4 files=4 size=460B
====
# IMPALA-3125: Test that the On-clause predicates from an outer join are assigned to the
# corresponding outer-join node, even if the predicates do not reference the join rhs.
select a.id aid, b.id bid, a.int_col aint, b.int_col bint
from functional.alltypes a
inner join functional.alltypes b
  on a.int_col = b.int_col
left outer join functional.alltypes c
  on a.id = b.id and b.bigint_col = c.bigint_col
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT OUTER JOIN]
|  hash predicates: b.bigint_col = c.bigint_col
|  other join predicates: a.id = b.id
|
|--02:SCAN HDFS [functional.alltypes c]
|     partitions=24/24 files=24 size=478.45KB
|
03:HASH JOIN [INNER JOIN]
|  hash predicates: b.int_col = a.int_col
|  runtime filters: RF000 <- a.int_col
|
|--00:SCAN HDFS [functional.alltypes a]
|     partitions=24/24 files=24 size=478.45KB
|
01:SCAN HDFS [functional.alltypes b]
   partitions=24/24 files=24 size=478.45KB
   runtime filters: RF000 -> b.int_col
====
# IMPALA-3167: Test correct assignment of a WHERE-clause predicate through an inline view
# that has a grouping aggregation and an outer join. The predicate can be assigned at the
# scan on the nullable side of the outer join, but it must also be evaluated after the join.
select v2.id, v2.s
from (select v1.id, sum(bigint_col) s
      from functional.alltypes t1
      left outer join (select t2.int_col, t2.id
                       from functional.alltypessmall t2) v1
      on t1.int_col = v1.int_col
      group by v1.id) v2
where v2.id < 10
---- PLAN
PLAN-ROOT SINK
|
03:AGGREGATE [FINALIZE]
|  output: sum(bigint_col)
|  group by: t2.id
|  having: v1.id < 10
|
02:HASH JOIN [LEFT OUTER JOIN]
|  hash predicates: t1.int_col = t2.int_col
|
|--01:SCAN HDFS [functional.alltypessmall t2]
|     partitions=4/4 files=4 size=6.32KB
|     predicates: t2.id < 10
|
00:SCAN HDFS [functional.alltypes t1]
   partitions=24/24 files=24 size=478.45KB
====
# IMPALA-3126: Test assignment of an inner join On-clause predicate. The predicate
# may not be assigned below the join materializing 'd'.
select 1 from functional.alltypes a
left outer join functional.alltypes b
  on a.id = b.id
right outer join functional.alltypes c
  on b.id = c.id
inner join functional.alltypes d
  on a.int_col = b.int_col
---- PLAN
PLAN-ROOT SINK
|
06:NESTED LOOP JOIN [INNER JOIN]
|  predicates: a.int_col = b.int_col
|
|--03:SCAN HDFS [functional.alltypes d]
|     partitions=24/24 files=24 size=478.45KB
|
05:HASH JOIN [RIGHT OUTER JOIN]
|  hash predicates: b.id = c.id
|  runtime filters: RF000 <- c.id
|
|--02:SCAN HDFS [functional.alltypes c]
|     partitions=24/24 files=24 size=478.45KB
|
04:HASH JOIN [LEFT OUTER JOIN]
|  hash predicates: a.id = b.id
|
|--01:SCAN HDFS [functional.alltypes b]
|     partitions=24/24 files=24 size=478.45KB
|     runtime filters: RF000 -> b.id
|
00:SCAN HDFS [functional.alltypes a]
   partitions=24/24 files=24 size=478.45KB
====
# IMPALA-3126: Same as above but with a semi join at the end.
select 1 from functional.alltypes a
left outer join functional.alltypes b
  on a.id = b.id
right outer join functional.alltypes c
  on b.id = c.id
left semi join functional.alltypes d
  on a.int_col = b.int_col
---- PLAN
PLAN-ROOT SINK
|
06:NESTED LOOP JOIN [LEFT SEMI JOIN]
|  join predicates: a.int_col = b.int_col
|
|--03:SCAN HDFS [functional.alltypes d]
|     partitions=24/24 files=24 size=478.45KB
|
05:HASH JOIN [RIGHT OUTER JOIN]
|  hash predicates: b.id = c.id
|  runtime filters: RF000 <- c.id
|
|--02:SCAN HDFS [functional.alltypes c]
|     partitions=24/24 files=24 size=478.45KB
|
04:HASH JOIN [LEFT OUTER JOIN]
|  hash predicates: a.id = b.id
|
|--01:SCAN HDFS [functional.alltypes b]
|     partitions=24/24 files=24 size=478.45KB
|     runtime filters: RF000 -> b.id
|
00:SCAN HDFS [functional.alltypes a]
   partitions=24/24 files=24 size=478.45KB
====
# IMPALA-5856: Test correct assignment of a join predicate with the following properties:
# - from the On-clause of a left outer join
# - only references the left-hand side tuples (not the right hand side tuple)
# - references full-outer joined tuples; the full outer join appears on the left
select * from functional.alltypes t1
full outer join functional.alltypessmall t2
  on t1.id = t2.id
left outer join functional.alltypestiny t3
  on coalesce(t1.id, t2.id) = t3.id and coalesce(t1.int_col, t2.int_col) = 2
     and t1.bigint_col > 10 and t2.bigint_col > 30
where concat(t1.string_col, t2.string_col) = 'test1' and t3.string_col = 'test2'
---- PLAN
PLAN-ROOT SINK
|
04:HASH JOIN [LEFT OUTER JOIN]
|  hash predicates: coalesce(t1.id, t2.id) = t3.id
|  other join predicates: t1.bigint_col > 10, t2.bigint_col > 30, coalesce(t1.int_col, t2.int_col) = 2
|  other predicates: t3.string_col = 'test2'
|
|--02:SCAN HDFS [functional.alltypestiny t3]
|     partitions=4/4 files=4 size=460B
|     predicates: t3.string_col = 'test2'
|
03:HASH JOIN [FULL OUTER JOIN]
|  hash predicates: t1.id = t2.id
|  other predicates: concat(t1.string_col, t2.string_col) = 'test1'
|
|--01:SCAN HDFS [functional.alltypessmall t2]
|     partitions=4/4 files=4 size=6.32KB
|
00:SCAN HDFS [functional.alltypes t1]
   partitions=24/24 files=24 size=478.45KB
====
# IMPALA-6123: Test the partition compatibility check with consecutive outer joins.
# In this query the output partition of the right outer join should be the rhs partition
# t3.id, which is not compatible with the aggregation partition t2.id. There should be
# an exchange node in the aggregation.
# The first left outer join is used to ensure 't2' is made nullable before the fragment
# executing the right outer join.
select /* +straight_join */ t2.id, count(*)
from functional.alltypes t1
left outer join /* +shuffle */ functional.alltypessmall t2
  on t1.int_col = t2.int_col
right outer join /* +shuffle */ functional.alltypestiny t3
  on t2.id = t3.id
group by t2.id
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
12:EXCHANGE [UNPARTITIONED]
|
11:AGGREGATE [FINALIZE]
|  output: count:merge(*)
|  group by: t2.id
|
10:EXCHANGE [HASH(t2.id)]
|
05:AGGREGATE [STREAMING]
|  output: count(*)
|  group by: t2.id
|
04:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|  hash predicates: t2.id = t3.id
|  runtime filters: RF000 <- t3.id
|
|--09:EXCHANGE [HASH(t3.id)]
|  |
|  02:SCAN HDFS [functional.alltypestiny t3]
|     partitions=4/4 files=4 size=460B
|
08:EXCHANGE [HASH(t2.id)]
|
03:HASH JOIN [LEFT OUTER JOIN, PARTITIONED]
|  hash predicates: t1.int_col = t2.int_col
|
|--07:EXCHANGE [HASH(t2.int_col)]
|  |
|  01:SCAN HDFS [functional.alltypessmall t2]
|     partitions=4/4 files=4 size=6.32KB
|     runtime filters: RF000 -> t2.id
|
06:EXCHANGE [HASH(t1.int_col)]
|
00:SCAN HDFS [functional.alltypes t1]
   partitions=24/24 files=24 size=478.45KB
====
# IMPALA-6123: Test the partition compatibility check with consecutive outer joins.
# In this query the output partition of the full outer join should be random. There should
# be an exchange node in the aggregation.
# The first right outer join is used to ensure 't1' is made nullable before the fragment
# executing the full outer join.
select /* +straight_join */ t1.int_col, count(*)
from functional.alltypes t1
right outer join /* +shuffle */ functional.alltypes t2
  on t1.smallint_col = t2.smallint_col
full outer join /* +shuffle */ functional.alltypes t3
  on t1.int_col = t3.int_col
group by t1.int_col
---- DISTRIBUTEDPLAN
PLAN-ROOT SINK
|
12:EXCHANGE [UNPARTITIONED]
|
11:AGGREGATE [FINALIZE]
|  output: count:merge(*)
|  group by: t1.int_col
|
10:EXCHANGE [HASH(t1.int_col)]
|
05:AGGREGATE [STREAMING]
|  output: count(*)
|  group by: t1.int_col
|
04:HASH JOIN [FULL OUTER JOIN, PARTITIONED]
|  hash predicates: t1.int_col = t3.int_col
|
|--09:EXCHANGE [HASH(t3.int_col)]
|  |
|  02:SCAN HDFS [functional.alltypes t3]
|     partitions=24/24 files=24 size=478.45KB
|
08:EXCHANGE [HASH(t1.int_col)]
|
03:HASH JOIN [RIGHT OUTER JOIN, PARTITIONED]
|  hash predicates: t1.smallint_col = t2.smallint_col
|  runtime filters: RF000 <- t2.smallint_col
|
|--07:EXCHANGE [HASH(t2.smallint_col)]
|  |
|  01:SCAN HDFS [functional.alltypes t2]
|     partitions=24/24 files=24 size=478.45KB
|
06:EXCHANGE [HASH(t1.smallint_col)]
|
00:SCAN HDFS [functional.alltypes t1]
   partitions=24/24 files=24 size=478.45KB
   runtime filters: RF000 -> t1.smallint_col
====