mirror of
https://github.com/apache/impala.git
synced 2026-01-10 09:00:16 -05:00
Implement nested-loop join in Impala with support for multiple join modes, including inner, outer, semi and anti joins. Null-aware left anti-join is not currently supported. Summary of changes: Introduced the NestedLoopJoinNode class in the FE that represents the nested loop join. Common functionality between NestedLoopJoinNode and HashJoinNode (e.g. cardinality estimation) was moved to the JoinNode class. In the BE, introduced the NestedLoopJoinNode class that implements the nested-loop join execution strategy. Change-Id: I238ec7dc0080f661847e5e1b84e30d61c3b0bb5c Reviewed-on: http://gerrit.cloudera.org:8080/652 Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com> Tested-by: Internal Jenkins
1500 lines
42 KiB
Plaintext
1500 lines
42 KiB
Plaintext
# IN predicate
|
|
select *
|
|
from functional.alltypes
|
|
where id in
|
|
(select id from functional.alltypesagg)
|
|
---- PLAN
|
|
02:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: id = id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypesagg]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# NOT IN predicate rewritten into a null-aware anti join
|
|
select *
|
|
from functional.alltypes
|
|
where id not in
|
|
(select id from functional.alltypesagg)
|
|
---- PLAN
|
|
02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
|
|
| hash predicates: id = id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypesagg]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Correlated NOT IN rewritten into a null-aware anti join
|
|
select *
|
|
from functional.alltypes a
|
|
where a.int_col not in
|
|
(select int_col
|
|
from functional.alltypesagg g
|
|
where g.id = a.id and g.bigint_col < a.bigint_col)
|
|
and a.int_col < 100
|
|
---- PLAN
|
|
02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
|
|
| hash predicates: a.int_col = int_col
|
|
| other join predicates: g.bigint_col < a.bigint_col, a.id = g.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypesagg g]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: a.int_col < 100
|
|
====
|
|
# Correlated NOT IN subquery resulting in the same equality conjunct
|
|
# being used in both the hash and the other join predicates
|
|
select *
|
|
from functional.alltypes a
|
|
where a.id not in (select id from functional.alltypes b where a.id = b.id)
|
|
---- PLAN
|
|
02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
|
|
| hash predicates: a.id = id
|
|
| other join predicates: a.id = b.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes b]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Subquery with predicate in the WHERE clause
|
|
select count(*)
|
|
from functional.alltypes a
|
|
where int_col in
|
|
(select int_col from functional.alltypesagg g where a.id = g.id and g.bigint_col < 10)
|
|
and bool_col = false
|
|
---- PLAN
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
02:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: int_col = int_col, a.id = g.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypesagg g]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
| predicates: g.bigint_col < 10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: bool_col = FALSE
|
|
====
|
|
# Complex expression in the IN predicate
|
|
select *
|
|
from functional.alltypes t
|
|
where t.int_col + 1 in
|
|
(select int_col + bigint_col from functional.alltypesagg)
|
|
---- PLAN
|
|
02:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: t.int_col + 1 = int_col + bigint_col
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypesagg]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Multiple subqueries in the WHERE clause
|
|
select *
|
|
from functional.alltypes t
|
|
where t.id in
|
|
(select id from functional.alltypesagg where bool_col = false)
|
|
and t.tinyint_col not in (select tinyint_col from functional.alltypestiny)
|
|
and t.bigint_col < 1000
|
|
---- PLAN
|
|
04:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
|
|
| hash predicates: t.tinyint_col = tinyint_col
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
03:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: t.id = id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypesagg]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
| predicates: bool_col = FALSE
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: t.bigint_col < 1000
|
|
====
|
|
# Multiple tables in the FROM clause of the outer query block
|
|
select count(*)
|
|
from functional.alltypesagg a, functional.alltypes t
|
|
where a.id = t.id and a.int_col in
|
|
(select int_col from functional.alltypestiny where bool_col = false)
|
|
---- PLAN
|
|
05:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
04:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: a.int_col = int_col
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: bool_col = FALSE
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: a.id = t.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes t]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
====
|
|
# Multiple tables in the subquery
|
|
select count(*)
|
|
from functional.alltypesagg a
|
|
where a.id in
|
|
(select s.id
|
|
from functional.alltypessmall s, functional.alltypestiny t
|
|
where s.int_col = t.int_col and a.bool_col = s.bool_col)
|
|
and a.int_col < 10
|
|
---- PLAN
|
|
05:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
04:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: a.id = s.id, a.bool_col = s.bool_col
|
|
|
|
|
|--03:HASH JOIN [INNER JOIN]
|
|
| | hash predicates: s.int_col = t.int_col
|
|
| |
|
|
| |--02:SCAN HDFS [functional.alltypestiny t]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypessmall s]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
predicates: a.int_col < 10
|
|
====
|
|
# Outer join between the tables in the outer query block
|
|
select count(*)
|
|
from functional.alltypesagg a left outer join functional.alltypes t
|
|
on a.int_col = t.int_col
|
|
where a.id in
|
|
(select id from functional.alltypestiny)
|
|
and t.bool_col = false
|
|
---- PLAN
|
|
05:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
04:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: a.id = id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
03:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: a.int_col = t.int_col
|
|
| other predicates: t.bool_col = FALSE
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes t]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: t.bool_col = FALSE
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
====
|
|
# Subquery in the outer-joined table
|
|
select count(*)
|
|
from functional.alltypes a left outer join
|
|
(select * from functional.alltypesagg g where id in
|
|
(select id from functional.alltypestiny)) t
|
|
on a.int_col = t.int_col
|
|
where a.bool_col = false and t.bigint_col < 100
|
|
---- PLAN
|
|
05:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
04:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: a.int_col = g.int_col
|
|
| other predicates: g.bigint_col < 100
|
|
|
|
|
|--03:HASH JOIN [LEFT SEMI JOIN]
|
|
| | hash predicates: id = id
|
|
| |
|
|
| |--02:SCAN HDFS [functional.alltypestiny]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg g]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
| predicates: g.bigint_col < 100
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: a.bool_col = FALSE
|
|
====
|
|
# Multiple tables in the FROM clause of the subquery
|
|
select count(distinct id)
|
|
from functional.alltypesagg a
|
|
where a.int_col in
|
|
(select t.int_col
|
|
from functional.alltypes t, functional.alltypessmall s, functional.alltypestiny n
|
|
where t.id = s.id and s.bigint_col = n.bigint_col and n.bool_col = false)
|
|
---- PLAN
|
|
08:AGGREGATE [FINALIZE]
|
|
| output: count(id)
|
|
|
|
|
07:AGGREGATE
|
|
| group by: id
|
|
|
|
|
06:HASH JOIN [RIGHT SEMI JOIN]
|
|
| hash predicates: t.int_col = a.int_col
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypesagg a]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
|
|
|
05:HASH JOIN [INNER JOIN]
|
|
| hash predicates: s.bigint_col = n.bigint_col
|
|
|
|
|
|--03:SCAN HDFS [functional.alltypestiny n]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: n.bool_col = FALSE
|
|
|
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: t.id = s.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall s]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypes t]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Subqueries with inline views
|
|
select *
|
|
from functional.alltypes t
|
|
where t.id in
|
|
(select a.id from functional.alltypestiny a,
|
|
(select id, count(*) as cnt from functional.alltypessmall group by id) s
|
|
where s.id = a.id and s.cnt = 10)
|
|
---- PLAN
|
|
05:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: t.id = a.id
|
|
|
|
|
|--04:HASH JOIN [INNER JOIN]
|
|
| | hash predicates: id = a.id
|
|
| |
|
|
| |--01:SCAN HDFS [functional.alltypestiny a]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| |
|
|
| 03:AGGREGATE [FINALIZE]
|
|
| | output: count(*)
|
|
| | group by: id
|
|
| | having: count(*) = 10
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
with t as (select a.* from functional.alltypes a where id in
|
|
(select id from functional.alltypestiny))
|
|
select * from t where t.bool_col = false and t.int_col = 10
|
|
---- PLAN
|
|
02:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: id = id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: a.bool_col = FALSE, a.int_col = 10
|
|
====
|
|
# Subqueries in WITH, FROM and WHERE clauses
|
|
with t as (select a.* from functional.alltypes a
|
|
where id in (select id from functional.alltypestiny))
|
|
select t.*
|
|
from t, (select * from functional.alltypesagg g where g.id in
|
|
(select id from functional.alltypes)) s
|
|
where s.string_col = t.string_col and t.int_col in
|
|
(select int_col from functional.alltypessmall)
|
|
and s.bool_col = false
|
|
---- PLAN
|
|
08:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: a.int_col = int_col
|
|
|
|
|
|--06:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
|
|
|
07:HASH JOIN [INNER JOIN]
|
|
| hash predicates: g.string_col = a.string_col
|
|
|
|
|
|--02:HASH JOIN [LEFT SEMI JOIN]
|
|
| | hash predicates: id = id
|
|
| |
|
|
| |--01:SCAN HDFS [functional.alltypestiny]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| |
|
|
| 00:SCAN HDFS [functional.alltypes a]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
05:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: g.id = id
|
|
|
|
|
|--04:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
03:SCAN HDFS [functional.alltypesagg g]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
predicates: g.bool_col = FALSE
|
|
====
|
|
# Correlated subqueries
|
|
select *
|
|
from functional.alltypes t
|
|
where id in
|
|
(select id from functional.alltypesagg a where t.int_col = a.int_col)
|
|
and t.bool_col = false
|
|
---- PLAN
|
|
02:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: id = id, t.int_col = a.int_col
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypesagg a]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: t.bool_col = FALSE
|
|
====
|
|
# Multiple nesting levels (uncorrelated queries)
|
|
select *
|
|
from functional.alltypes t
|
|
where id in
|
|
(select id from functional.alltypesagg where int_col in
|
|
(select int_col from functional.alltypestiny)
|
|
and bool_col = false)
|
|
and bigint_col < 1000
|
|
---- PLAN
|
|
04:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: id = id
|
|
|
|
|
|--03:HASH JOIN [LEFT SEMI JOIN]
|
|
| | hash predicates: int_col = int_col
|
|
| |
|
|
| |--02:SCAN HDFS [functional.alltypestiny]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
| predicates: bool_col = FALSE
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: bigint_col < 1000
|
|
====
|
|
# Multiple nesting levels (correlated queries)
|
|
select *
|
|
from functional.alltypes t
|
|
where id in
|
|
(select id from functional.alltypesagg a where a.int_col = t.int_col
|
|
and a.tinyint_col in
|
|
(select tinyint_col from functional.alltypestiny s
|
|
where s.bigint_col = a.bigint_col))
|
|
---- PLAN
|
|
04:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: id = id, t.int_col = a.int_col
|
|
|
|
|
|--03:HASH JOIN [LEFT SEMI JOIN]
|
|
| | hash predicates: a.tinyint_col = tinyint_col, a.bigint_col = s.bigint_col
|
|
| |
|
|
| |--02:SCAN HDFS [functional.alltypestiny s]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg a]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Multiple nesting levels (correlated and uncorrelated queries)
|
|
select *
|
|
from functional.alltypes t
|
|
where id in
|
|
(select id from functional.alltypesagg a where a.int_col in
|
|
(select int_col from functional.alltypestiny s where a.bigint_col = s.bigint_col))
|
|
---- PLAN
|
|
04:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: id = id
|
|
|
|
|
|--03:HASH JOIN [LEFT SEMI JOIN]
|
|
| | hash predicates: a.int_col = int_col, a.bigint_col = s.bigint_col
|
|
| |
|
|
| |--02:SCAN HDFS [functional.alltypestiny s]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg a]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Predicate propagation with uncorrelated subqueries
|
|
select *
|
|
from functional.alltypes
|
|
where id in
|
|
(select id from functional.alltypes where id < 10)
|
|
---- PLAN
|
|
02:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: id = id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: id < 10
|
|
|
|
|
00:SCAN HDFS [functional.alltypes]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: functional.alltypes.id < 10
|
|
====
|
|
# Predicate propagation with correlated subqueries
|
|
select *
|
|
from functional.alltypesagg a inner join functional.alltypes t on t.id = a.id
|
|
where t.int_col < 10 and t.int_col in
|
|
(select int_col from functional.alltypessmall s where s.id = t.id)
|
|
---- PLAN
|
|
04:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: t.int_col = int_col, t.id = s.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypessmall s]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
| predicates: s.int_col < 10
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: a.id = t.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes t]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: t.int_col < 10
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
====
|
|
# Correlated EXISTS
|
|
select count(*)
|
|
from functional.alltypes t
|
|
where exists
|
|
(select * from functional.alltypesagg a where a.id = t.id)
|
|
---- PLAN
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
02:HASH JOIN [RIGHT SEMI JOIN]
|
|
| hash predicates: a.id = t.id
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypes t]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
====
|
|
# Correlated EXISTS with an analytic function and a group by clause
|
|
select 1
|
|
from functional.alltypesagg a
|
|
where exists
|
|
(select id, count(int_col) over (partition by bool_col)
|
|
from functional.alltypestiny b
|
|
where a.tinyint_col = b.tinyint_col
|
|
group by id, int_col, bool_col)
|
|
and tinyint_col < 10
|
|
---- PLAN
|
|
03:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: a.tinyint_col = b.tinyint_col
|
|
|
|
|
|--02:AGGREGATE [FINALIZE]
|
|
| | group by: id, int_col, bool_col, b.tinyint_col
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypestiny b]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
predicates: tinyint_col < 10
|
|
====
|
|
# Correlated NOT EXISTS
|
|
select count(*)
|
|
from functional.alltypes t
|
|
where not exists
|
|
(select id from functional.alltypesagg a where t.int_col = a.int_col)
|
|
---- PLAN
|
|
03:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
02:HASH JOIN [RIGHT ANTI JOIN]
|
|
| hash predicates: a.int_col = t.int_col
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypes t]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
====
|
|
# Correlated NOT EXISTS with an analytic function and a group by clause
|
|
select count(*)
|
|
from functional.alltypesagg a
|
|
where not exists
|
|
(select b.id, count(b.int_col) over (partition by b.bigint_col)
|
|
from functional.alltypessmall b inner join functional.alltypes c on b.id = c.id
|
|
where c.bool_col = false and a.int_col = b.int_col
|
|
group by b.id, b.int_col, b.bigint_col)
|
|
and bool_col = false
|
|
---- PLAN
|
|
06:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
05:HASH JOIN [RIGHT ANTI JOIN]
|
|
| hash predicates: b.int_col = a.int_col
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypesagg a]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
| predicates: bool_col = FALSE
|
|
|
|
|
04:AGGREGATE [FINALIZE]
|
|
| group by: b.id, b.int_col, b.bigint_col
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: c.id = b.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypessmall b]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
|
|
|
02:SCAN HDFS [functional.alltypes c]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: c.bool_col = FALSE
|
|
====
|
|
# Uncorrelated EXISTS
|
|
select *
|
|
from functional.alltypestiny t
|
|
where exists (select * from functional.alltypessmall s where s.id < 5)
|
|
---- PLAN
|
|
02:NESTED LOOP JOIN [CROSS JOIN]
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypessmall s]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
| predicates: s.id < 5
|
|
| limit: 1
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny t]
|
|
partitions=4/4 files=4 size=460B
|
|
====
|
|
# Uncorrelated EXISTS with an analytic function and a group by clause
|
|
select 1
|
|
from functional.alltypestiny t
|
|
where exists
|
|
(select id, max(int_col) over (partition by bigint_col)
|
|
from functional.alltypesagg where tinyint_col = 10
|
|
group by id, int_col, bigint_col)
|
|
---- PLAN
|
|
03:NESTED LOOP JOIN [CROSS JOIN]
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypestiny t]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| group by: id, int_col, bigint_col
|
|
| limit: 1
|
|
|
|
|
01:SCAN HDFS [functional.alltypesagg]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
predicates: tinyint_col = 10
|
|
====
|
|
# Uncorrelated EXISTS with a LIMIT 0 clause
|
|
select 1
|
|
from functional.alltypestiny t
|
|
where exists (select * from functional.alltypessmall limit 0)
|
|
---- PLAN
|
|
00:EMPTYSET
|
|
====
|
|
# Multiple nesting levels
|
|
select count(*)
|
|
from functional.alltypes a
|
|
where exists
|
|
(select * from functional.alltypestiny t where a.id = t.id and exists
|
|
(select * from functional.alltypesagg g where g.int_col = t.int_col
|
|
and g.bool_col = false))
|
|
---- PLAN
|
|
05:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
|
|
|
04:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: a.id = t.id
|
|
|
|
|
|--03:HASH JOIN [RIGHT SEMI JOIN]
|
|
| | hash predicates: g.int_col = t.int_col
|
|
| |
|
|
| |--01:SCAN HDFS [functional.alltypestiny t]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypesagg g]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
| predicates: g.bool_col = FALSE
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Multiple subquery predicates
|
|
select g.int_col, count(*)
|
|
from functional.alltypesagg g left outer join functional.alltypes a
|
|
on g.id = a.id
|
|
where g.int_col < 100 and exists
|
|
(select *
|
|
from functional.alltypestiny t
|
|
where t.id = g.id and t.bool_col = false)
|
|
and g.bigint_col in
|
|
(select bigint_col
|
|
from functional.alltypessmall s
|
|
where s.id = g.id and s.int_col > 10)
|
|
and g.tinyint_col <
|
|
(select count(*)
|
|
from functional.alltypes t
|
|
where t.id = g.id and t.bool_col = true)
|
|
group by g.int_col
|
|
having count(*) < 100
|
|
---- PLAN
|
|
10:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
| group by: g.int_col
|
|
| having: count(*) < 100
|
|
|
|
|
09:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: t.id = g.id
|
|
| other predicates: g.tinyint_col < zeroifnull(count(*))
|
|
|
|
|
|--08:HASH JOIN [RIGHT SEMI JOIN]
|
|
| | hash predicates: bigint_col = g.bigint_col, s.id = g.id
|
|
| |
|
|
| |--07:HASH JOIN [LEFT SEMI JOIN]
|
|
| | | hash predicates: g.id = t.id
|
|
| | |
|
|
| | |--02:SCAN HDFS [functional.alltypestiny t]
|
|
| | | partitions=4/4 files=4 size=460B
|
|
| | | predicates: t.bool_col = FALSE
|
|
| | |
|
|
| | 06:HASH JOIN [RIGHT OUTER JOIN]
|
|
| | | hash predicates: a.id = g.id
|
|
| | |
|
|
| | |--00:SCAN HDFS [functional.alltypesagg g]
|
|
| | | partitions=11/11 files=11 size=814.73KB
|
|
| | | predicates: g.int_col < 100
|
|
| | |
|
|
| | 01:SCAN HDFS [functional.alltypes a]
|
|
| | partitions=24/24 files=24 size=478.45KB
|
|
| |
|
|
| 03:SCAN HDFS [functional.alltypessmall s]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
| predicates: s.int_col > 10
|
|
|
|
|
05:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
| group by: t.id
|
|
|
|
|
04:SCAN HDFS [functional.alltypes t]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: t.bool_col = TRUE
|
|
====
|
|
# Subqueries with aggregation
|
|
select *
|
|
from functional.alltypes a
|
|
where a.int_col in
|
|
(select count(int_col) from functional.alltypesagg g where g.bool_col
|
|
group by int_col)
|
|
and a.bigint_col < 10
|
|
---- PLAN
|
|
03:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: a.int_col = count(int_col)
|
|
|
|
|
|--02:AGGREGATE [FINALIZE]
|
|
| | output: count(int_col)
|
|
| | group by: int_col
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg g]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
| predicates: g.bool_col
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: a.bigint_col < 10
|
|
====
|
|
# Uncorrelated aggregation subquery
|
|
select *
|
|
from functional.alltypes a
|
|
where a.int_col <
|
|
(select max(int_col) from functional.alltypesagg g where g.bool_col = true)
|
|
and a.bigint_col > 10
|
|
---- PLAN
|
|
03:NESTED LOOP JOIN [INNER JOIN]
|
|
| predicates: a.int_col < max(int_col)
|
|
|
|
|
|--02:AGGREGATE [FINALIZE]
|
|
| | output: max(int_col)
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg g]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
| predicates: g.bool_col = TRUE
|
|
|
|
|
00:SCAN HDFS [functional.alltypes a]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
predicates: a.bigint_col > 10
|
|
====
|
|
# Aggregation subquery with constant comparison expr
|
|
select *
|
|
from functional.alltypesagg a
|
|
where (select max(id) from functional.alltypes t where t.bool_col = false) > 10
|
|
and a.int_col < 10
|
|
---- PLAN
|
|
03:NESTED LOOP JOIN [CROSS JOIN]
|
|
|
|
|
|--02:AGGREGATE [FINALIZE]
|
|
| | output: max(id)
|
|
| | having: max(id) > 10
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypes t]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: t.bool_col = FALSE
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
predicates: a.int_col < 10
|
|
====
|
|
# Correlated aggregation subquery
|
|
select a.int_col, count(*)
|
|
from functional.alltypesagg a
|
|
where a.id =
|
|
(select min(id)
|
|
from functional.alltypes t
|
|
where t.int_col = a.int_col and t.tinyint_col < 10)
|
|
and a.bool_col = false
|
|
group by a.int_col
|
|
---- PLAN
|
|
04:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
| group by: a.int_col
|
|
|
|
|
03:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: a.id = min(id), a.int_col = t.int_col
|
|
|
|
|
|--02:AGGREGATE [FINALIZE]
|
|
| | output: min(id)
|
|
| | group by: t.int_col
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypes t]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: t.tinyint_col < 10
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
predicates: a.bool_col = FALSE
|
|
====
|
|
# Aggregation subquery with multiple tables
|
|
select t.tinyint_col, count(*)
|
|
from functional.alltypes t left outer join functional.alltypesagg a
|
|
on t.id = a.id
|
|
where t.int_col <
|
|
(select min(s.int_col)
|
|
from functional.alltypessmall s left outer join functional.alltypestiny p
|
|
on s.id = p.id where s.bool_col = false and s.bigint_col = t.bigint_col)
|
|
and a.bool_col = false
|
|
group by t.tinyint_col
|
|
---- PLAN
|
|
08:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
| group by: t.tinyint_col
|
|
|
|
|
07:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: t.bigint_col = s.bigint_col
|
|
| other join predicates: t.int_col < min(s.int_col)
|
|
|
|
|
|--05:AGGREGATE [FINALIZE]
|
|
| | output: min(s.int_col)
|
|
| | group by: s.bigint_col
|
|
| |
|
|
| 04:HASH JOIN [LEFT OUTER JOIN]
|
|
| | hash predicates: s.id = p.id
|
|
| |
|
|
| |--03:SCAN HDFS [functional.alltypestiny p]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypessmall s]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
| predicates: s.bool_col = FALSE
|
|
|
|
|
06:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: t.id = a.id
|
|
| other predicates: a.bool_col = FALSE
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypesagg a]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
| predicates: a.bool_col = FALSE
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Multiple aggregation subqueries
|
|
select *
|
|
from functional.alltypesagg a, functional.alltypes t
|
|
where a.id = t.id and a.int_col <
|
|
(select min(int_col)
|
|
from functional.alltypestiny g
|
|
where t.bigint_col = g.bigint_col and g.bool_col = false)
|
|
and a.tinyint_col >
|
|
(select max(tinyint_col) from functional.alltypessmall s where s.id < 10)
|
|
and t.bool_col = false
|
|
---- PLAN
|
|
08:NESTED LOOP JOIN [INNER JOIN]
|
|
| predicates: a.tinyint_col > max(tinyint_col)
|
|
|
|
|
|--05:AGGREGATE [FINALIZE]
|
|
| | output: max(tinyint_col)
|
|
| |
|
|
| 04:SCAN HDFS [functional.alltypessmall s]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
| predicates: s.id < 10
|
|
|
|
|
07:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: t.bigint_col = g.bigint_col
|
|
| other join predicates: a.int_col < min(int_col)
|
|
|
|
|
|--03:AGGREGATE [FINALIZE]
|
|
| | output: min(int_col)
|
|
| | group by: g.bigint_col
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypestiny g]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: g.bool_col = FALSE
|
|
|
|
|
06:HASH JOIN [INNER JOIN]
|
|
| hash predicates: a.id = t.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes t]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: t.bool_col = FALSE
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
====
|
|
# Multiple nesting levels with aggregation subqueries
|
|
select *
|
|
from functional.alltypes t
|
|
where t.int_col <
|
|
(select avg(g.int_col)*2
|
|
from functional.alltypesagg g
|
|
where g.id = t.id and g.bigint_col <
|
|
(select count(*)
|
|
from functional.alltypestiny a
|
|
where a.id = g.id
|
|
and a.bool_col = false))
|
|
---- PLAN
|
|
06:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: t.id = g.id
|
|
| other join predicates: t.int_col < avg(g.int_col) * 2
|
|
|
|
|
|--05:AGGREGATE [FINALIZE]
|
|
| | output: avg(g.int_col)
|
|
| | group by: g.id
|
|
| |
|
|
| 04:HASH JOIN [LEFT OUTER JOIN]
|
|
| | hash predicates: g.id = a.id
|
|
| | other predicates: g.bigint_col < zeroifnull(count(*))
|
|
| |
|
|
| |--03:AGGREGATE [FINALIZE]
|
|
| | | output: count(*)
|
|
| | | group by: a.id
|
|
| | |
|
|
| | 02:SCAN HDFS [functional.alltypestiny a]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| | predicates: a.bool_col = FALSE
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg g]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Multiple nesting of aggregate subquery predicates with count
|
|
select *
|
|
from functional.alltypesagg a left outer join functional.alltypes t
|
|
on a.id = t.id
|
|
where a.int_col <
|
|
(select count(*)
|
|
from functional.alltypessmall s
|
|
where s.id = a.id and s.tinyint_col >
|
|
(select count(*) from functional.alltypestiny where bool_col = false))
|
|
---- PLAN
|
|
08:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: a.id = s.id
|
|
| other predicates: a.int_col < zeroifnull(count(*))
|
|
|
|
|
|--06:AGGREGATE [FINALIZE]
|
|
| | output: count(*)
|
|
| | group by: s.id
|
|
| |
|
|
| 05:NESTED LOOP JOIN [INNER JOIN]
|
|
| | predicates: s.tinyint_col > count(*)
|
|
| |
|
|
| |--04:AGGREGATE [FINALIZE]
|
|
| | | output: count(*)
|
|
| | |
|
|
| | 03:SCAN HDFS [functional.alltypestiny]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| | predicates: bool_col = FALSE
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypessmall s]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
|
|
|
07:HASH JOIN [LEFT OUTER JOIN]
|
|
| hash predicates: a.id = t.id
|
|
|
|
|
|--01:SCAN HDFS [functional.alltypes t]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg a]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
====
|
|
# Distinct in the outer select block
|
|
select distinct id, bool_col
|
|
from functional.alltypesagg g
|
|
where 100 < (select count(*) from functional.alltypes where bool_col = false and id < 5)
|
|
and bool_col = false
|
|
---- PLAN
|
|
04:AGGREGATE [FINALIZE]
|
|
| group by: id, bool_col
|
|
|
|
|
03:NESTED LOOP JOIN [CROSS JOIN]
|
|
|
|
|
|--02:AGGREGATE [FINALIZE]
|
|
| | output: count(*)
|
|
| | having: 100 < count(*)
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
| predicates: bool_col = FALSE, id < 5
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg g]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
predicates: bool_col = FALSE
|
|
====
|
|
# Distinct with an unqualified star in the outer select block
|
|
select distinct *
|
|
from functional.alltypesagg g
|
|
where 100 > (select count(distinct id) from functional.alltypestiny where int_col < 5)
|
|
and g.bigint_col < 1000 and g.bigint_col = true
|
|
---- PLAN
|
|
05:AGGREGATE [FINALIZE]
|
|
| group by: g.id, g.bool_col, g.tinyint_col, g.smallint_col, g.int_col, g.bigint_col, g.float_col, g.double_col, g.date_string_col, g.string_col, g.timestamp_col, g.year, g.month, g.day
|
|
|
|
|
04:NESTED LOOP JOIN [CROSS JOIN]
|
|
|
|
|
|--03:AGGREGATE [FINALIZE]
|
|
| | output: count(id)
|
|
| | having: 100 > count(id)
|
|
| |
|
|
| 02:AGGREGATE
|
|
| | group by: id
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypestiny]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: int_col < 5
|
|
|
|
|
00:SCAN HDFS [functional.alltypesagg g]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
predicates: g.bigint_col < 1000, g.bigint_col = TRUE
|
|
====
|
|
# Aggregate subquery in an IS NULL predicate
|
|
select *
|
|
from functional.alltypestiny t
|
|
where (select max(int_col) from functional.alltypesagg where int_col is null) is null
|
|
---- PLAN
|
|
03:NESTED LOOP JOIN [CROSS JOIN]
|
|
|
|
|
|--02:AGGREGATE [FINALIZE]
|
|
| | output: max(int_col)
|
|
| | having: max(int_col) IS NULL
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
| predicates: int_col IS NULL
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny t]
|
|
partitions=4/4 files=4 size=460B
|
|
====
|
|
# Correlated aggregate subquery with a count in an IS NULL predicate
|
|
select int_col, count(*)
|
|
from functional.alltypestiny t
|
|
where (select count(*) from functional.alltypesagg g where t.id = g.id) is null
|
|
and bool_col = false
|
|
group by int_col
|
|
---- PLAN
|
|
04:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
| group by: int_col
|
|
|
|
|
03:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: g.id = t.id
|
|
| other predicates: zeroifnull(count(*)) IS NULL
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypestiny t]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: bool_col = FALSE
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
| group by: g.id
|
|
| having: zeroifnull(count(*)) IS NULL
|
|
|
|
|
01:SCAN HDFS [functional.alltypesagg g]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
====
|
|
# Correlated aggregate subquery in an IS NULL predicate
|
|
select *
|
|
from functional.alltypestiny t
|
|
where
|
|
(select max(int_col)
|
|
from functional.alltypesagg g
|
|
where g.id = t.id and g.int_col is null) is null
|
|
and t.bool_col = false
|
|
---- PLAN
|
|
03:HASH JOIN [RIGHT SEMI JOIN]
|
|
| hash predicates: g.id = t.id
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypestiny t]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: t.bool_col = FALSE
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| output: max(int_col)
|
|
| group by: g.id
|
|
| having: max(int_col) IS NULL
|
|
|
|
|
01:SCAN HDFS [functional.alltypesagg g]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
predicates: g.int_col IS NULL
|
|
====
|
|
# Complex expr with a scalar subquery
|
|
select *
|
|
from functional.alltypestiny t
|
|
where 1 +
|
|
(select count(*) from functional.alltypesagg where bool_col = false) = t.int_col + 2
|
|
and t.bigint_col < 100
|
|
---- PLAN
|
|
03:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: t.int_col + 2 = 1 + count(*)
|
|
|
|
|
|--02:AGGREGATE [FINALIZE]
|
|
| | output: count(*)
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypesagg]
|
|
| partitions=11/11 files=11 size=814.73KB
|
|
| predicates: bool_col = FALSE
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny t]
|
|
partitions=4/4 files=4 size=460B
|
|
predicates: t.bigint_col < 100
|
|
====
|
|
# Scalar subquery in a function
|
|
select *
|
|
from functional.alltypestiny t
|
|
where nullifzero((select min(id) from functional.alltypessmall s where s.bool_col = false))
|
|
is null
|
|
and t.id < 10
|
|
---- PLAN
|
|
03:NESTED LOOP JOIN [CROSS JOIN]
|
|
|
|
|
|--02:AGGREGATE [FINALIZE]
|
|
| | output: min(id)
|
|
| | having: nullifzero(min(id)) IS NULL
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypessmall s]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
| predicates: s.bool_col = FALSE
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny t]
|
|
partitions=4/4 files=4 size=460B
|
|
predicates: t.id < 10
|
|
====
|
|
# Correlated aggregate subquery with a LIMIT clause that is removed during the rewrite
|
|
select min(t.id)
|
|
from functional.alltypes t
|
|
where t.int_col <
|
|
(select sum(s.int_col)
|
|
from functional.alltypessmall s
|
|
where s.id = t.id
|
|
limit 1)
|
|
group by t.bool_col
|
|
---- PLAN
|
|
04:AGGREGATE [FINALIZE]
|
|
| output: min(t.id)
|
|
| group by: t.bool_col
|
|
|
|
|
03:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: t.id = s.id
|
|
| other join predicates: t.int_col < sum(s.int_col)
|
|
|
|
|
|--02:AGGREGATE [FINALIZE]
|
|
| | output: sum(s.int_col)
|
|
| | group by: s.id
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypessmall s]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypes t]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Between predicate with subqueries
|
|
select *
|
|
from functional.alltypestiny t
|
|
where int_col between
|
|
(select min(int_col) from functional.alltypessmall where bool_col = false) and
|
|
(select max(int_col) from functional.alltypessmall where bool_col = true)
|
|
---- PLAN
|
|
06:NESTED LOOP JOIN [INNER JOIN]
|
|
| predicates: int_col <= max(int_col)
|
|
|
|
|
|--04:AGGREGATE [FINALIZE]
|
|
| | output: max(int_col)
|
|
| |
|
|
| 03:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
| predicates: bool_col = TRUE
|
|
|
|
|
05:NESTED LOOP JOIN [INNER JOIN]
|
|
| predicates: int_col >= min(int_col)
|
|
|
|
|
|--02:AGGREGATE [FINALIZE]
|
|
| | output: min(int_col)
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
| predicates: bool_col = FALSE
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny t]
|
|
partitions=4/4 files=4 size=460B
|
|
====
|
|
# Aggregate subquery with count (subquery op slotRef)
|
|
select t1.id
|
|
from functional.alltypestiny t1
|
|
where
|
|
(select count(tt1.smallint_col) as int_col_1
|
|
from functional.alltypestiny tt1
|
|
where t1.id = tt1.month) < t1.id
|
|
---- PLAN
|
|
03:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: tt1.month = t1.id
|
|
| other predicates: zeroifnull(count(tt1.smallint_col)) < t1.id
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypestiny t1]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| output: count(tt1.smallint_col)
|
|
| group by: tt1.month
|
|
|
|
|
01:SCAN HDFS [functional.alltypestiny tt1]
|
|
partitions=4/4 files=4 size=460B
|
|
====
|
|
# Correlated aggregate subquery with count in a function participating in
|
|
# a complex arithmetic expr
|
|
select int_col, count(*)
|
|
from functional.alltypestiny t
|
|
where
|
|
1 + log(abs((select count(int_col) from functional.alltypes s where s.id = t.id)), 2)
|
|
< 10
|
|
group by int_col
|
|
---- PLAN
|
|
04:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
| group by: int_col
|
|
|
|
|
03:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: s.id = t.id
|
|
| other predicates: 1 + log(abs(zeroifnull(count(int_col))), 2) < 10
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypestiny t]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| output: count(int_col)
|
|
| group by: s.id
|
|
|
|
|
01:SCAN HDFS [functional.alltypes s]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Correlated scalar subquery with an aggregate function that returns a
|
|
# non-numeric type on empty input
|
|
select int_col, count(*)
|
|
from functional.alltypestiny t
|
|
where
|
|
(select sample(int_col) from functional.alltypes s where s.id = t.id) = t.string_col
|
|
and bool_col = false
|
|
group by int_col
|
|
---- PLAN
|
|
04:AGGREGATE [FINALIZE]
|
|
| output: count(*)
|
|
| group by: int_col
|
|
|
|
|
03:HASH JOIN [RIGHT OUTER JOIN]
|
|
| hash predicates: s.id = t.id
|
|
| other predicates: ifnull(sample(int_col), '') = t.string_col
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypestiny t]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: bool_col = FALSE
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| output: sample(int_col)
|
|
| group by: s.id
|
|
|
|
|
01:SCAN HDFS [functional.alltypes s]
|
|
partitions=24/24 files=24 size=478.45KB
|
|
====
|
|
# Uncorrelated scalar subquery where columns from the outer appear on both sides
|
|
# of the binary predicate
|
|
select 1
|
|
from functional.alltypestiny t1
|
|
where (select count(*) from functional.alltypessmall) + t1.int_col = t1.bigint_col - 1
|
|
---- PLAN
|
|
03:NESTED LOOP JOIN [INNER JOIN]
|
|
| predicates: count(*) + t1.int_col = t1.bigint_col - 1
|
|
|
|
|
|--02:AGGREGATE [FINALIZE]
|
|
| | output: count(*)
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypessmall]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
|
|
|
00:SCAN HDFS [functional.alltypestiny t1]
|
|
partitions=4/4 files=4 size=460B
|
|
====
|
|
# Uncorrelated scalar subquery in complex binary predicate that contains columns
|
|
# from two tables of the outer
|
|
select 1
|
|
from functional.alltypestiny t1 join functional.alltypessmall t2 on t1.id = t2.id
|
|
where (select count(*) from functional.alltypes) + 1 = t1.int_col + t2.int_col
|
|
---- PLAN
|
|
05:HASH JOIN [LEFT SEMI JOIN]
|
|
| hash predicates: t1.int_col + t2.int_col = count(*) + 1
|
|
|
|
|
|--03:AGGREGATE [FINALIZE]
|
|
| | output: count(*)
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: t2.id = t1.id
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypestiny t1]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
01:SCAN HDFS [functional.alltypessmall t2]
|
|
partitions=4/4 files=4 size=6.32KB
|
|
====
|
|
# Uncorrelated scalar subquery in complex binary predicate that contains columns
|
|
# from two tables of the outer that appear on both sides of the predicate
|
|
select 1
|
|
from functional.alltypestiny t1 join functional.alltypessmall t2 on t1.id = t2.id
|
|
where
|
|
(select count(*) from functional.alltypes) + t2.bigint_col = t1.int_col + t2.int_col
|
|
---- PLAN
|
|
05:NESTED LOOP JOIN [INNER JOIN]
|
|
| predicates: count(*) + t2.bigint_col = t1.int_col + t2.int_col
|
|
|
|
|
|--03:AGGREGATE [FINALIZE]
|
|
| | output: count(*)
|
|
| |
|
|
| 02:SCAN HDFS [functional.alltypes]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: t2.id = t1.id
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypestiny t1]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
01:SCAN HDFS [functional.alltypessmall t2]
|
|
partitions=4/4 files=4 size=6.32KB
|
|
====
|
|
# Correlated scalar subquery with complex correlated predicate (IMPALA-1335)
|
|
select 1
|
|
from functional.alltypestiny t
|
|
where
|
|
(select sum(t1.id)
|
|
from functional.alltypesagg t1 inner join functional.alltypes t2 on t1.id = t2.id
|
|
where t1.id + t2.id = t.int_col) = t.int_col
|
|
---- PLAN
|
|
05:HASH JOIN [RIGHT SEMI JOIN]
|
|
| hash predicates: sum(t1.id) = t.int_col
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypestiny t]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
04:AGGREGATE [FINALIZE]
|
|
| output: sum(t1.id)
|
|
| group by: t1.id + t2.id
|
|
| having: sum(t1.id) = t1.id + t2.id
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: t1.id = t2.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes t2]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypesagg t1]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
====
|
|
# Correlated scalar subquery with complex correlated predicate (IMPALA-1335)
|
|
select 1
|
|
from functional.alltypestiny t
|
|
where
|
|
(select sum(t1.id)
|
|
from functional.alltypesagg t1 inner join functional.alltypes t2 on t1.id = t2.id
|
|
where t1.id + t2.id = t.bigint_col) = t.int_col
|
|
---- PLAN
|
|
05:HASH JOIN [RIGHT SEMI JOIN]
|
|
| hash predicates: sum(t1.id) = t.int_col, t1.id + t2.id = t.bigint_col
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypestiny t]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
04:AGGREGATE [FINALIZE]
|
|
| output: sum(t1.id)
|
|
| group by: t1.id + t2.id
|
|
|
|
|
03:HASH JOIN [INNER JOIN]
|
|
| hash predicates: t1.id = t2.id
|
|
|
|
|
|--02:SCAN HDFS [functional.alltypes t2]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
01:SCAN HDFS [functional.alltypesagg t1]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
====
|
|
# Outer query block with multiple tables and a correlated scalar subquery with
|
|
# complex correlated predicate that references multiple subquery tables and multiple
|
|
# tables from the outer query block (IMPALA-1335)
|
|
select 1
|
|
from functional.alltypestiny t1 inner join functional.alltypessmall t2 on t1.id = t2.id
|
|
where
|
|
(select sum(tt1.id)
|
|
from functional.alltypesagg tt1 inner join functional.alltypes tt2
|
|
on tt1.int_col = tt2.int_col
|
|
where tt1.id + tt2.id = t1.int_col - t2.int_col) = t1.bigint_col
|
|
---- PLAN
|
|
07:HASH JOIN [RIGHT SEMI JOIN]
|
|
| hash predicates: sum(tt1.id) = t1.bigint_col, tt1.id + tt2.id = t1.int_col - t2.int_col
|
|
|
|
|
|--06:HASH JOIN [INNER JOIN]
|
|
| | hash predicates: t2.id = t1.id
|
|
| |
|
|
| |--00:SCAN HDFS [functional.alltypestiny t1]
|
|
| | partitions=4/4 files=4 size=460B
|
|
| |
|
|
| 01:SCAN HDFS [functional.alltypessmall t2]
|
|
| partitions=4/4 files=4 size=6.32KB
|
|
|
|
|
05:AGGREGATE [FINALIZE]
|
|
| output: sum(tt1.id)
|
|
| group by: tt1.id + tt2.id
|
|
|
|
|
04:HASH JOIN [INNER JOIN]
|
|
| hash predicates: tt1.int_col = tt2.int_col
|
|
|
|
|
|--03:SCAN HDFS [functional.alltypes tt2]
|
|
| partitions=24/24 files=24 size=478.45KB
|
|
|
|
|
02:SCAN HDFS [functional.alltypesagg tt1]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
====
|
|
# Correlated EXISTS and NOT EXISTS subqueries with limit 0 and
|
|
# aggregates. All predicates evaluate to FALSE. (IMPALA-1550)
|
|
select 1
|
|
from functional.alltypestiny t1
|
|
where exists
|
|
(select id
|
|
from functional.alltypes t2
|
|
where t1.int_col = t2.int_col limit 0)
|
|
and not exists
|
|
(select count(distinct int_col)
|
|
from functional.alltypesagg t3
|
|
where t1.id = t3.id)
|
|
---- PLAN
|
|
00:EMPTYSET
|
|
====
|
|
# Correlated EXISTS and NOT EXISTS subqueries with limit 0 and
|
|
# aggregates. All predicates evaluate to TRUE. (IMPALA-1550)
|
|
select 1
|
|
from functional.alltypestiny t1
|
|
where not exists
|
|
(select id
|
|
from functional.alltypes t2
|
|
where t1.int_col = t2.int_col limit 0)
|
|
and exists
|
|
(select count(distinct int_col), sum(distinct int_col)
|
|
from functional.alltypesagg t3
|
|
where t1.id = t3.id)
|
|
and not exists
|
|
(select sum(int_col)
|
|
from functional.alltypessmall t4
|
|
where t1.id = t4.id limit 0)
|
|
and not exists
|
|
(select min(int_col)
|
|
from functional.alltypestiny t5
|
|
where t1.id = t5.id and false)
|
|
---- PLAN
|
|
00:SCAN HDFS [functional.alltypestiny t1]
|
|
partitions=4/4 files=4 size=460B
|
|
====
|
|
# Correlated EXISTS and NOT EXISTS subqueries with limit 0 and
|
|
# aggregates. Some predicates evaluate to TRUE while others need to
|
|
# be evaluated at run-time. (IMPALA-1550)
|
|
select 1
|
|
from functional.alltypestiny t1
|
|
where not exists
|
|
(select id
|
|
from functional.alltypes t2
|
|
where t1.int_col = t2.int_col limit 0)
|
|
and exists
|
|
(select distinct int_col
|
|
from functional.alltypesagg t3
|
|
where t3.id > 100 and t1.id = t3.id)
|
|
and not exists
|
|
(select count(id)
|
|
from functional.alltypestiny t4
|
|
where t4.int_col = t1.tinyint_col
|
|
having count(id) > 200)
|
|
---- PLAN
|
|
06:HASH JOIN [LEFT ANTI JOIN]
|
|
| hash predicates: t1.tinyint_col = t4.int_col
|
|
|
|
|
|--04:AGGREGATE [FINALIZE]
|
|
| | output: count(id)
|
|
| | group by: t4.int_col
|
|
| | having: count(id) > 200
|
|
| |
|
|
| 03:SCAN HDFS [functional.alltypestiny t4]
|
|
| partitions=4/4 files=4 size=460B
|
|
|
|
|
05:HASH JOIN [RIGHT SEMI JOIN]
|
|
| hash predicates: t3.id = t1.id
|
|
|
|
|
|--00:SCAN HDFS [functional.alltypestiny t1]
|
|
| partitions=4/4 files=4 size=460B
|
|
| predicates: t1.id > 100
|
|
|
|
|
02:AGGREGATE [FINALIZE]
|
|
| group by: int_col, t3.id
|
|
|
|
|
01:SCAN HDFS [functional.alltypesagg t3]
|
|
partitions=11/11 files=11 size=814.73KB
|
|
predicates: t3.id > 100
|
|
====
|