Files
impala/testdata/workloads/functional-planner/queries/PlannerTest/subquery-rewrite.test
Skye Wanderman-Milne 7906ed44ac IMPALA-2015: Add support for nested loop join
Implement nested-loop join in Impala with support for multiple join
modes, including inner, outer, semi and anti joins. Null-aware left
anti-join is not currently supported.

Summary of changes:
Introduced the NestedLoopJoinNode class in the FE that represents the nested
loop join. Common functionality between NestedLoopJoinNode and HashJoinNode
(e.g. cardinality estimation) was moved to the JoinNode class.
In the BE, introduced the NestedLoopJoinNode class that implements the nested-loop
join execution strategy.

Change-Id: I238ec7dc0080f661847e5e1b84e30d61c3b0bb5c
Reviewed-on: http://gerrit.cloudera.org:8080/652
Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
Tested-by: Internal Jenkins
2015-08-19 08:40:14 +00:00

1500 lines
42 KiB
Plaintext

# IN predicate
select *
from functional.alltypes
where id in
(select id from functional.alltypesagg)
---- PLAN
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id
|
|--01:SCAN HDFS [functional.alltypesagg]
| partitions=11/11 files=11 size=814.73KB
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# NOT IN predicate rewritten into a null-aware anti join
select *
from functional.alltypes
where id not in
(select id from functional.alltypesagg)
---- PLAN
02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
| hash predicates: id = id
|
|--01:SCAN HDFS [functional.alltypesagg]
| partitions=11/11 files=11 size=814.73KB
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
====
# Correlated NOT IN rewritten into a null-aware anti join
select *
from functional.alltypes a
where a.int_col not in
(select int_col
from functional.alltypesagg g
where g.id = a.id and g.bigint_col < a.bigint_col)
and a.int_col < 100
---- PLAN
02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
| hash predicates: a.int_col = int_col
| other join predicates: g.bigint_col < a.bigint_col, a.id = g.id
|
|--01:SCAN HDFS [functional.alltypesagg g]
| partitions=11/11 files=11 size=814.73KB
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: a.int_col < 100
====
# Correlated NOT IN subquery in which the same equality conjunct (a.id = b.id)
# is used both as a hash predicate and as an other join predicate
select *
from functional.alltypes a
where a.id not in (select id from functional.alltypes b where a.id = b.id)
---- PLAN
02:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
| hash predicates: a.id = id
| other join predicates: a.id = b.id
|
|--01:SCAN HDFS [functional.alltypes b]
| partitions=24/24 files=24 size=478.45KB
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
====
# Correlated IN subquery with an additional filter predicate in its WHERE clause
select count(*)
from functional.alltypes a
where int_col in
(select int_col from functional.alltypesagg g where a.id = g.id and g.bigint_col < 10)
and bool_col = false
---- PLAN
03:AGGREGATE [FINALIZE]
| output: count(*)
|
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: int_col = int_col, a.id = g.id
|
|--01:SCAN HDFS [functional.alltypesagg g]
| partitions=11/11 files=11 size=814.73KB
| predicates: g.bigint_col < 10
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: bool_col = FALSE
====
# Complex expression in the IN predicate
select *
from functional.alltypes t
where t.int_col + 1 in
(select int_col + bigint_col from functional.alltypesagg)
---- PLAN
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.int_col + 1 = int_col + bigint_col
|
|--01:SCAN HDFS [functional.alltypesagg]
| partitions=11/11 files=11 size=814.73KB
|
00:SCAN HDFS [functional.alltypes t]
partitions=24/24 files=24 size=478.45KB
====
# Multiple subqueries in the WHERE clause
select *
from functional.alltypes t
where t.id in
(select id from functional.alltypesagg where bool_col = false)
and t.tinyint_col not in (select tinyint_col from functional.alltypestiny)
and t.bigint_col < 1000
---- PLAN
04:HASH JOIN [NULL AWARE LEFT ANTI JOIN]
| hash predicates: t.tinyint_col = tinyint_col
|
|--02:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.id = id
|
|--01:SCAN HDFS [functional.alltypesagg]
| partitions=11/11 files=11 size=814.73KB
| predicates: bool_col = FALSE
|
00:SCAN HDFS [functional.alltypes t]
partitions=24/24 files=24 size=478.45KB
predicates: t.bigint_col < 1000
====
# Multiple tables in the FROM clause of the outer query block
select count(*)
from functional.alltypesagg a, functional.alltypes t
where a.id = t.id and a.int_col in
(select int_col from functional.alltypestiny where bool_col = false)
---- PLAN
05:AGGREGATE [FINALIZE]
| output: count(*)
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.int_col = int_col
|
|--02:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
| predicates: bool_col = FALSE
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a.id = t.id
|
|--01:SCAN HDFS [functional.alltypes t]
| partitions=24/24 files=24 size=478.45KB
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
====
# Multiple tables in the subquery
select count(*)
from functional.alltypesagg a
where a.id in
(select s.id
from functional.alltypessmall s, functional.alltypestiny t
where s.int_col = t.int_col and a.bool_col = s.bool_col)
and a.int_col < 10
---- PLAN
05:AGGREGATE [FINALIZE]
| output: count(*)
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = s.id, a.bool_col = s.bool_col
|
|--03:HASH JOIN [INNER JOIN]
| | hash predicates: s.int_col = t.int_col
| |
| |--02:SCAN HDFS [functional.alltypestiny t]
| | partitions=4/4 files=4 size=460B
| |
| 01:SCAN HDFS [functional.alltypessmall s]
| partitions=4/4 files=4 size=6.32KB
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
predicates: a.int_col < 10
====
# Outer join between the tables in the outer query block
select count(*)
from functional.alltypesagg a left outer join functional.alltypes t
on a.int_col = t.int_col
where a.id in
(select id from functional.alltypestiny)
and t.bool_col = false
---- PLAN
05:AGGREGATE [FINALIZE]
| output: count(*)
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = id
|
|--02:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
|
03:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.int_col = t.int_col
| other predicates: t.bool_col = FALSE
|
|--01:SCAN HDFS [functional.alltypes t]
| partitions=24/24 files=24 size=478.45KB
| predicates: t.bool_col = FALSE
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
====
# Subquery in the outer-joined table
select count(*)
from functional.alltypes a left outer join
(select * from functional.alltypesagg g where id in
(select id from functional.alltypestiny)) t
on a.int_col = t.int_col
where a.bool_col = false and t.bigint_col < 100
---- PLAN
05:AGGREGATE [FINALIZE]
| output: count(*)
|
04:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.int_col = g.int_col
| other predicates: g.bigint_col < 100
|
|--03:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: id = id
| |
| |--02:SCAN HDFS [functional.alltypestiny]
| | partitions=4/4 files=4 size=460B
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| partitions=11/11 files=11 size=814.73KB
| predicates: g.bigint_col < 100
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: a.bool_col = FALSE
====
# Multiple tables in the FROM clause of the subquery
select count(distinct id)
from functional.alltypesagg a
where a.int_col in
(select t.int_col
from functional.alltypes t, functional.alltypessmall s, functional.alltypestiny n
where t.id = s.id and s.bigint_col = n.bigint_col and n.bool_col = false)
---- PLAN
08:AGGREGATE [FINALIZE]
| output: count(id)
|
07:AGGREGATE
| group by: id
|
06:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: t.int_col = a.int_col
|
|--00:SCAN HDFS [functional.alltypesagg a]
| partitions=11/11 files=11 size=814.73KB
|
05:HASH JOIN [INNER JOIN]
| hash predicates: s.bigint_col = n.bigint_col
|
|--03:SCAN HDFS [functional.alltypestiny n]
| partitions=4/4 files=4 size=460B
| predicates: n.bool_col = FALSE
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t.id = s.id
|
|--02:SCAN HDFS [functional.alltypessmall s]
| partitions=4/4 files=4 size=6.32KB
|
01:SCAN HDFS [functional.alltypes t]
partitions=24/24 files=24 size=478.45KB
====
# Subqueries with inline views
select *
from functional.alltypes t
where t.id in
(select a.id from functional.alltypestiny a,
(select id, count(*) as cnt from functional.alltypessmall group by id) s
where s.id = a.id and s.cnt = 10)
---- PLAN
05:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.id = a.id
|
|--04:HASH JOIN [INNER JOIN]
| | hash predicates: id = a.id
| |
| |--01:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| |
| 03:AGGREGATE [FINALIZE]
| | output: count(*)
| | group by: id
| | having: count(*) = 10
| |
| 02:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 files=4 size=6.32KB
|
00:SCAN HDFS [functional.alltypes t]
partitions=24/24 files=24 size=478.45KB
====
# Subquery in the WHERE clause of a WITH-clause view
with t as (select a.* from functional.alltypes a where id in
(select id from functional.alltypestiny))
select * from t where t.bool_col = false and t.int_col = 10
---- PLAN
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id
|
|--01:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: a.bool_col = FALSE, a.int_col = 10
====
# Subqueries in WITH, FROM and WHERE clauses
with t as (select a.* from functional.alltypes a
where id in (select id from functional.alltypestiny))
select t.*
from t, (select * from functional.alltypesagg g where g.id in
(select id from functional.alltypes)) s
where s.string_col = t.string_col and t.int_col in
(select int_col from functional.alltypessmall)
and s.bool_col = false
---- PLAN
08:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.int_col = int_col
|
|--06:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 files=4 size=6.32KB
|
07:HASH JOIN [INNER JOIN]
| hash predicates: g.string_col = a.string_col
|
|--02:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: id = id
| |
| |--01:SCAN HDFS [functional.alltypestiny]
| | partitions=4/4 files=4 size=460B
| |
| 00:SCAN HDFS [functional.alltypes a]
| partitions=24/24 files=24 size=478.45KB
|
05:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: g.id = id
|
|--04:SCAN HDFS [functional.alltypes]
| partitions=24/24 files=24 size=478.45KB
|
03:SCAN HDFS [functional.alltypesagg g]
partitions=11/11 files=11 size=814.73KB
predicates: g.bool_col = FALSE
====
# Correlated IN subquery with an equality correlation predicate
select *
from functional.alltypes t
where id in
(select id from functional.alltypesagg a where t.int_col = a.int_col)
and t.bool_col = false
---- PLAN
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id, t.int_col = a.int_col
|
|--01:SCAN HDFS [functional.alltypesagg a]
| partitions=11/11 files=11 size=814.73KB
|
00:SCAN HDFS [functional.alltypes t]
partitions=24/24 files=24 size=478.45KB
predicates: t.bool_col = FALSE
====
# Multiple nesting levels (uncorrelated queries)
select *
from functional.alltypes t
where id in
(select id from functional.alltypesagg where int_col in
(select int_col from functional.alltypestiny)
and bool_col = false)
and bigint_col < 1000
---- PLAN
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id
|
|--03:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: int_col = int_col
| |
| |--02:SCAN HDFS [functional.alltypestiny]
| | partitions=4/4 files=4 size=460B
| |
| 01:SCAN HDFS [functional.alltypesagg]
| partitions=11/11 files=11 size=814.73KB
| predicates: bool_col = FALSE
|
00:SCAN HDFS [functional.alltypes t]
partitions=24/24 files=24 size=478.45KB
predicates: bigint_col < 1000
====
# Multiple nesting levels (correlated queries)
select *
from functional.alltypes t
where id in
(select id from functional.alltypesagg a where a.int_col = t.int_col
and a.tinyint_col in
(select tinyint_col from functional.alltypestiny s
where s.bigint_col = a.bigint_col))
---- PLAN
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id, t.int_col = a.int_col
|
|--03:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: a.tinyint_col = tinyint_col, a.bigint_col = s.bigint_col
| |
| |--02:SCAN HDFS [functional.alltypestiny s]
| | partitions=4/4 files=4 size=460B
| |
| 01:SCAN HDFS [functional.alltypesagg a]
| partitions=11/11 files=11 size=814.73KB
|
00:SCAN HDFS [functional.alltypes t]
partitions=24/24 files=24 size=478.45KB
====
# Multiple nesting levels (correlated and uncorrelated queries)
select *
from functional.alltypes t
where id in
(select id from functional.alltypesagg a where a.int_col in
(select int_col from functional.alltypestiny s where a.bigint_col = s.bigint_col))
---- PLAN
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id
|
|--03:HASH JOIN [LEFT SEMI JOIN]
| | hash predicates: a.int_col = int_col, a.bigint_col = s.bigint_col
| |
| |--02:SCAN HDFS [functional.alltypestiny s]
| | partitions=4/4 files=4 size=460B
| |
| 01:SCAN HDFS [functional.alltypesagg a]
| partitions=11/11 files=11 size=814.73KB
|
00:SCAN HDFS [functional.alltypes t]
partitions=24/24 files=24 size=478.45KB
====
# Predicate propagation with uncorrelated subqueries
select *
from functional.alltypes
where id in
(select id from functional.alltypes where id < 10)
---- PLAN
02:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: id = id
|
|--01:SCAN HDFS [functional.alltypes]
| partitions=24/24 files=24 size=478.45KB
| predicates: id < 10
|
00:SCAN HDFS [functional.alltypes]
partitions=24/24 files=24 size=478.45KB
predicates: functional.alltypes.id < 10
====
# Predicate propagation with correlated subqueries
select *
from functional.alltypesagg a inner join functional.alltypes t on t.id = a.id
where t.int_col < 10 and t.int_col in
(select int_col from functional.alltypessmall s where s.id = t.id)
---- PLAN
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.int_col = int_col, t.id = s.id
|
|--02:SCAN HDFS [functional.alltypessmall s]
| partitions=4/4 files=4 size=6.32KB
| predicates: s.int_col < 10
|
03:HASH JOIN [INNER JOIN]
| hash predicates: a.id = t.id
|
|--01:SCAN HDFS [functional.alltypes t]
| partitions=24/24 files=24 size=478.45KB
| predicates: t.int_col < 10
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
====
# Correlated EXISTS
select count(*)
from functional.alltypes t
where exists
(select * from functional.alltypesagg a where a.id = t.id)
---- PLAN
03:AGGREGATE [FINALIZE]
| output: count(*)
|
02:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: a.id = t.id
|
|--00:SCAN HDFS [functional.alltypes t]
| partitions=24/24 files=24 size=478.45KB
|
01:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
====
# Correlated EXISTS with an analytic function and a group by clause
select 1
from functional.alltypesagg a
where exists
(select id, count(int_col) over (partition by bool_col)
from functional.alltypestiny b
where a.tinyint_col = b.tinyint_col
group by id, int_col, bool_col)
and tinyint_col < 10
---- PLAN
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.tinyint_col = b.tinyint_col
|
|--02:AGGREGATE [FINALIZE]
| | group by: id, int_col, bool_col, b.tinyint_col
| |
| 01:SCAN HDFS [functional.alltypestiny b]
| partitions=4/4 files=4 size=460B
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
predicates: tinyint_col < 10
====
# Correlated NOT EXISTS
select count(*)
from functional.alltypes t
where not exists
(select id from functional.alltypesagg a where t.int_col = a.int_col)
---- PLAN
03:AGGREGATE [FINALIZE]
| output: count(*)
|
02:HASH JOIN [RIGHT ANTI JOIN]
| hash predicates: a.int_col = t.int_col
|
|--00:SCAN HDFS [functional.alltypes t]
| partitions=24/24 files=24 size=478.45KB
|
01:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
====
# Correlated NOT EXISTS with an analytic function and a group by clause
select count(*)
from functional.alltypesagg a
where not exists
(select b.id, count(b.int_col) over (partition by b.bigint_col)
from functional.alltypessmall b inner join functional.alltypes c on b.id = c.id
where c.bool_col = false and a.int_col = b.int_col
group by b.id, b.int_col, b.bigint_col)
and bool_col = false
---- PLAN
06:AGGREGATE [FINALIZE]
| output: count(*)
|
05:HASH JOIN [RIGHT ANTI JOIN]
| hash predicates: b.int_col = a.int_col
|
|--00:SCAN HDFS [functional.alltypesagg a]
| partitions=11/11 files=11 size=814.73KB
| predicates: bool_col = FALSE
|
04:AGGREGATE [FINALIZE]
| group by: b.id, b.int_col, b.bigint_col
|
03:HASH JOIN [INNER JOIN]
| hash predicates: c.id = b.id
|
|--01:SCAN HDFS [functional.alltypessmall b]
| partitions=4/4 files=4 size=6.32KB
|
02:SCAN HDFS [functional.alltypes c]
partitions=24/24 files=24 size=478.45KB
predicates: c.bool_col = FALSE
====
# Uncorrelated EXISTS
select *
from functional.alltypestiny t
where exists (select * from functional.alltypessmall s where s.id < 5)
---- PLAN
02:NESTED LOOP JOIN [CROSS JOIN]
|
|--01:SCAN HDFS [functional.alltypessmall s]
| partitions=4/4 files=4 size=6.32KB
| predicates: s.id < 5
| limit: 1
|
00:SCAN HDFS [functional.alltypestiny t]
partitions=4/4 files=4 size=460B
====
# Uncorrelated EXISTS with an analytic function and a group by clause
select 1
from functional.alltypestiny t
where exists
(select id, max(int_col) over (partition by bigint_col)
from functional.alltypesagg where tinyint_col = 10
group by id, int_col, bigint_col)
---- PLAN
03:NESTED LOOP JOIN [CROSS JOIN]
|
|--00:SCAN HDFS [functional.alltypestiny t]
| partitions=4/4 files=4 size=460B
|
02:AGGREGATE [FINALIZE]
| group by: id, int_col, bigint_col
| limit: 1
|
01:SCAN HDFS [functional.alltypesagg]
partitions=11/11 files=11 size=814.73KB
predicates: tinyint_col = 10
====
# Uncorrelated EXISTS with a LIMIT 0 clause
select 1
from functional.alltypestiny t
where exists (select * from functional.alltypessmall limit 0)
---- PLAN
00:EMPTYSET
====
# Multiple nesting levels of correlated EXISTS subqueries
select count(*)
from functional.alltypes a
where exists
(select * from functional.alltypestiny t where a.id = t.id and exists
(select * from functional.alltypesagg g where g.int_col = t.int_col
and g.bool_col = false))
---- PLAN
05:AGGREGATE [FINALIZE]
| output: count(*)
|
04:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = t.id
|
|--03:HASH JOIN [RIGHT SEMI JOIN]
| | hash predicates: g.int_col = t.int_col
| |
| |--01:SCAN HDFS [functional.alltypestiny t]
| | partitions=4/4 files=4 size=460B
| |
| 02:SCAN HDFS [functional.alltypesagg g]
| partitions=11/11 files=11 size=814.73KB
| predicates: g.bool_col = FALSE
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
====
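# Multiple subquery predicates (EXISTS, IN and a scalar aggregate comparison)
# in the WHERE clause of a query block with an outer join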
select g.int_col, count(*)
from functional.alltypesagg g left outer join functional.alltypes a
on g.id = a.id
where g.int_col < 100 and exists
(select *
from functional.alltypestiny t
where t.id = g.id and t.bool_col = false)
and g.bigint_col in
(select bigint_col
from functional.alltypessmall s
where s.id = g.id and s.int_col > 10)
and g.tinyint_col <
(select count(*)
from functional.alltypes t
where t.id = g.id and t.bool_col = true)
group by g.int_col
having count(*) < 100
---- PLAN
10:AGGREGATE [FINALIZE]
| output: count(*)
| group by: g.int_col
| having: count(*) < 100
|
09:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: t.id = g.id
| other predicates: g.tinyint_col < zeroifnull(count(*))
|
|--08:HASH JOIN [RIGHT SEMI JOIN]
| | hash predicates: bigint_col = g.bigint_col, s.id = g.id
| |
| |--07:HASH JOIN [LEFT SEMI JOIN]
| | | hash predicates: g.id = t.id
| | |
| | |--02:SCAN HDFS [functional.alltypestiny t]
| | | partitions=4/4 files=4 size=460B
| | | predicates: t.bool_col = FALSE
| | |
| | 06:HASH JOIN [RIGHT OUTER JOIN]
| | | hash predicates: a.id = g.id
| | |
| | |--00:SCAN HDFS [functional.alltypesagg g]
| | | partitions=11/11 files=11 size=814.73KB
| | | predicates: g.int_col < 100
| | |
| | 01:SCAN HDFS [functional.alltypes a]
| | partitions=24/24 files=24 size=478.45KB
| |
| 03:SCAN HDFS [functional.alltypessmall s]
| partitions=4/4 files=4 size=6.32KB
| predicates: s.int_col > 10
|
05:AGGREGATE [FINALIZE]
| output: count(*)
| group by: t.id
|
04:SCAN HDFS [functional.alltypes t]
partitions=24/24 files=24 size=478.45KB
predicates: t.bool_col = TRUE
====
# IN subquery with an aggregate function and a group by clause
select *
from functional.alltypes a
where a.int_col in
(select count(int_col) from functional.alltypesagg g where g.bool_col
group by int_col)
and a.bigint_col < 10
---- PLAN
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.int_col = count(int_col)
|
|--02:AGGREGATE [FINALIZE]
| | output: count(int_col)
| | group by: int_col
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| partitions=11/11 files=11 size=814.73KB
| predicates: g.bool_col
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: a.bigint_col < 10
====
# Uncorrelated aggregation subquery
select *
from functional.alltypes a
where a.int_col <
(select max(int_col) from functional.alltypesagg g where g.bool_col = true)
and a.bigint_col > 10
---- PLAN
03:NESTED LOOP JOIN [INNER JOIN]
| predicates: a.int_col < max(int_col)
|
|--02:AGGREGATE [FINALIZE]
| | output: max(int_col)
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| partitions=11/11 files=11 size=814.73KB
| predicates: g.bool_col = TRUE
|
00:SCAN HDFS [functional.alltypes a]
partitions=24/24 files=24 size=478.45KB
predicates: a.bigint_col > 10
====
# Aggregation subquery with constant comparison expr
select *
from functional.alltypesagg a
where (select max(id) from functional.alltypes t where t.bool_col = false) > 10
and a.int_col < 10
---- PLAN
03:NESTED LOOP JOIN [CROSS JOIN]
|
|--02:AGGREGATE [FINALIZE]
| | output: max(id)
| | having: max(id) > 10
| |
| 01:SCAN HDFS [functional.alltypes t]
| partitions=24/24 files=24 size=478.45KB
| predicates: t.bool_col = FALSE
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
predicates: a.int_col < 10
====
# Correlated aggregation subquery
select a.int_col, count(*)
from functional.alltypesagg a
where a.id =
(select min(id)
from functional.alltypes t
where t.int_col = a.int_col and t.tinyint_col < 10)
and a.bool_col = false
group by a.int_col
---- PLAN
04:AGGREGATE [FINALIZE]
| output: count(*)
| group by: a.int_col
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: a.id = min(id), a.int_col = t.int_col
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | group by: t.int_col
| |
| 01:SCAN HDFS [functional.alltypes t]
| partitions=24/24 files=24 size=478.45KB
| predicates: t.tinyint_col < 10
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
predicates: a.bool_col = FALSE
====
# Aggregation subquery with multiple tables
select t.tinyint_col, count(*)
from functional.alltypes t left outer join functional.alltypesagg a
on t.id = a.id
where t.int_col <
(select min(s.int_col)
from functional.alltypessmall s left outer join functional.alltypestiny p
on s.id = p.id where s.bool_col = false and s.bigint_col = t.bigint_col)
and a.bool_col = false
group by t.tinyint_col
---- PLAN
08:AGGREGATE [FINALIZE]
| output: count(*)
| group by: t.tinyint_col
|
07:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.bigint_col = s.bigint_col
| other join predicates: t.int_col < min(s.int_col)
|
|--05:AGGREGATE [FINALIZE]
| | output: min(s.int_col)
| | group by: s.bigint_col
| |
| 04:HASH JOIN [LEFT OUTER JOIN]
| | hash predicates: s.id = p.id
| |
| |--03:SCAN HDFS [functional.alltypestiny p]
| | partitions=4/4 files=4 size=460B
| |
| 02:SCAN HDFS [functional.alltypessmall s]
| partitions=4/4 files=4 size=6.32KB
| predicates: s.bool_col = FALSE
|
06:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: t.id = a.id
| other predicates: a.bool_col = FALSE
|
|--01:SCAN HDFS [functional.alltypesagg a]
| partitions=11/11 files=11 size=814.73KB
| predicates: a.bool_col = FALSE
|
00:SCAN HDFS [functional.alltypes t]
partitions=24/24 files=24 size=478.45KB
====
# Multiple aggregation subqueries
select *
from functional.alltypesagg a, functional.alltypes t
where a.id = t.id and a.int_col <
(select min(int_col)
from functional.alltypestiny g
where t.bigint_col = g.bigint_col and g.bool_col = false)
and a.tinyint_col >
(select max(tinyint_col) from functional.alltypessmall s where s.id < 10)
and t.bool_col = false
---- PLAN
08:NESTED LOOP JOIN [INNER JOIN]
| predicates: a.tinyint_col > max(tinyint_col)
|
|--05:AGGREGATE [FINALIZE]
| | output: max(tinyint_col)
| |
| 04:SCAN HDFS [functional.alltypessmall s]
| partitions=4/4 files=4 size=6.32KB
| predicates: s.id < 10
|
07:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.bigint_col = g.bigint_col
| other join predicates: a.int_col < min(int_col)
|
|--03:AGGREGATE [FINALIZE]
| | output: min(int_col)
| | group by: g.bigint_col
| |
| 02:SCAN HDFS [functional.alltypestiny g]
| partitions=4/4 files=4 size=460B
| predicates: g.bool_col = FALSE
|
06:HASH JOIN [INNER JOIN]
| hash predicates: a.id = t.id
|
|--01:SCAN HDFS [functional.alltypes t]
| partitions=24/24 files=24 size=478.45KB
| predicates: t.bool_col = FALSE
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
====
# Multiple nesting levels with aggregation subqueries
select *
from functional.alltypes t
where t.int_col <
(select avg(g.int_col)*2
from functional.alltypesagg g
where g.id = t.id and g.bigint_col <
(select count(*)
from functional.alltypestiny a
where a.id = g.id
and a.bool_col = false))
---- PLAN
06:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.id = g.id
| other join predicates: t.int_col < avg(g.int_col) * 2
|
|--05:AGGREGATE [FINALIZE]
| | output: avg(g.int_col)
| | group by: g.id
| |
| 04:HASH JOIN [LEFT OUTER JOIN]
| | hash predicates: g.id = a.id
| | other predicates: g.bigint_col < zeroifnull(count(*))
| |
| |--03:AGGREGATE [FINALIZE]
| | | output: count(*)
| | | group by: a.id
| | |
| | 02:SCAN HDFS [functional.alltypestiny a]
| | partitions=4/4 files=4 size=460B
| | predicates: a.bool_col = FALSE
| |
| 01:SCAN HDFS [functional.alltypesagg g]
| partitions=11/11 files=11 size=814.73KB
|
00:SCAN HDFS [functional.alltypes t]
partitions=24/24 files=24 size=478.45KB
====
# Multiple nesting of aggregate subquery predicates with count
select *
from functional.alltypesagg a left outer join functional.alltypes t
on a.id = t.id
where a.int_col <
(select count(*)
from functional.alltypessmall s
where s.id = a.id and s.tinyint_col >
(select count(*) from functional.alltypestiny where bool_col = false))
---- PLAN
08:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.id = s.id
| other predicates: a.int_col < zeroifnull(count(*))
|
|--06:AGGREGATE [FINALIZE]
| | output: count(*)
| | group by: s.id
| |
| 05:NESTED LOOP JOIN [INNER JOIN]
| | predicates: s.tinyint_col > count(*)
| |
| |--04:AGGREGATE [FINALIZE]
| | | output: count(*)
| | |
| | 03:SCAN HDFS [functional.alltypestiny]
| | partitions=4/4 files=4 size=460B
| | predicates: bool_col = FALSE
| |
| 02:SCAN HDFS [functional.alltypessmall s]
| partitions=4/4 files=4 size=6.32KB
|
07:HASH JOIN [LEFT OUTER JOIN]
| hash predicates: a.id = t.id
|
|--01:SCAN HDFS [functional.alltypes t]
| partitions=24/24 files=24 size=478.45KB
|
00:SCAN HDFS [functional.alltypesagg a]
partitions=11/11 files=11 size=814.73KB
====
# Distinct in the outer select block
select distinct id, bool_col
from functional.alltypesagg g
where 100 < (select count(*) from functional.alltypes where bool_col = false and id < 5)
and bool_col = false
---- PLAN
04:AGGREGATE [FINALIZE]
| group by: id, bool_col
|
03:NESTED LOOP JOIN [CROSS JOIN]
|
|--02:AGGREGATE [FINALIZE]
| | output: count(*)
| | having: 100 < count(*)
| |
| 01:SCAN HDFS [functional.alltypes]
| partitions=24/24 files=24 size=478.45KB
| predicates: bool_col = FALSE, id < 5
|
00:SCAN HDFS [functional.alltypesagg g]
partitions=11/11 files=11 size=814.73KB
predicates: bool_col = FALSE
====
# Distinct with an unqualified star in the outer select block
# NOTE(review): the last conjunct compares g.bigint_col with the boolean literal
# true, and the expected plan repeats it as written; confirm whether g.bool_col
# was intended before changing either the query or the plan.
select distinct *
from functional.alltypesagg g
where 100 > (select count(distinct id) from functional.alltypestiny where int_col < 5)
and g.bigint_col < 1000 and g.bigint_col = true
---- PLAN
05:AGGREGATE [FINALIZE]
| group by: g.id, g.bool_col, g.tinyint_col, g.smallint_col, g.int_col, g.bigint_col, g.float_col, g.double_col, g.date_string_col, g.string_col, g.timestamp_col, g.year, g.month, g.day
|
04:NESTED LOOP JOIN [CROSS JOIN]
|
|--03:AGGREGATE [FINALIZE]
| | output: count(id)
| | having: 100 > count(id)
| |
| 02:AGGREGATE
| | group by: id
| |
| 01:SCAN HDFS [functional.alltypestiny]
| partitions=4/4 files=4 size=460B
| predicates: int_col < 5
|
00:SCAN HDFS [functional.alltypesagg g]
partitions=11/11 files=11 size=814.73KB
predicates: g.bigint_col < 1000, g.bigint_col = TRUE
====
# Aggregate subquery in an IS NULL predicate
select *
from functional.alltypestiny t
where (select max(int_col) from functional.alltypesagg where int_col is null) is null
---- PLAN
03:NESTED LOOP JOIN [CROSS JOIN]
|
|--02:AGGREGATE [FINALIZE]
| | output: max(int_col)
| | having: max(int_col) IS NULL
| |
| 01:SCAN HDFS [functional.alltypesagg]
| partitions=11/11 files=11 size=814.73KB
| predicates: int_col IS NULL
|
00:SCAN HDFS [functional.alltypestiny t]
partitions=4/4 files=4 size=460B
====
# Correlated aggregate subquery with a count in an IS NULL predicate
select int_col, count(*)
from functional.alltypestiny t
where (select count(*) from functional.alltypesagg g where t.id = g.id) is null
and bool_col = false
group by int_col
---- PLAN
04:AGGREGATE [FINALIZE]
| output: count(*)
| group by: int_col
|
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: g.id = t.id
| other predicates: zeroifnull(count(*)) IS NULL
|
|--00:SCAN HDFS [functional.alltypestiny t]
| partitions=4/4 files=4 size=460B
| predicates: bool_col = FALSE
|
02:AGGREGATE [FINALIZE]
| output: count(*)
| group by: g.id
| having: zeroifnull(count(*)) IS NULL
|
01:SCAN HDFS [functional.alltypesagg g]
partitions=11/11 files=11 size=814.73KB
====
# Correlated aggregate subquery in an IS NULL predicate
select *
from functional.alltypestiny t
where
(select max(int_col)
from functional.alltypesagg g
where g.id = t.id and g.int_col is null) is null
and t.bool_col = false
---- PLAN
03:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: g.id = t.id
|
|--00:SCAN HDFS [functional.alltypestiny t]
| partitions=4/4 files=4 size=460B
| predicates: t.bool_col = FALSE
|
02:AGGREGATE [FINALIZE]
| output: max(int_col)
| group by: g.id
| having: max(int_col) IS NULL
|
01:SCAN HDFS [functional.alltypesagg g]
partitions=11/11 files=11 size=814.73KB
predicates: g.int_col IS NULL
====
# Complex expr with a scalar subquery
select *
from functional.alltypestiny t
where 1 +
(select count(*) from functional.alltypesagg where bool_col = false) = t.int_col + 2
and t.bigint_col < 100
---- PLAN
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.int_col + 2 = 1 + count(*)
|
|--02:AGGREGATE [FINALIZE]
| | output: count(*)
| |
| 01:SCAN HDFS [functional.alltypesagg]
| partitions=11/11 files=11 size=814.73KB
| predicates: bool_col = FALSE
|
00:SCAN HDFS [functional.alltypestiny t]
partitions=4/4 files=4 size=460B
predicates: t.bigint_col < 100
====
# Scalar subquery in a function
select *
from functional.alltypestiny t
where nullifzero((select min(id) from functional.alltypessmall s where s.bool_col = false))
is null
and t.id < 10
---- PLAN
03:NESTED LOOP JOIN [CROSS JOIN]
|
|--02:AGGREGATE [FINALIZE]
| | output: min(id)
| | having: nullifzero(min(id)) IS NULL
| |
| 01:SCAN HDFS [functional.alltypessmall s]
| partitions=4/4 files=4 size=6.32KB
| predicates: s.bool_col = FALSE
|
00:SCAN HDFS [functional.alltypestiny t]
partitions=4/4 files=4 size=460B
predicates: t.id < 10
====
# Correlated aggregate subquery with a LIMIT clause that is removed during the rewrite
select min(t.id)
from functional.alltypes t
where t.int_col <
(select sum(s.int_col)
from functional.alltypessmall s
where s.id = t.id
limit 1)
group by t.bool_col
---- PLAN
04:AGGREGATE [FINALIZE]
| output: min(t.id)
| group by: t.bool_col
|
03:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t.id = s.id
| other join predicates: t.int_col < sum(s.int_col)
|
|--02:AGGREGATE [FINALIZE]
| | output: sum(s.int_col)
| | group by: s.id
| |
| 01:SCAN HDFS [functional.alltypessmall s]
| partitions=4/4 files=4 size=6.32KB
|
00:SCAN HDFS [functional.alltypes t]
partitions=24/24 files=24 size=478.45KB
====
# Between predicate with subqueries
select *
from functional.alltypestiny t
where int_col between
(select min(int_col) from functional.alltypessmall where bool_col = false) and
(select max(int_col) from functional.alltypessmall where bool_col = true)
---- PLAN
06:NESTED LOOP JOIN [INNER JOIN]
| predicates: int_col <= max(int_col)
|
|--04:AGGREGATE [FINALIZE]
| | output: max(int_col)
| |
| 03:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 files=4 size=6.32KB
| predicates: bool_col = TRUE
|
05:NESTED LOOP JOIN [INNER JOIN]
| predicates: int_col >= min(int_col)
|
|--02:AGGREGATE [FINALIZE]
| | output: min(int_col)
| |
| 01:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 files=4 size=6.32KB
| predicates: bool_col = FALSE
|
00:SCAN HDFS [functional.alltypestiny t]
partitions=4/4 files=4 size=460B
====
# Correlated aggregate subquery with count (subquery op slotRef)
select t1.id
from functional.alltypestiny t1
where
(select count(tt1.smallint_col) as int_col_1
from functional.alltypestiny tt1
where t1.id = tt1.month) < t1.id
---- PLAN
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: tt1.month = t1.id
| other predicates: zeroifnull(count(tt1.smallint_col)) < t1.id
|
|--00:SCAN HDFS [functional.alltypestiny t1]
| partitions=4/4 files=4 size=460B
|
02:AGGREGATE [FINALIZE]
| output: count(tt1.smallint_col)
| group by: tt1.month
|
01:SCAN HDFS [functional.alltypestiny tt1]
partitions=4/4 files=4 size=460B
====
# Correlated aggregate subquery with count in a function participating in
# a complex arithmetic expr
select int_col, count(*)
from functional.alltypestiny t
where
1 + log(abs((select count(int_col) from functional.alltypes s where s.id = t.id)), 2)
< 10
group by int_col
---- PLAN
04:AGGREGATE [FINALIZE]
| output: count(*)
| group by: int_col
|
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: s.id = t.id
| other predicates: 1 + log(abs(zeroifnull(count(int_col))), 2) < 10
|
|--00:SCAN HDFS [functional.alltypestiny t]
| partitions=4/4 files=4 size=460B
|
02:AGGREGATE [FINALIZE]
| output: count(int_col)
| group by: s.id
|
01:SCAN HDFS [functional.alltypes s]
partitions=24/24 files=24 size=478.45KB
====
# Correlated scalar subquery with an aggregate function that returns a
# non-numeric type on empty input
select int_col, count(*)
from functional.alltypestiny t
where
(select sample(int_col) from functional.alltypes s where s.id = t.id) = t.string_col
and bool_col = false
group by int_col
---- PLAN
04:AGGREGATE [FINALIZE]
| output: count(*)
| group by: int_col
|
03:HASH JOIN [RIGHT OUTER JOIN]
| hash predicates: s.id = t.id
| other predicates: ifnull(sample(int_col), '') = t.string_col
|
|--00:SCAN HDFS [functional.alltypestiny t]
| partitions=4/4 files=4 size=460B
| predicates: bool_col = FALSE
|
02:AGGREGATE [FINALIZE]
| output: sample(int_col)
| group by: s.id
|
01:SCAN HDFS [functional.alltypes s]
partitions=24/24 files=24 size=478.45KB
====
# Uncorrelated scalar subquery where columns from the outer query block appear on
# both sides of the binary predicate
select 1
from functional.alltypestiny t1
where (select count(*) from functional.alltypessmall) + t1.int_col = t1.bigint_col - 1
---- PLAN
03:NESTED LOOP JOIN [INNER JOIN]
| predicates: count(*) + t1.int_col = t1.bigint_col - 1
|
|--02:AGGREGATE [FINALIZE]
| | output: count(*)
| |
| 01:SCAN HDFS [functional.alltypessmall]
| partitions=4/4 files=4 size=6.32KB
|
00:SCAN HDFS [functional.alltypestiny t1]
partitions=4/4 files=4 size=460B
====
# Uncorrelated scalar subquery in complex binary predicate that contains columns
# from two tables of the outer query block
select 1
from functional.alltypestiny t1 join functional.alltypessmall t2 on t1.id = t2.id
where (select count(*) from functional.alltypes) + 1 = t1.int_col + t2.int_col
---- PLAN
05:HASH JOIN [LEFT SEMI JOIN]
| hash predicates: t1.int_col + t2.int_col = count(*) + 1
|
|--03:AGGREGATE [FINALIZE]
| | output: count(*)
| |
| 02:SCAN HDFS [functional.alltypes]
| partitions=24/24 files=24 size=478.45KB
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t2.id = t1.id
|
|--00:SCAN HDFS [functional.alltypestiny t1]
| partitions=4/4 files=4 size=460B
|
01:SCAN HDFS [functional.alltypessmall t2]
partitions=4/4 files=4 size=6.32KB
====
# Uncorrelated scalar subquery in complex binary predicate that contains columns
# from two tables of the outer query block that appear on both sides of the predicate
select 1
from functional.alltypestiny t1 join functional.alltypessmall t2 on t1.id = t2.id
where
(select count(*) from functional.alltypes) + t2.bigint_col = t1.int_col + t2.int_col
---- PLAN
05:NESTED LOOP JOIN [INNER JOIN]
| predicates: count(*) + t2.bigint_col = t1.int_col + t2.int_col
|
|--03:AGGREGATE [FINALIZE]
| | output: count(*)
| |
| 02:SCAN HDFS [functional.alltypes]
| partitions=24/24 files=24 size=478.45KB
|
04:HASH JOIN [INNER JOIN]
| hash predicates: t2.id = t1.id
|
|--00:SCAN HDFS [functional.alltypestiny t1]
| partitions=4/4 files=4 size=460B
|
01:SCAN HDFS [functional.alltypessmall t2]
partitions=4/4 files=4 size=6.32KB
====
# Correlated scalar subquery with complex correlated predicate on the same outer
# column that is compared against the subquery result (IMPALA-1335)
select 1
from functional.alltypestiny t
where
(select sum(t1.id)
from functional.alltypesagg t1 inner join functional.alltypes t2 on t1.id = t2.id
where t1.id + t2.id = t.int_col) = t.int_col
---- PLAN
05:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: sum(t1.id) = t.int_col
|
|--00:SCAN HDFS [functional.alltypestiny t]
| partitions=4/4 files=4 size=460B
|
04:AGGREGATE [FINALIZE]
| output: sum(t1.id)
| group by: t1.id + t2.id
| having: sum(t1.id) = t1.id + t2.id
|
03:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
|
|--02:SCAN HDFS [functional.alltypes t2]
| partitions=24/24 files=24 size=478.45KB
|
01:SCAN HDFS [functional.alltypesagg t1]
partitions=11/11 files=11 size=814.73KB
====
# Correlated scalar subquery with complex correlated predicate on an outer column
# other than the one compared against the subquery result (IMPALA-1335)
select 1
from functional.alltypestiny t
where
(select sum(t1.id)
from functional.alltypesagg t1 inner join functional.alltypes t2 on t1.id = t2.id
where t1.id + t2.id = t.bigint_col) = t.int_col
---- PLAN
05:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: sum(t1.id) = t.int_col, t1.id + t2.id = t.bigint_col
|
|--00:SCAN HDFS [functional.alltypestiny t]
| partitions=4/4 files=4 size=460B
|
04:AGGREGATE [FINALIZE]
| output: sum(t1.id)
| group by: t1.id + t2.id
|
03:HASH JOIN [INNER JOIN]
| hash predicates: t1.id = t2.id
|
|--02:SCAN HDFS [functional.alltypes t2]
| partitions=24/24 files=24 size=478.45KB
|
01:SCAN HDFS [functional.alltypesagg t1]
partitions=11/11 files=11 size=814.73KB
====
# Outer query block with multiple tables and a correlated scalar subquery with
# complex correlated predicate that references multiple subquery tables and multiple
# tables from the outer query block (IMPALA-1335)
select 1
from functional.alltypestiny t1 inner join functional.alltypessmall t2 on t1.id = t2.id
where
(select sum(tt1.id)
from functional.alltypesagg tt1 inner join functional.alltypes tt2
on tt1.int_col = tt2.int_col
where tt1.id + tt2.id = t1.int_col - t2.int_col) = t1.bigint_col
---- PLAN
07:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: sum(tt1.id) = t1.bigint_col, tt1.id + tt2.id = t1.int_col - t2.int_col
|
|--06:HASH JOIN [INNER JOIN]
| | hash predicates: t2.id = t1.id
| |
| |--00:SCAN HDFS [functional.alltypestiny t1]
| | partitions=4/4 files=4 size=460B
| |
| 01:SCAN HDFS [functional.alltypessmall t2]
| partitions=4/4 files=4 size=6.32KB
|
05:AGGREGATE [FINALIZE]
| output: sum(tt1.id)
| group by: tt1.id + tt2.id
|
04:HASH JOIN [INNER JOIN]
| hash predicates: tt1.int_col = tt2.int_col
|
|--03:SCAN HDFS [functional.alltypes tt2]
| partitions=24/24 files=24 size=478.45KB
|
02:SCAN HDFS [functional.alltypesagg tt1]
partitions=11/11 files=11 size=814.73KB
====
# Correlated EXISTS and NOT EXISTS subqueries with limit 0 and
# aggregates. All predicates evaluate to FALSE. (IMPALA-1550)
select 1
from functional.alltypestiny t1
where exists
(select id
from functional.alltypes t2
where t1.int_col = t2.int_col limit 0)
and not exists
(select count(distinct int_col)
from functional.alltypesagg t3
where t1.id = t3.id)
---- PLAN
00:EMPTYSET
====
# Correlated EXISTS and NOT EXISTS subqueries with limit 0 and
# aggregates. All predicates evaluate to TRUE. (IMPALA-1550)
select 1
from functional.alltypestiny t1
where not exists
(select id
from functional.alltypes t2
where t1.int_col = t2.int_col limit 0)
and exists
(select count(distinct int_col), sum(distinct int_col)
from functional.alltypesagg t3
where t1.id = t3.id)
and not exists
(select sum(int_col)
from functional.alltypessmall t4
where t1.id = t4.id limit 0)
and not exists
(select min(int_col)
from functional.alltypestiny t5
where t1.id = t5.id and false)
---- PLAN
00:SCAN HDFS [functional.alltypestiny t1]
partitions=4/4 files=4 size=460B
====
# Correlated EXISTS and NOT EXISTS subqueries with limit 0 and
# aggregates. Some predicates evaluate to TRUE while others need to
# be evaluated at run-time. (IMPALA-1550)
select 1
from functional.alltypestiny t1
where not exists
(select id
from functional.alltypes t2
where t1.int_col = t2.int_col limit 0)
and exists
(select distinct int_col
from functional.alltypesagg t3
where t3.id > 100 and t1.id = t3.id)
and not exists
(select count(id)
from functional.alltypestiny t4
where t4.int_col = t1.tinyint_col
having count(id) > 200)
---- PLAN
06:HASH JOIN [LEFT ANTI JOIN]
| hash predicates: t1.tinyint_col = t4.int_col
|
|--04:AGGREGATE [FINALIZE]
| | output: count(id)
| | group by: t4.int_col
| | having: count(id) > 200
| |
| 03:SCAN HDFS [functional.alltypestiny t4]
| partitions=4/4 files=4 size=460B
|
05:HASH JOIN [RIGHT SEMI JOIN]
| hash predicates: t3.id = t1.id
|
|--00:SCAN HDFS [functional.alltypestiny t1]
| partitions=4/4 files=4 size=460B
| predicates: t1.id > 100
|
02:AGGREGATE [FINALIZE]
| group by: int_col, t3.id
|
01:SCAN HDFS [functional.alltypesagg t3]
partitions=11/11 files=11 size=814.73KB
predicates: t3.id > 100
====