Files
impala/testdata/workloads/functional-query/queries/QueryTest/subquery.test
Taras Bobrovytsky 46c3e43edb IMPALA-3232: Allow not-exists uncorrelated subqueries
Before this patch, correlated exists and not exists subqueries were
rewritten as as left semi and anti joins respectively. Uncorrelated
exists subqueries were rewritten as cross joins, and uncorrelated
not-exists subqueries were not supported at all. This patch takes
advantage of the nested loop join that was recently introduced, which
allows us to rewrite both correlated and uncorrelated exists subqueries
as left semi joins and both correlated and uncorrelated not-exists
subqueries as anti joins.

Change-Id: I52ae12f116d026190f3a2a7575cda855317d11e8
Reviewed-on: http://gerrit.cloudera.org:8080/2792
Reviewed-by: Taras Bobrovytsky <tbobrovytsky@cloudera.com>
Tested-by: Internal Jenkins
2016-05-12 23:06:36 -07:00

839 lines
17 KiB
Plaintext

====
---- QUERY
# Uncorrelated IN subquery
select a.id, a.int_col, a.string_col
from functional.alltypessmall a
where a.id in (select id from functional.alltypestiny where bool_col = false)
and a.id < 5
---- RESULTS
1,1,'1'
3,3,'3'
---- TYPES
INT, INT, STRING
====
---- QUERY
# Uncorrelated IN subquery with IS NOT NULL predicate
select a.id, a.int_col, a.string_col
from functional.alltypes a
where a.int_col in (select int_col from functional.alltypesagg where int_col is not null)
and a.id < 5
---- RESULTS
1,1,'1'
2,2,'2'
3,3,'3'
4,4,'4'
---- TYPES
INT, INT, STRING
====
---- QUERY
# Uncorrelated IN subquery with IS NULL predicate (empty result)
select a.id, a.int_col, a.string_col
from functional.alltypes a
where a.id in (select int_col from functional.alltypesagg where int_col is null)
and a.id < 5
---- RESULTS
---- TYPES
INT, INT, STRING
====
---- QUERY
# Uncorrelated NOT IN subquery
select id, year, month
from functional.alltypessmall
where id not in (select id from functional.alltypestiny where bool_col = false)
and id < 5
---- RESULTS
0,2009,1
2,2009,1
4,2009,1
---- TYPES
INT, INT, INT
====
---- QUERY
# Uncorrelated NOT IN subquery that returns only null values
select id
from functional.alltypestiny
where int_col not in (select int_col from functional.alltypesagg where int_col is null)
and id < 10
---- RESULTS
---- TYPES
INT
====
---- QUERY
# Uncorrelated NOT IN subquery that returns null and non-null values
select id
from functional.alltypestiny
where int_col not in (select int_col from functional.alltypesagg)
---- RESULTS
---- TYPES
INT
====
---- QUERY
# Uncorrelated NOT IN subquery that returns an empty set
# (result should include all the tuples of the outer)
select id, int_col
from functional.alltypestiny
where int_col not in (select int_col from functional.alltypesagg where id < 0)
order by id
---- RESULTS
0,0
1,1
2,0
3,1
4,0
5,1
6,0
7,1
---- TYPES
INT, INT
====
---- QUERY
# Outer with nulls and an uncorrelated NOT IN subquery that returns an empty set
# (result should include NULLs in int_col)
select id, int_col
from functional.alltypesagg
where int_col not in (select int_col from functional.alltypestiny where id < 0)
and id < 5
order by id
---- RESULTS
0,NULL
0,NULL
1,1
2,2
3,3
4,4
---- TYPES
INT, INT
====
---- QUERY
# Outer with NULLS and an uncorrelated NOT IN
select id
from functional.alltypesagg
where int_col is null and
int_col not in (select int_col from functional.alltypestiny)
---- RESULTS
---- TYPES
INT
====
---- QUERY
# Outer with NULLS and an uncorrelated NOT IN subquery that returns an empty set
select id, int_col
from functional.alltypesagg
where int_col is null and
int_col not in (select int_col from functional.alltypestiny where id < 0)
and id < 10
order by id
---- RESULTS
0,NULL
0,NULL
---- TYPES
INT, INT
====
---- QUERY
# Correlated IN subquery
select s.id, s.bool_col, s.int_col, s.date_string_col
from functional.alltypessmall s
where s.id in (select id from functional.alltypestiny t where t.int_col = s.int_col)
and s.bool_col = false
---- RESULTS
1,false,1,'01/01/09'
---- TYPES
INT, BOOLEAN, INT, STRING
====
---- QUERY
# Correlated NOT IN subquery
select s.id, s.bool_col, s.int_col
from functional.alltypessmall s
where s.id not in
(select id
from functional.alltypestiny t
where s.int_col = t.int_col and t.bool_col = false)
and s.id < 5
order by s.id
---- RESULTS
0,true,0
2,true,2
3,false,3
4,true,4
---- TYPES
INT, BOOLEAN, INT
====
---- QUERY
# Correlated NOT IN subquery that returns an empty set
# (results should include all the tuples of the outer that pass
# the remaining predicates)
select id, int_col
from functional.alltypestiny t
where t.int_col not in
(select int_col from functional.alltypes a where t.id = a.id and a.bigint_col < 0)
and id < 5
order by id
---- RESULTS
0,0
1,1
2,0
3,1
4,0
---- TYPES
INT, INT
====
---- QUERY
# Correlated NOT IN subquery that returns nulls
select id, int_col
from functional.alltypestiny t
where t.int_col not in
(select int_col from functional.alltypesagg a where int_col is null and a.id = t.id)
order by id
---- RESULTS
1,1
2,0
3,1
4,0
5,1
6,0
7,1
---- TYPES
INT, INT
====
---- QUERY
# Outer with nulls and a correlated NOT IN subquery that returns null and
# non-null values
select id, int_col
from functional.alltypesagg a
where a.int_col not in
(select int_col from functional.alltypesagg b where a.id = b.id)
and id < 10
---- RESULTS
---- TYPES
INT, INT
====
---- QUERY
# Outer with nulls and a correlated NOT IN subquery that does not return nulls
select id, int_col
from functional.alltypesagg a
where int_col not in
(select int_col from functional.alltypestiny t where a.id = t.id)
and bigint_col = 10
order by id
---- RESULTS
1001,1
2001,1
3001,1
4001,1
5001,1
6001,1
7001,1
8001,1
9001,1
---- TYPES
INT, INT
====
---- QUERY
# Correlated NOT IN subquery that returns an empty set
select id, int_col, bigint_col
from functional.alltypesagg a
where int_col not in
(select int_col from alltypestiny t where a.id = t.id and t.bigint_col < 0)
and bigint_col = 10
order by id
---- RESULTS
1,1,10
1001,1,10
2001,1,10
3001,1,10
4001,1,10
5001,1,10
6001,1,10
7001,1,10
8001,1,10
9001,1,10
---- TYPES
INT, INT, BIGINT
====
---- QUERY
# Outer that has only nulls, correlated NOT IN subquery
select id, int_col, bigint_col
from alltypesagg a
where int_col not in (select int_col from alltypesagg t where a.id = t.id)
and int_col is null
---- RESULTS
---- TYPES
INT, INT, BIGINT
====
---- QUERY
# Correlated NOT IN subquery (IMPALA-1297)
select count(distinct id)
from alltypesagg t1
where t1.day not in
(select tt1.tinyint_col as tinyint_col_1
from alltypesagg tt1
where t1.smallint_col = tt1.smallint_col)
---- RESULTS
8200
---- TYPES
BIGINT
====
---- QUERY
# Correlated EXISTS subquery
select count(*)
from functional.alltypestiny t
where exists (select * from functional.alltypessmall s where t.int_col = s.int_col)
and id < 4
---- RESULTS
4
---- TYPES
BIGINT
====
---- QUERY
# Correlated NOT EXISTS subquery
select id, int_col
from functional.alltypessmall t
where not exists (select 1 from functional.alltypestiny s where t.id = s.id)
and month = 1 and int_col < 5
order by id
---- RESULTS
10,0
11,1
12,2
13,3
14,4
20,0
21,1
22,2
23,3
24,4
---- TYPES
INT, INT
====
---- QUERY
# Uncorrelated EXISTS
select id
from functional.alltypestiny t
where exists (select 1 from functional.alltypessmall where bool_col = false)
and bool_col = true
order by id
---- RESULTS
0
2
4
6
---- TYPES
INT
====
---- QUERY
# Uncorrelated EXISTS that returns an empty set
select 1
from functional.alltypestiny t
where exists (select null from functional.alltypessmall where id < 0)
and t.id > 0
---- RESULTS
---- TYPES
TINYINT
====
---- QUERY
# Uncorrelated NOT EXISTS
select id
from functional.alltypestiny t
where not exists (select 1 from functional.alltypessmall where bool_col = false)
and bool_col = true
---- RESULTS
---- TYPES
INT
====
---- QUERY
# Uncorrelated NOT EXISTS that returns an empty set
select 1
from functional.alltypestiny t
where not exists (select null from functional.alltypessmall where id < 0)
and t.id > 0
---- RESULTS
1
1
1
1
1
1
1
---- TYPES
TINYINT
====
---- QUERY
# Uncorrelated aggregate subquery
select count(*) from
functional.alltypessmall t
where t.id < (select max(id) from functional.alltypestiny)
and t.bool_col = true
---- RESULTS
4
---- TYPES
BIGINT
====
---- QUERY
# Uncorrelated aggregate subquery with count
select id, int_col, year, month
from functional.alltypessmall
where int_col = (select count(*) from functional.alltypestiny)
order by id
---- RESULTS
8,8,2009,1
18,8,2009,1
33,8,2009,2
43,8,2009,2
58,8,2009,3
68,8,2009,3
83,8,2009,4
93,8,2009,4
---- TYPES
INT, INT, INT, INT
====
---- QUERY
# Correlated aggregate subquery
select id, int_col, year, month
from functional.alltypessmall s
where s.int_col = (select count(*) from functional.alltypestiny t where s.id = t.id)
order by id
---- RESULTS
1,1,2009,1
10,0,2009,1
20,0,2009,1
25,0,2009,2
35,0,2009,2
45,0,2009,2
50,0,2009,3
60,0,2009,3
70,0,2009,3
75,0,2009,4
85,0,2009,4
95,0,2009,4
---- TYPES
INT, INT, INT, INT
====
---- QUERY
# Multiple subquery predicates
select id, bool_col, int_col, date_string_col
from functional.alltypessmall s
where s.id in (select id from functional.alltypestiny where bool_col = true)
and exists
(select *
from functional.alltypesagg g
where s.int_col = g.int_col and g.bigint_col < 100)
and s.int_col < (select count(*) from functional.alltypes where month = 1)
---- RESULTS
2,true,2,'01/01/09'
4,true,4,'01/01/09'
6,true,6,'01/01/09'
---- TYPES
INT, BOOLEAN, INT, STRING
====
---- QUERY
# Multiple nesting levels
select month, count(*)
from functional.alltypessmall s
where id in
(select id
from functional.alltypestiny t
where t.int_col <
(select min(int_col)
from functional.alltypesagg a
where a.bool_col = false and exists
(select * from functional.alltypes b where b.id = a.id)))
group by month
---- RESULTS
1,4
---- TYPES
INT, BIGINT
====
---- QUERY
# Multiple tables in outer select block and in subqueries
select t.id, t.month, t.year
from functional.alltypestiny t left outer join functional.alltypessmall s
on s.id = t.id
where t.int_col <
(select avg(a.int_col) * 2
from functional.alltypesagg a left outer join functional.alltypes b
on a.bigint_col = b.bigint_col
where a.id = t.id and b.int_col < 10)
order by t.id
---- RESULTS
1,1,2009
2,2,2009
3,2,2009
4,3,2009
5,3,2009
6,4,2009
7,4,2009
---- TYPES
INT, INT, INT
====
---- QUERY
# Subquery in the WITH clause
with t as
(select *
from functional.alltypessmall
where id in
(select id
from functional.alltypestiny
where bool_col = false))
select id, month, year from t where t.int_col =
(select count(*) from functional.alltypestiny where id < 5)
---- RESULTS
5,1,2009
---- TYPES
INT, INT, INT
====
---- QUERY
# Subquery in an inline view
select s.id, s.year
from functional.alltypessmall s left outer join
(select *
from functional.alltypestiny t
where t.id in (select id from functional.alltypesagg)) b
on (s.id = b.id)
where s.int_col < (select max(int_col) from functional.alltypes) and s.id < 10
order by s.id
---- RESULTS
0,2009
1,2009
2,2009
3,2009
4,2009
5,2009
6,2009
7,2009
8,2009
---- TYPES
INT, INT
====
---- QUERY
# Subquery returning a decimal
select id, double_col
from functional.alltypestiny
where double_col < (select min(d3) from functional.decimal_tbl)
order by id
---- RESULTS
0,0
2,0
4,0
6,0
---- TYPES
INT, DOUBLE
====
---- QUERY
# Compare a decimal with the result of a subquery
select d1, d2, d3
from functional.decimal_tbl
where d3 < (select max(double_col) from functional.alltypestiny)
---- RESULTS
1234,2222,1.2345678900
---- TYPES
DECIMAL, DECIMAL, DECIMAL
====
---- QUERY
# Distinct in the outer select block
select distinct bool_col
from functional.alltypestiny t
where 1 < (select count(*) from functional.alltypessmall)
---- RESULTS
false
true
---- TYPES
BOOLEAN
====
---- QUERY
# Distinct with an unqualified star in the outer select block
select distinct *
from functional.alltypestiny t
where 1 < (select avg(distinct id) from functional.alltypessmall)
and id < 2
---- RESULTS
0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1
1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1
---- TYPES
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP, INT, INT
====
---- QUERY
# Aggregate subquery in an IS NULL predicate
select count(*)
from functional.alltypestiny
where (select max(int_col) from functional.alltypesagg where int_col is null) is null
---- RESULTS
8
---- TYPES
BIGINT
====
---- QUERY
# Aggregate subquery in an IS NOT NULL predicate
select count(*)
from functional.alltypestiny
where (select max(int_col) from functional.alltypesagg where int_col is null) is not null
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# Correlated aggregate subquery in an IS NULL predicate
select id, bool_col
from functional.alltypestiny t
where (select sum(int_col) from functional.alltypesagg g where t.id = g.id) is null
order by id
---- RESULTS
0,true
---- TYPES
INT, BOOLEAN
====
---- QUERY
# Correlated aggregate subquery in an IS NOT NULL predicate
select id, bool_col
from functional.alltypestiny t
where (select sum(int_col) from functional.alltypesagg g where t.id = g.id) is not null
order by id
---- RESULTS
1,false
2,true
3,false
4,true
5,false
6,true
7,false
---- TYPES
INT, BOOLEAN
====
---- QUERY
# Function with a scalar subquery
select count(*)
from functional.alltypestiny t
where
zeroifnull((select max(int_col) from functional.alltypesagg where int_col is null)) = 0
---- RESULTS
8
---- TYPES
BIGINT
====
---- QUERY
# Function with a scalar subquery
select id
from functional.alltypestiny t
where
nullifzero((select min(id) from functional.alltypesagg where int_col is null)) is null
and id < 5
order by id
---- RESULTS
0
1
2
3
4
---- TYPES
INT
====
---- QUERY
# Between predicate with subqueries
select id from functional.alltypessmall
where id between
(select min(bigint_col) from functional.alltypestiny) and
(select max(bigint_col) from functional.alltypestiny)
order by id
---- RESULTS
0
1
2
3
4
5
6
7
8
9
10
---- TYPES
INT
====
---- QUERY
# Correlated EXISTS subquery with an analytic function and a group by clause
select id, int_col
from functional.alltypesagg a
where exists
(select id, sum(int_col) over (partition by bool_col)
from functional.alltypes b
where a.id = b.id and b.bigint_col < 100
group by id, int_col, bool_col
order by id)
and id < 5
order by id
---- RESULTS
0,NULL
0,NULL
1,1
2,2
3,3
4,4
---- TYPES
INT, INT
====
---- QUERY
# Correlated NOT EXISTS subquery with analytic function and group by
select id, int_col from alltypestiny t
where not exists
(select id, sum(int_col) over (partition by bool_col)
from alltypesagg a where bool_col = false and t.id = a.int_col
group by id, int_col, bool_col having sum(id) = 1)
order by id, int_col
---- RESULTS
0,0
2,0
3,1
4,0
5,1
6,0
7,1
---- TYPES
INT, INT
====
---- QUERY
# Testing a crash in the buffered tuple stream related to IMPALA-1306.
SELECT COUNT(t1.id) AS int_col_1
FROM alltypes t1
WHERE t1.month IN
(SELECT tt1.tinyint_col AS tinyint_col_1
FROM alltypes tt1
WHERE t1.month = tt1.tinyint_col)
AND EXISTS
(SELECT MIN(tt1.tinyint_col) AS tinyint_col_1
FROM alltypestiny tt1)
AND
(SELECT t1.year AS int_col_1
FROM alltypes t1
ORDER BY t1.year ASC LIMIT 1) = t1.id
---- RESULTS
1
---- TYPES
BIGINT
====
---- QUERY
# Testing a crash in the buffered tuple stream related to IMPALA-1306.
SELECT COUNT(t1.id) AS int_col_1
FROM alltypestiny t1
WHERE t1.month IN
(SELECT tt1.tinyint_col AS tinyint_col_1
FROM alltypes tt1
WHERE t1.month = tt1.tinyint_col)
AND EXISTS
(SELECT MIN(tt1.tinyint_col) AS tinyint_col_1
FROM alltypestiny tt1)
AND
(SELECT t1.year AS int_col_1
FROM alltypes t1
ORDER BY t1.year ASC LIMIT 1) = t1.id
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# Regression test for IMPALA-1318.
select count(t1.c) over () from
(select max(int_col) c from functional.alltypestiny) t1
where t1.c not in
(select sum(t1.smallint_col) from functional.alltypes t1)
---- RESULTS
1
---- TYPES
BIGINT
====
---- QUERY
# Regression test for IMPALA-1348.
select count(*)
FROM alltypesagg t1
WHERE day IS NOT NULL
AND t1.int_col NOT IN
(SELECT tt1.month AS tinyint_col_1
FROM alltypesagg tt1
LEFT JOIN alltypestiny tt2 ON tt2.year = tt1.id
AND t1.bigint_col = tt2.smallint_col)
---- RESULTS
10000
---- TYPES
BIGINT
====
---- QUERY
# WITH definition containing a subquery is used in a UNION
WITH foo AS (SELECT 1 FROM alltypestiny WHERE int_col IN (SELECT 1))
SELECT * FROM foo
UNION SELECT * FROM foo
---- RESULTS
1
---- TYPES
TINYINT
====
---- QUERY
# Regression test for IMPALA-1365
WITH foo AS (SELECT 1 FROM alltypestiny WHERE int_col IN (SELECT 1))
SELECT * FROM (SELECT * FROM foo UNION SELECT * FROM foo) bar
---- RESULTS
1
---- TYPES
TINYINT
====
---- QUERY
# UNION of query with subquery and various other queries
SELECT 1 FROM ALLTYPESTINY WHERE 1 = (SELECT 1)
UNION VALUES (2)
UNION ALL SELECT 3
---- RESULTS
1
2
3
---- TYPES
TINYINT
====
---- QUERY
# Correlated NOT EXISTS subquery with an aggregate function (IMPALA-1550)
SELECT t1.bigint_col
FROM alltypestiny t1
WHERE NOT EXISTS
(SELECT SUM(smallint_col) AS int_col
FROM alltypestiny
WHERE t1.date_string_col = string_col AND t1.timestamp_col = timestamp_col)
GROUP BY t1.bigint_col
---- RESULTS
---- TYPES
BIGINT
====
---- QUERY
# Correlated EXISTS subquery with an aggregate function (IMPALA-1550)
SELECT t1.bigint_col
FROM alltypestiny t1
WHERE EXISTS
(SELECT SUM(smallint_col) AS int_col
FROM alltypestiny
WHERE t1.date_string_col = string_col AND t1.timestamp_col = timestamp_col)
GROUP BY t1.bigint_col
---- RESULTS
0
10
---- TYPES
BIGINT
====
---- QUERY
# Correlated EXISTS subquery with a having clause (IMPALA-2734)
SELECT 1
FROM alltypestiny t1
WHERE EXISTS
(SELECT 1
FROM alltypestiny t2
WHERE t1.id = t2.id AND t1.int_col = t2.int_col
GROUP BY t2.id
HAVING count(1) = 1)
---- RESULTS
1
1
1
1
1
1
1
1
---- TYPES
TINYINT
====