Files
impala/testdata/workloads/functional-query/queries/QueryTest/subquery.test
Dimitris Tsirogiannis f21aed16fd Bug fixes in null-aware anti-join
This commit fixes issue IMPALA-1215 where NOT IN subqueries return wrong
results in the presence of null values.

Change-Id: I97e41c8df8ba864d0189595d670b3f0349fcad36
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/4467
Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
Reviewed-by: Nong Li <nong@cloudera.com>
Tested-by: Nong Li <nong@cloudera.com>
2014-09-23 07:33:23 -07:00

611 lines
12 KiB
Plaintext

====
---- QUERY
# Uncorrelated IN subquery
select a.id, a.int_col, a.string_col
from functional.alltypessmall a
where a.id in (select id from functional.alltypestiny where bool_col = false)
and a.id < 5
---- RESULTS
1,1,'1'
3,3,'3'
---- TYPES
INT, INT, STRING
====
---- QUERY
# Uncorrelated IN subquery with IS NOT NULL predicate
select a.id, a.int_col, a.string_col
from functional.alltypes a
where a.int_col in (select int_col from functional.alltypesagg where int_col is not null)
and a.id < 5
---- RESULTS
1,1,'1'
2,2,'2'
3,3,'3'
4,4,'4'
---- TYPES
INT, INT, STRING
====
---- QUERY
# Uncorrelated IN subquery with IS NULL predicate (empty result)
select a.id, a.int_col, a.string_col
from functional.alltypes a
where a.id in (select int_col from functional.alltypesagg where int_col is null)
and a.id < 5
---- RESULTS
---- TYPES
INT, INT, STRING
====
---- QUERY
# Uncorrelated NOT IN subquery
select id, year, month
from functional.alltypessmall
where id not in (select id from functional.alltypestiny where bool_col = false)
and id < 5
---- RESULTS
0,2009,1
2,2009,1
4,2009,1
---- TYPES
INT, INT, INT
====
---- QUERY
# Uncorrelated NOT IN subquery that returns only null values
select id
from functional.alltypestiny
where int_col not in (select int_col from functional.alltypesagg where int_col is null)
and id < 10
---- RESULTS
---- TYPES
INT
====
---- QUERY
# Uncorrelated NOT IN subquery that returns null and non-null values
select id
from functional.alltypestiny
where int_col not in (select int_col from functional.alltypesagg)
---- RESULTS
---- TYPES
INT
====
---- QUERY
# Uncorrelated NOT IN subquery that returns an empty set
# (result should include all the tuples of the outer)
select id, int_col
from functional.alltypestiny
where int_col not in (select int_col from functional.alltypesagg where id < 0)
order by id
---- RESULTS
0,0
1,1
2,0
3,1
4,0
5,1
6,0
7,1
---- TYPES
INT, INT
====
---- QUERY
# Outer with nulls and an uncorrelated NOT IN subquery that returns an empty set
# (result should include NULLs in int_col)
select id, int_col
from functional.alltypesagg
where int_col not in (select int_col from functional.alltypestiny where id < 0)
and id < 5
order by id
---- RESULTS
0,NULL
0,NULL
1,1
2,2
3,3
4,4
---- TYPES
INT, INT
====
---- QUERY
# Outer with NULLS and an uncorrelated NOT IN
select id
from functional.alltypesagg
where int_col is null and
int_col not in (select int_col from functional.alltypestiny)
---- RESULTS
---- TYPES
INT
====
---- QUERY
# Outer with NULLS and an uncorrelated NOT IN subquery that returns an empty set
select id, int_col
from functional.alltypesagg
where int_col is null and
int_col not in (select int_col from functional.alltypestiny where id < 0)
and id < 10
order by id
---- RESULTS
0,NULL
0,NULL
---- TYPES
INT, INT
====
---- QUERY
# Correlated IN subquery
select s.id, s.bool_col, s.int_col, s.date_string_col
from functional.alltypessmall s
where s.id in (select id from functional.alltypestiny t where t.int_col = s.int_col)
and s.bool_col = false
---- RESULTS
1,false,1,'01/01/09'
---- TYPES
INT, BOOLEAN, INT, STRING
====
---- QUERY
# Correlated NOT IN subquery
select s.id, s.bool_col, s.int_col
from functional.alltypessmall s
where s.id not in
(select id
from functional.alltypestiny t
where s.int_col = t.int_col and t.bool_col = false)
and s.id < 5
order by s.id
---- RESULTS
0,true,0
2,true,2
3,false,3
4,true,4
---- TYPES
INT, BOOLEAN, INT
====
---- QUERY
# Correlated NOT IN subquery that returns an empty set
# (results should include all the tuples of the outer that pass
# the remaining predicates)
select id, int_col
from functional.alltypestiny t
where t.int_col not in
(select int_col from functional.alltypes a where t.id = a.id and a.bigint_col < 0)
and id < 5
order by id
---- RESULTS
0,0
1,1
2,0
3,1
4,0
---- TYPES
INT, INT
====
---- QUERY
# Correlated NOT IN subquery that returns nulls
select id, int_col
from functional.alltypestiny t
where t.int_col not in
(select int_col from functional.alltypesagg a where int_col is null and a.id = t.id)
order by id
---- RESULTS
1,1
2,0
3,1
4,0
5,1
6,0
7,1
---- TYPES
INT, INT
====
---- QUERY
# Outer with nulls and a correlated NOT IN subquery that returns null and
# non-null values
select id, int_col
from functional.alltypesagg a
where a.int_col not in
(select int_col from functional.alltypesagg b where a.id = b.id)
and id < 10
---- RESULTS
---- TYPES
INT, INT
====
---- QUERY
# Outer with nulls and a correlated NOT IN subquery that does not return nulls
select id, int_col
from functional.alltypesagg a
where int_col not in
(select int_col from functional.alltypestiny t where a.id = t.id)
and bigint_col = 10
order by id
---- RESULTS
1001,1
2001,1
3001,1
4001,1
5001,1
6001,1
7001,1
8001,1
9001,1
---- TYPES
INT, INT
====
---- QUERY
# Correlated NOT IN subquery that returns an empty set
select id, int_col, bigint_col
from functional.alltypesagg a
where int_col not in
(select int_col from alltypestiny t where a.id = t.id and t.bigint_col < 0)
and bigint_col = 10
order by id
---- RESULTS
1,1,10
1001,1,10
2001,1,10
3001,1,10
4001,1,10
5001,1,10
6001,1,10
7001,1,10
8001,1,10
9001,1,10
---- TYPES
INT, INT, BIGINT
====
---- QUERY
# Outer that has only nulls, correlated NOT IN subquery
select id, int_col, bigint_col
from alltypesagg a
where int_col not in (select int_col from alltypesagg t where a.id = t.id)
and int_col is null
---- RESULTS
---- TYPES
INT, INT, BIGINT
====
---- QUERY
# Correlated EXISTS subquery
select count(*)
from functional.alltypestiny t
where exists (select * from functional.alltypessmall s where t.int_col = s.int_col)
and id < 4
---- RESULTS
4
---- TYPES
BIGINT
====
---- QUERY
# Correlated NOT EXISTS subquery
select id, int_col
from functional.alltypessmall t
where not exists (select 1 from functional.alltypestiny s where t.id = s.id)
and month = 1 and int_col < 5
order by id
---- RESULTS
10,0
11,1
12,2
13,3
14,4
20,0
21,1
22,2
23,3
24,4
---- TYPES
INT, INT
====
---- QUERY
# Uncorrelated EXISTS
select id
from functional.alltypestiny t
where exists (select 1 from functional.alltypessmall where bool_col = false)
and bool_col = true
order by id
---- RESULTS
0
2
4
6
---- TYPES
INT
====
---- QUERY
# Uncorrelated EXISTS that returns an empty set
select 1
from functional.alltypestiny t
where exists (select null from functional.alltypessmall where id < 0)
and t.id > 0
---- RESULTS
---- TYPES
TINYINT
====
---- QUERY
# Uncorrelated aggregate subquery
select count(*) from
functional.alltypessmall t
where t.id < (select max(id) from functional.alltypestiny)
and t.bool_col = true
---- RESULTS
4
---- TYPES
BIGINT
====
---- QUERY
# Uncorrelated aggregate subquery with count
select id, int_col, year, month
from functional.alltypessmall
where int_col = (select count(*) from functional.alltypestiny)
order by id
---- RESULTS
8,8,2009,1
18,8,2009,1
33,8,2009,2
43,8,2009,2
58,8,2009,3
68,8,2009,3
83,8,2009,4
93,8,2009,4
---- TYPES
INT, INT, INT, INT
====
---- QUERY
# Correlated aggregate subquery
select id, int_col, year, month
from functional.alltypessmall s
where s.int_col = (select count(*) from functional.alltypestiny t where s.id = t.id)
order by id
---- RESULTS
1,1,2009,1
10,0,2009,1
20,0,2009,1
25,0,2009,2
35,0,2009,2
45,0,2009,2
50,0,2009,3
60,0,2009,3
70,0,2009,3
75,0,2009,4
85,0,2009,4
95,0,2009,4
---- TYPES
INT, INT, INT, INT
====
---- QUERY
# Multiple subquery predicates
select id, bool_col, int_col, date_string_col
from functional.alltypessmall s
where s.id in (select id from functional.alltypestiny where bool_col = true)
and exists
(select *
from functional.alltypesagg g
where s.int_col = g.int_col and g.bigint_col < 100)
and s.int_col < (select count(*) from functional.alltypes where month = 1)
---- RESULTS
2,true,2,'01/01/09'
4,true,4,'01/01/09'
6,true,6,'01/01/09'
---- TYPES
INT, BOOLEAN, INT, STRING
====
---- QUERY
# Multiple nesting levels
select month, count(*)
from functional.alltypessmall s
where id in
(select id
from functional.alltypestiny t
where t.int_col <
(select min(int_col)
from functional.alltypesagg a
where a.bool_col = false and exists
(select * from functional.alltypes b where b.id = a.id)))
group by month
---- RESULTS
1,4
---- TYPES
INT, BIGINT
====
---- QUERY
# Multiple tables in outer select block and in subqueries
select t.id, t.month, t.year
from functional.alltypestiny t left outer join functional.alltypessmall s
on s.id = t.id
where t.int_col <
(select avg(a.int_col) * 2
from functional.alltypesagg a left outer join functional.alltypes b
on a.bigint_col = b.bigint_col
where a.id = t.id and b.int_col < 10)
order by t.id
---- RESULTS
1,1,2009
2,2,2009
3,2,2009
4,3,2009
5,3,2009
6,4,2009
7,4,2009
---- TYPES
INT, INT, INT
====
---- QUERY
# Subquery in the WITH clause
with t as
(select *
from functional.alltypessmall
where id in
(select id
from functional.alltypestiny
where bool_col = false))
select id, month, year from t where t.int_col =
(select count(*) from functional.alltypestiny where id < 5)
---- RESULTS
5,1,2009
---- TYPES
INT, INT, INT
====
---- QUERY
# Subquery in an inline view
select s.id, s.year
from functional.alltypessmall s left outer join
(select *
from functional.alltypestiny t
where t.id in (select id from functional.alltypesagg)) b
on (s.id = b.id)
where s.int_col < (select max(int_col) from functional.alltypes) and s.id < 10
order by s.id
---- RESULTS
0,2009
1,2009
2,2009
3,2009
4,2009
5,2009
6,2009
7,2009
8,2009
---- TYPES
INT, INT
====
---- QUERY
# Subquery returning a decimal
select id, double_col
from functional.alltypestiny
where double_col < (select min(d3) from functional.decimal_tbl)
order by id
---- RESULTS
0,0
2,0
4,0
6,0
---- TYPES
INT, DOUBLE
====
---- QUERY
# Compare a decimal with the result of a subquery
select d1, d2, d3
from functional.decimal_tbl
where d3 < (select max(double_col) from functional.alltypestiny)
---- RESULTS
1234,2222,1.2345678900
---- TYPES
DECIMAL, DECIMAL, DECIMAL
====
---- QUERY
# Distinct in the outer select block
select distinct bool_col
from functional.alltypestiny t
where 1 < (select count(*) from functional.alltypessmall)
---- RESULTS
false
true
---- TYPES
BOOLEAN
====
---- QUERY
# Distinct with an unqualified star in the outer select block
select distinct *
from functional.alltypestiny t
where 1 < (select avg(distinct id) from functional.alltypessmall)
and id < 2
---- RESULTS
0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1
1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1
---- TYPES
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP, INT, INT
====
---- QUERY
# Aggregate subquery in an IS NULL predicate
select count(*)
from functional.alltypestiny
where (select max(int_col) from functional.alltypesagg where int_col is null) is null
---- RESULTS
8
---- TYPES
BIGINT
====
---- QUERY
# Aggregate subquery in an IS NOT NULL predicate
select count(*)
from functional.alltypestiny
where (select max(int_col) from functional.alltypesagg where int_col is null) is not null
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# Correlated aggregate subquery in an IS NULL predicate
select id, bool_col
from functional.alltypestiny t
where (select sum(int_col) from functional.alltypesagg g where t.id = g.id) is null
order by id
---- RESULTS
0,true
---- TYPES
INT, BOOLEAN
====
---- QUERY
# Correlated aggregate subquery in an IS NOT NULL predicate
select id, bool_col
from functional.alltypestiny t
where (select sum(int_col) from functional.alltypesagg g where t.id = g.id) is not null
order by id
---- RESULTS
1,false
2,true
3,false
4,true
5,false
6,true
7,false
---- TYPES
INT, BOOLEAN
====
---- QUERY
# Function with a scalar subquery
select count(*)
from functional.alltypestiny t
where
zeroifnull((select max(int_col) from functional.alltypesagg where int_col is null)) = 0
---- RESULTS
8
---- TYPES
BIGINT
====
---- QUERY
# Function with a scalar subquery
select id
from functional.alltypestiny t
where
nullifzero((select min(id) from functional.alltypesagg where int_col is null)) is null
and id < 5
order by id
---- RESULTS
0
1
2
3
4
---- TYPES
INT
====
---- QUERY
# Between predicate with subqueries
select id from functional.alltypessmall
where id between
(select min(bigint_col) from functional.alltypestiny) and
(select max(bigint_col) from functional.alltypestiny)
order by id
---- RESULTS
0
1
2
3
4
5
6
7
8
9
10
---- TYPES
INT
====