Files
impala/testdata/workloads/functional-query/queries/QueryTest/subquery.test
Henry Robinson 16af29ea5f IMPALA-770: Fix crash in aggregation node with zero-width tuple
The select exprs of an inline view may not always be materialised, yet
the output tuple itself may be. This patch fixes a crash in this
situation in the backend aggregation node which assumed its output tuple
would always have at least one materialised slot.

The cause was a couple of too-conservative DCHECKs that failed if the
tuple was NULL. In fact, the code was robust to this possibility without
the checks, so this bug didn't affect release builds of Impala.

Change-Id: If0b90809d30fcd196f55197953392452d1ac9c4f
Reviewed-on: http://gerrit.ent.cloudera.com:8080/1431
Reviewed-by: Henry Robinson <henry@cloudera.com>
Tested-by: jenkins
(cherry picked from commit 8c1c21b66c43e900760ace54d090305f32a85a1f)
Reviewed-on: http://gerrit.ent.cloudera.com:8080/1471
Tested-by: Henry Robinson <henry@cloudera.com>
2014-02-05 22:01:35 -08:00

490 lines
12 KiB
Plaintext

====
---- QUERY
# join between three tables, extra join predicates, extra scan predicates, nulls in
# join cols
# (alltypesagg.tinyint_col contains nulls instead of 0s)
# Inline view a restricts alltypesagg to month=1 and is joined to alltypessmall b
# inside inline view x; x is then joined to alltypessmall c. The outer WHERE mixes
# predicates on x only, on c only, and one expression spanning both x and c.
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
from (
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
a.int_col int_col, a.month month,
b.float_col float_col, b.id id
from (
select *
from alltypesagg a
where month=1
) a
join alltypessmall b
on (a.smallint_col = b.id)
) x
join alltypessmall c on (x.tinyint_col = c.id)
where x.day=1
and x.int_col > 899
and x.float_col > 4.5
and c.string_col < '7'
and x.int_col + x.float_col + cast(c.string_col as float) < 1000
---- TYPES
smallint, int, tinyint, int, int, float, string
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
31,31,1,1,931,6.599999904632568,'1'
32,32,2,2,932,7.699999809265137,'2'
33,33,3,3,933,8.800000190734863,'3'
34,34,4,4,934,9.899999618530273,'4'
41,41,1,1,941,6.599999904632568,'1'
42,42,2,2,942,7.699999809265137,'2'
43,43,3,3,943,8.800000190734863,'3'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
81,81,1,1,981,6.599999904632568,'1'
82,82,2,2,982,7.699999809265137,'2'
83,83,3,3,983,8.800000190734863,'3'
84,84,4,4,984,9.899999618530273,'4'
91,91,1,1,991,6.599999904632568,'1'
====
---- QUERY
# Same join as above, but subquery on the RHS
# Each inline view is the right-hand input of its join: alltypessmall b join (...) a,
# and alltypessmall c join (...) x. The select list and the WHERE predicates are
# unchanged from the previous query.
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
from alltypessmall c
join
(
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
a.int_col int_col, a.month month,
b.float_col float_col, b.id id
from alltypessmall b
join
(
select *
from alltypesagg a
where month=1
) a
on (a.smallint_col = b.id)
) x
on (x.tinyint_col = c.id)
where x.day=1
and x.int_col > 899
and x.float_col > 4.5
and c.string_col < '7'
and x.int_col + x.float_col + cast(c.string_col as float) < 1000
---- TYPES
smallint, int, tinyint, int, int, float, string
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
31,31,1,1,931,6.599999904632568,'1'
32,32,2,2,932,7.699999809265137,'2'
33,33,3,3,933,8.800000190734863,'3'
34,34,4,4,934,9.899999618530273,'4'
41,41,1,1,941,6.599999904632568,'1'
42,42,2,2,942,7.699999809265137,'2'
43,43,3,3,943,8.800000190734863,'3'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
81,81,1,1,981,6.599999904632568,'1'
82,82,2,2,982,7.699999809265137,'2'
83,83,3,3,983,8.800000190734863,'3'
84,84,4,4,984,9.899999618530273,'4'
91,91,1,1,991,6.599999904632568,'1'
====
---- QUERY
# aggregate without group by
# The aggregation reads from a pass-through inline view (a) and is itself wrapped
# in an inline view (b) whose columns are all selected via '*'.
select *
from (
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col),
sum(tinyint_col), avg(tinyint_col)
from (
select * from alltypesagg
) a
) b
---- TYPES
bigint, bigint, tinyint, tinyint, bigint, double
---- RESULTS
10000,9000,1,9,45000,5
====
---- QUERY
# aggregate with group-by, having
# 'group by 1' refers to the first select item (int_col % 7). The HAVING clause
# belongs to view b; the outer WHERE then filters on b's output columns c1 and c2.
select *
from (
select int_col % 7 c1, count(*) c2, avg(int_col) c3
from (
select * from alltypesagg
) a
group by 1
having avg(int_col) > 500 or count(*) = 10
) b
where c1 is not null
and c2 > 10
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
4,1430,501
5,1430,502
====
---- QUERY
# multiple levels of aggregation
# x:  groups functional_hbase.alltypessmall by (int_col, tinyint_col) and keeps the
#     first 5 rows when ordered by those two columns
# x2: passes x's columns through unchanged
# t:  re-aggregates x2 (max(c2) grouped by c1, c3) with a limit of 10
# The outermost query filters on c1, orders by the second select item (c3) and then
# by the first (c1) descending, and keeps 3 rows.
select c1, c3, m2
from (
select c1, c3, max(c2) m2
from (
select c1, c2, c3
from (
select int_col c1, tinyint_col c2, max(id) c3
from functional_hbase.alltypessmall
group by 1, 2
order by 1,2
limit 5
) x
) x2
group by c1, c3
limit 10
) t
where c1 > 0
order by 2, 1 desc
limit 3
---- TYPES
int, int, tinyint
---- RESULTS
1,96,1
2,97,2
3,98,3
====
---- QUERY
# do not materialize the agg expr slot
# The outer query selects only the grouping columns c1 and c2; the aggregate
# c3 (min(float_col)) computed in view x is never referenced.
select c1, c2
from (
select int_col c1, tinyint_col c2, min(float_col) c3
from functional_hbase.alltypessmall
group by 1, 2
) x
---- TYPES
int, tinyint
---- RESULTS
0,0
1,1
2,2
3,3
4,4
5,5
6,6
7,7
8,8
9,9
====
---- QUERY
# subquery with aggregation and order by/limit, as left-hand side of join;
# having clause in subquery is transferred to merge agg step in distrib plan
# View t1 exposes only int_col and count(*), so the unqualified 'month' in the
# outer WHERE has to come from t2 (alltypes).
select *
from (
select int_col, count(*)
from alltypessmall
where month = 1
group by int_col
having count(*) > 2
order by count(*) desc, int_col limit 5
) t1
join alltypes t2 on (t1.int_col = t2.id)
where month = 1
---- TYPES
int, bigint, int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp, int, int
---- RESULTS
0,3,0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1
1,3,1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1
2,3,2,true,2,2,2,20,2.200000047683716,20.2,'01/01/09','2',2009-01-01 00:02:00.100000000,2009,1
3,3,3,false,3,3,3,30,3.299999952316284,30.3,'01/01/09','3',2009-01-01 00:03:00.300000000,2009,1
4,3,4,true,4,4,4,40,4.400000095367432,40.4,'01/01/09','4',2009-01-01 00:04:00.600000000,2009,1
====
---- QUERY
# select distinct * on top of an inline view that aggregates; the HAVING clause
# filters on a grouping column (bool_col) instead of an aggregate, and the outer
# WHERE filters on the other grouping column (tinyint_col)
select distinct *
from (
select bool_col, tinyint_col, count(*)
from alltypesagg
group by bool_col, tinyint_col
having bool_col = true
) x
where tinyint_col < 6
---- TYPES
boolean, tinyint, bigint
---- RESULTS
true,2,1000
true,4,1000
====
---- QUERY
# distinct w/ explicit select list
# The DISTINCT is inside view x, over view y which pre-filters alltypesagg on
# tinyint_col; the outer query adds a predicate on bool_col.
select *
from (
select distinct bool_col, tinyint_col
from (
select * from alltypesagg where tinyint_col < 7
) y
) x
where bool_col = true
---- TYPES
boolean, tinyint
---- RESULTS
true,2
true,4
true,6
====
---- QUERY
# semi-join on string
# The left semi join (j.test_name = d.name) is inside view x, which exposes only
# DimTbl's columns (d.*); the outer WHERE filters on x.name.
select *
from (
select d.*
from DimTbl d left semi join JoinTbl j on (j.test_name = d.name)
) x
where x.name > 'Name1'
---- TYPES
bigint, string, int
---- RESULTS
1002,'Name2',94611
1003,'Name3',94612
1004,'Name4',94612
1005,'Name5',94613
1006,'Name6',94613
====
---- QUERY
# left outer join between two inline views that are plain 'select *' wrappers
# around JoinTbl and DimTbl; the only WHERE predicate is on the left side (j)
select j.*, d.*
from (
select *
from JoinTbl a
) j
left outer join
(
select *
from DimTbl b
) d
on (j.test_name = d.name)
where j.test_id <= 1006
---- TYPES
bigint, string, int, int, bigint, string, int
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
1003,'Name3',94611,5000,1003,'Name3',94612
1004,'Name4',94611,5000,1004,'Name4',94612
1005,'Name5',94611,5000,1005,'Name5',94613
1006,'Name16',94612,15000,NULL,'NULL',NULL
1006,'Name16',94612,5000,NULL,'NULL',NULL
1006,'Name16',94616,15000,NULL,'NULL',NULL
1006,'Name16',94616,5000,NULL,'NULL',NULL
1006,'Name6',94616,15000,1006,'Name6',94613
1006,'Name6',94616,5000,1006,'Name6',94613
====
---- QUERY
# TODO: If we apply a predicate on d, the result will be incorrect. This is a general
# predicate evaluation issue.
# NOTE(review): as written, this query has no predicate on d and is the same
# left outer join as the preceding test case; presumably a predicate on d is meant
# to be added once the issue is resolved -- confirm.
select j.*, d.*
from (
select *
from JoinTbl a
) j
left outer join
(
select *
from DimTbl b
) d
on (j.test_name = d.name)
where j.test_id <= 1006
---- TYPES
bigint, string, int, int, bigint, string, int
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
1003,'Name3',94611,5000,1003,'Name3',94612
1004,'Name4',94611,5000,1004,'Name4',94612
1005,'Name5',94611,5000,1005,'Name5',94613
1006,'Name16',94612,15000,NULL,'NULL',NULL
1006,'Name16',94612,5000,NULL,'NULL',NULL
1006,'Name16',94616,15000,NULL,'NULL',NULL
1006,'Name16',94616,5000,NULL,'NULL',NULL
1006,'Name6',94616,15000,1006,'Name6',94613
1006,'Name6',94616,5000,1006,'Name6',94613
====
---- QUERY
# Constant selects in subqueries
# The inline view has no FROM clause and two unaliased constant select items.
select * from (select 1, 2) x
---- TYPES
tinyint, tinyint
---- RESULTS
1,2
====
---- QUERY
# Constant selects in subqueries
# Nested inline views; the predicate in view b filters on the aliased constant y
# produced by view a.
select * from (select y from (select 1 y) a where y < 10) b
---- TYPES
tinyint
---- RESULTS
1
====
---- QUERY
# Constant selects in subqueries
# Union of constant selects whose last operand reads from a nested inline view
# that is itself a constant select.
select * from (select 1 union all select 2 union all select * from (select 3) y) x
---- TYPES
tinyint
---- RESULTS
1
2
3
====
---- QUERY
# Join on inline views made up of unions of constant selects
# x is joined to y on column a, and z is joined to y on column b.
select * from
(select 1 a, 2 b union all select 1 a, 2 b) x
inner join
(select 1 a, 3 b union all select 1 a, 2 b) y on x.a = y.a
inner join
(select 1 a, 3 b union all select 1 a, 3 b) z on z.b = y.b
---- TYPES
tinyint, tinyint, tinyint, tinyint, tinyint, tinyint
---- RESULTS
1,2,1,3,1,3
1,2,1,3,1,3
1,2,1,3,1,3
1,2,1,3,1,3
====
---- QUERY
# Semi and inner join on a table and on inline views made up of constant selects
# Note that y is the right-hand side of the left semi join, yet its columns are
# referenced in the select list (y.*) and in the ON clause of the inner join.
select x.date_string_col, y.*, z.* from functional.alltypessmall x
left semi join
(select 1 a, 3 b union all select 1 a, 3 b) y on y.a = x.id
inner join
(select 1 a, 3 b union all select 1 a, 3 b) z on z.b = y.b
---- TYPES
string, tinyint, tinyint, tinyint, tinyint
---- RESULTS
'01/01/09',1,3,1,3
'01/01/09',1,3,1,3
====
---- QUERY
# Values statement in subqueries
# A two-row, two-column VALUES list is used directly as an inline view.
select * from (values(1, 2), (3, 4)) x
---- RESULTS
1,2
3,4
---- TYPES
TINYINT, TINYINT
====
---- QUERY
# Values statement in subqueries
# The alias y is given on the first row of the VALUES list only; the predicate
# y < 10 keeps the row holding 1 and drops the row holding 11.
select * from (select y from (values(1 as y), (11)) a where y < 10) b
---- RESULTS
1
---- TYPES
TINYINT
====
---- QUERY
# Values statement in subqueries with union
# The union's last operand selects from a nested inline view that is itself a
# VALUES statement.
select * from (values(1), (2) union all select * from (values(3)) y) x
---- RESULTS
1
2
3
---- TYPES
TINYINT
====
---- QUERY
# Join on inline views made up of values statements
# Column aliases a and b are declared on the first row of each VALUES list;
# x is joined to y on column a, and z is joined to y on column b.
select * from
(values(1 a, 2 b), (1, 2)) x
inner join
(values(1 a, 3 b), (1, 2)) y on x.a = y.a
inner join
(values(1 a, 3 b), (1, 3)) z on z.b = y.b
---- RESULTS
1,2,1,3,1,3
1,2,1,3,1,3
1,2,1,3,1,3
1,2,1,3,1,3
---- TYPES
TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT
====
---- QUERY
# Semi and inner join on a table and on inline views made up of values statements
# Note that y is the right-hand side of the left semi join, yet its columns are
# referenced in the select list (y.*) and in the ON clause of the inner join.
select x.date_string_col, y.*, z.* from functional.alltypessmall x
left semi join
(values(1 a, 3 b), (1, 3)) y on y.a = x.id
inner join
(values(1 a, 3 b), (1, 3)) z on z.b = y.b
---- RESULTS
'01/01/09',1,3,1,3
'01/01/09',1,3,1,3
---- TYPES
STRING, TINYINT, TINYINT, TINYINT, TINYINT
====
---- QUERY
# Select constant with unreferenced aggregate in subquery
# The outer select list is a constant and references no column of x.
select 1 from (select count(*) from functional.alltypessmall) x
---- TYPES
tinyint
---- RESULTS
1
====
---- QUERY
# Select constant with unreferenced distinct aggregate in subquery
# The outer select list is a constant and references no column of x; the view's
# only select item is a count(distinct ...) aggregate.
select 1 from (select count(distinct tinyint_col) from functional.alltypessmall) x
---- TYPES
tinyint
---- RESULTS
1
====
---- QUERY
# Select aggregate from unreferenced aggregate in subquery
# The outer count(*) counts the view's rows without referencing the view's own
# count(*) column.
select count(*) from (select count(*) from functional.alltypessmall) x
---- TYPES
bigint
---- RESULTS
1
====
---- QUERY
# Select * from aggregate in subquery
# '*' expands to the view's single, unaliased count(*) column.
select * from (select count(*) from functional.alltypessmall) x
---- TYPES
bigint
---- RESULTS
100
====
---- QUERY
# Select from aliased aggregate in subquery
# The view's count(*) is aliased as c and selected by that name.
select c from (select count(*) c from functional.alltypessmall) x
---- TYPES
bigint
---- RESULTS
100
====
---- QUERY
# Select aggregate from aliased aggregate in subquery
# The outer count(c) takes the view's aliased count(*) column as its argument.
select count(c) from (select count(*) c from functional.alltypessmall) x
---- TYPES
bigint
---- RESULTS
1
====
---- QUERY
# Select aggregate from aggregate of basetable column in subquery
# The outer count(1) takes a constant argument and does not reference the view's
# count(tinyint_col) column.
select count(1) from (select count(tinyint_col) from functional.alltypessmall) x
---- TYPES
bigint
---- RESULTS
1
====
---- QUERY
# Select aggregate from aggregate in subquery with group by
# The view produces one row per tinyint_col group; the outer count(1) counts
# those rows without referencing the view's count(*) column.
select count(1) from
(select count(*) from functional.alltypessmall group by tinyint_col) x
---- TYPES
bigint
---- RESULTS
10
====