mirror of
https://github.com/apache/impala.git
synced 2026-01-02 12:00:33 -05:00
The select exprs of an inline view may not always be materialised, yet the output tuple itself may be. This patch fixes a crash in this situation in the backend aggregation node which assumed its output tuple would always have at least one materialised slot. The cause was a couple of too-conservative DCHECKs that failed if the tuple was NULL. In fact, the code was robust to this possibility without the checks, so this bug didn't affect release builds of Impala. Change-Id: If0b90809d30fcd196f55197953392452d1ac9c4f Reviewed-on: http://gerrit.ent.cloudera.com:8080/1431 Reviewed-by: Henry Robinson <henry@cloudera.com> Tested-by: jenkins (cherry picked from commit 8c1c21b66c43e900760ace54d090305f32a85a1f) Reviewed-on: http://gerrit.ent.cloudera.com:8080/1471 Tested-by: Henry Robinson <henry@cloudera.com>
490 lines
12 KiB
Plaintext
490 lines
12 KiB
Plaintext
====
|
|
---- QUERY
# Three-table join with extra join predicates and extra scan predicates; the join
# columns contain nulls.
# (alltypesagg.tinyint_col contains nulls instead of 0s)
SELECT
  x.smallint_col,
  x.id,
  x.tinyint_col,
  c.id,
  x.int_col,
  x.float_col,
  c.string_col
FROM (
  SELECT
    a.smallint_col AS smallint_col,
    a.tinyint_col AS tinyint_col,
    a.day AS day,
    a.int_col AS int_col,
    a.month AS month,
    b.float_col AS float_col,
    b.id AS id
  FROM (
    SELECT *
    FROM alltypesagg a
    WHERE month = 1
  ) a
  INNER JOIN alltypessmall b
    ON a.smallint_col = b.id
) x
INNER JOIN alltypessmall c
  ON x.tinyint_col = c.id
WHERE x.day = 1
  AND x.int_col > 899
  AND x.float_col > 4.5
  AND c.string_col < '7'
  AND x.int_col + x.float_col + CAST(c.string_col AS FLOAT) < 1000
---- TYPES
smallint, int, tinyint, int, int, float, string
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
31,31,1,1,931,6.599999904632568,'1'
32,32,2,2,932,7.699999809265137,'2'
33,33,3,3,933,8.800000190734863,'3'
34,34,4,4,934,9.899999618530273,'4'
41,41,1,1,941,6.599999904632568,'1'
42,42,2,2,942,7.699999809265137,'2'
43,43,3,3,943,8.800000190734863,'3'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
81,81,1,1,981,6.599999904632568,'1'
82,82,2,2,982,7.699999809265137,'2'
83,83,3,3,983,8.800000190734863,'3'
84,84,4,4,984,9.899999618530273,'4'
91,91,1,1,991,6.599999904632568,'1'
|
|
====
|
|
---- QUERY
# Same three-table join as the preceding test case, but with each subquery on the
# right-hand side of its join.
SELECT
  x.smallint_col,
  x.id,
  x.tinyint_col,
  c.id,
  x.int_col,
  x.float_col,
  c.string_col
FROM alltypessmall c
INNER JOIN (
  SELECT
    a.smallint_col AS smallint_col,
    a.tinyint_col AS tinyint_col,
    a.day AS day,
    a.int_col AS int_col,
    a.month AS month,
    b.float_col AS float_col,
    b.id AS id
  FROM alltypessmall b
  INNER JOIN (
    SELECT *
    FROM alltypesagg a
    WHERE month = 1
  ) a
    ON a.smallint_col = b.id
) x
  ON x.tinyint_col = c.id
WHERE x.day = 1
  AND x.int_col > 899
  AND x.float_col > 4.5
  AND c.string_col < '7'
  AND x.int_col + x.float_col + CAST(c.string_col AS FLOAT) < 1000
---- TYPES
smallint, int, tinyint, int, int, float, string
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
31,31,1,1,931,6.599999904632568,'1'
32,32,2,2,932,7.699999809265137,'2'
33,33,3,3,933,8.800000190734863,'3'
34,34,4,4,934,9.899999618530273,'4'
41,41,1,1,941,6.599999904632568,'1'
42,42,2,2,942,7.699999809265137,'2'
43,43,3,3,943,8.800000190734863,'3'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
81,81,1,1,981,6.599999904632568,'1'
82,82,2,2,982,7.699999809265137,'2'
83,83,3,3,983,8.800000190734863,'3'
84,84,4,4,984,9.899999618530273,'4'
91,91,1,1,991,6.599999904632568,'1'
|
|
====
|
|
---- QUERY
# Aggregation without a group by clause, computed over a select-star inline view
# and itself wrapped in an inline view.
SELECT *
FROM (
  SELECT
    count(*),
    count(tinyint_col),
    min(tinyint_col),
    max(tinyint_col),
    sum(tinyint_col),
    avg(tinyint_col)
  FROM (
    SELECT *
    FROM alltypesagg
  ) a
) b
---- TYPES
bigint, bigint, tinyint, tinyint, bigint, double
---- RESULTS
10000,9000,1,9,45000,5
|
|
====
|
|
---- QUERY
# Aggregation with group by and having inside an inline view; the outer query
# filters further on the aliased grouping and aggregate columns.
SELECT *
FROM (
  SELECT
    int_col % 7 AS c1,
    count(*) AS c2,
    avg(int_col) AS c3
  FROM (
    SELECT *
    FROM alltypesagg
  ) a
  GROUP BY 1
  HAVING avg(int_col) > 500
    OR count(*) = 10
) b
WHERE c1 IS NOT NULL
  AND c2 > 10
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
4,1430,501
5,1430,502
|
|
====
|
|
---- QUERY
# Multiple levels of aggregation: a grouped, ordered and limited innermost view is
# passed through a plain projection, aggregated again, then filtered, ordered and
# limited by the outermost query.
SELECT c1, c3, m2
FROM (
  SELECT
    c1,
    c3,
    max(c2) AS m2
  FROM (
    SELECT c1, c2, c3
    FROM (
      SELECT
        int_col AS c1,
        tinyint_col AS c2,
        max(id) AS c3
      FROM functional_hbase.alltypessmall
      GROUP BY 1, 2
      ORDER BY 1, 2
      LIMIT 5
    ) x
  ) x2
  GROUP BY c1, c3
  LIMIT 10
) t
WHERE c1 > 0
ORDER BY 2, 1 DESC
LIMIT 3
---- TYPES
int, int, tinyint
---- RESULTS
1,96,1
2,97,2
3,98,3
|
|
====
|
|
---- QUERY
# Do not materialize the agg expr slot: the inline view computes min(float_col) as
# c3, but the outer query selects only the grouping columns c1 and c2.
SELECT c1, c2
FROM (
  SELECT
    int_col AS c1,
    tinyint_col AS c2,
    min(float_col) AS c3
  FROM functional_hbase.alltypessmall
  GROUP BY 1, 2
) x
---- TYPES
int, tinyint
---- RESULTS
0,0
1,1
2,2
3,3
4,4
5,5
6,6
7,7
8,8
9,9
|
|
====
|
|
---- QUERY
# Subquery with aggregation and order by/limit used as the left-hand side of a join.
# The having clause in the subquery is transferred to the merge agg step in the
# distributed plan.
SELECT *
FROM (
  SELECT int_col, count(*)
  FROM alltypessmall
  WHERE month = 1
  GROUP BY int_col
  HAVING count(*) > 2
  ORDER BY count(*) DESC, int_col
  LIMIT 5
) t1
INNER JOIN alltypes t2
  ON t1.int_col = t2.id
WHERE month = 1
---- TYPES
int, bigint, int, boolean, tinyint, smallint, int, bigint, float, double, string, string, timestamp, int, int
---- RESULTS
0,3,0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1
1,3,1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1
2,3,2,true,2,2,2,20,2.200000047683716,20.2,'01/01/09','2',2009-01-01 00:02:00.100000000,2009,1
3,3,3,false,3,3,3,30,3.299999952316284,30.3,'01/01/09','3',2009-01-01 00:03:00.300000000,2009,1
4,3,4,true,4,4,4,40,4.400000095367432,40.4,'01/01/09','4',2009-01-01 00:04:00.600000000,2009,1
|
|
====
|
|
---- QUERY
# select distinct * over an inline view that aggregates with group by and having;
# the outer query adds a filter on one of the grouping columns.
SELECT DISTINCT *
FROM (
  SELECT bool_col, tinyint_col, count(*)
  FROM alltypesagg
  GROUP BY bool_col, tinyint_col
  HAVING bool_col = TRUE
) x
WHERE tinyint_col < 6
---- TYPES
boolean, tinyint, bigint
---- RESULTS
true,2,1000
true,4,1000
|
|
====
|
|
---- QUERY
# Distinct with an explicit select list inside an inline view, over a filtered
# select-star subquery; the outer query filters on bool_col.
SELECT *
FROM (
  SELECT DISTINCT bool_col, tinyint_col
  FROM (
    SELECT *
    FROM alltypesagg
    WHERE tinyint_col < 7
  ) y
) x
WHERE bool_col = TRUE
---- TYPES
boolean, tinyint
---- RESULTS
true,2
true,4
true,6
|
|
====
|
|
---- QUERY
# Left semi join on a string column inside an inline view, with an outer string
# predicate on the view's output.
SELECT *
FROM (
  SELECT d.*
  FROM DimTbl d
  LEFT SEMI JOIN JoinTbl j
    ON j.test_name = d.name
) x
WHERE x.name > 'Name1'
---- TYPES
bigint, string, int
---- RESULTS
1002,'Name2',94611
1003,'Name3',94612
1004,'Name4',94612
1005,'Name5',94613
1006,'Name6',94613
|
|
====
|
|
---- QUERY
# Left outer join between two select-star inline views, with a predicate on the
# left-hand (outer) side only.
SELECT j.*, d.*
FROM (
  SELECT *
  FROM JoinTbl a
) j
LEFT OUTER JOIN (
  SELECT *
  FROM DimTbl b
) d
  ON j.test_name = d.name
WHERE j.test_id <= 1006
---- TYPES
bigint, string, int, int, bigint, string, int
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
1003,'Name3',94611,5000,1003,'Name3',94612
1004,'Name4',94611,5000,1004,'Name4',94612
1005,'Name5',94611,5000,1005,'Name5',94613
1006,'Name16',94612,15000,NULL,'NULL',NULL
1006,'Name16',94612,5000,NULL,'NULL',NULL
1006,'Name16',94616,15000,NULL,'NULL',NULL
1006,'Name16',94616,5000,NULL,'NULL',NULL
1006,'Name6',94616,15000,1006,'Name6',94613
1006,'Name6',94616,5000,1006,'Name6',94613
|
|
====
|
|
---- QUERY
# TODO: If we apply a predicate on d, the result will be incorrect. This is a
# general predicate evaluation issue. (The query below filters on j only.)
SELECT j.*, d.*
FROM (
  SELECT *
  FROM JoinTbl a
) j
LEFT OUTER JOIN (
  SELECT *
  FROM DimTbl b
) d
  ON j.test_name = d.name
WHERE j.test_id <= 1006
---- TYPES
bigint, string, int, int, bigint, string, int
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
1003,'Name3',94611,5000,1003,'Name3',94612
1004,'Name4',94611,5000,1004,'Name4',94612
1005,'Name5',94611,5000,1005,'Name5',94613
1006,'Name16',94612,15000,NULL,'NULL',NULL
1006,'Name16',94612,5000,NULL,'NULL',NULL
1006,'Name16',94616,15000,NULL,'NULL',NULL
1006,'Name16',94616,5000,NULL,'NULL',NULL
1006,'Name6',94616,15000,1006,'Name6',94613
1006,'Name6',94616,5000,1006,'Name6',94613
|
|
====
|
|
---- QUERY
# Constant select (no FROM clause) used as an inline view.
SELECT *
FROM (
  SELECT 1, 2
) x
---- TYPES
tinyint, tinyint
---- RESULTS
1,2
|
|
====
|
|
---- QUERY
# Constant select nested two inline views deep, with a predicate on the aliased
# constant column.
SELECT *
FROM (
  SELECT y
  FROM (
    SELECT 1 AS y
  ) a
  WHERE y < 10
) b
---- TYPES
tinyint
---- RESULTS
1
|
|
====
|
|
---- QUERY
# Union of constant selects inside an inline view; the last operand is itself a
# select over a constant-select inline view.
SELECT *
FROM (
  SELECT 1
  UNION ALL
  SELECT 2
  UNION ALL
  SELECT *
  FROM (
    SELECT 3
  ) y
) x
---- TYPES
tinyint
---- RESULTS
1
2
3
|
|
====
|
|
---- QUERY
# Inner joins between three inline views, each a union of constant selects.
SELECT *
FROM (
  SELECT 1 AS a, 2 AS b
  UNION ALL
  SELECT 1 AS a, 2 AS b
) x
INNER JOIN (
  SELECT 1 AS a, 3 AS b
  UNION ALL
  SELECT 1 AS a, 2 AS b
) y
  ON x.a = y.a
INNER JOIN (
  SELECT 1 AS a, 3 AS b
  UNION ALL
  SELECT 1 AS a, 3 AS b
) z
  ON z.b = y.b
---- TYPES
tinyint, tinyint, tinyint, tinyint, tinyint, tinyint
---- RESULTS
1,2,1,3,1,3
1,2,1,3,1,3
1,2,1,3,1,3
1,2,1,3,1,3
|
|
====
|
|
---- QUERY
# A table is semi-joined and then inner-joined against inline views built from
# unions of constant selects.
SELECT x.date_string_col, y.*, z.*
FROM functional.alltypessmall x
LEFT SEMI JOIN (
  SELECT 1 AS a, 3 AS b
  UNION ALL
  SELECT 1 AS a, 3 AS b
) y
  ON y.a = x.id
INNER JOIN (
  SELECT 1 AS a, 3 AS b
  UNION ALL
  SELECT 1 AS a, 3 AS b
) z
  ON z.b = y.b
---- TYPES
string, tinyint, tinyint, tinyint, tinyint
---- RESULTS
'01/01/09',1,3,1,3
'01/01/09',1,3,1,3
|
|
====
|
|
---- QUERY
# VALUES statement with two rows used as an inline view.
SELECT *
FROM (
  VALUES (1, 2), (3, 4)
) x
---- RESULTS
1,2
3,4
---- TYPES
TINYINT, TINYINT
|
|
====
|
|
---- QUERY
# VALUES statement nested two inline views deep; the predicate on the aliased
# column keeps only one of the two rows.
SELECT *
FROM (
  SELECT y
  FROM (
    VALUES (1 AS y), (11)
  ) a
  WHERE y < 10
) b
---- RESULTS
1
---- TYPES
TINYINT
|
|
====
|
|
---- QUERY
# VALUES statement combined via union all with a select over another VALUES inline
# view, all inside an inline view.
SELECT *
FROM (
  VALUES (1), (2)
  UNION ALL
  SELECT *
  FROM (
    VALUES (3)
  ) y
) x
---- RESULTS
1
2
3
---- TYPES
TINYINT
|
|
====
|
|
---- QUERY
# Inner joins between three inline views, each built from a VALUES statement.
SELECT *
FROM (
  VALUES (1 AS a, 2 AS b), (1, 2)
) x
INNER JOIN (
  VALUES (1 AS a, 3 AS b), (1, 2)
) y
  ON x.a = y.a
INNER JOIN (
  VALUES (1 AS a, 3 AS b), (1, 3)
) z
  ON z.b = y.b
---- RESULTS
1,2,1,3,1,3
1,2,1,3,1,3
1,2,1,3,1,3
1,2,1,3,1,3
---- TYPES
TINYINT, TINYINT, TINYINT, TINYINT, TINYINT, TINYINT
|
|
====
|
|
---- QUERY
# A table is semi-joined and then inner-joined against inline views built from
# VALUES statements.
SELECT x.date_string_col, y.*, z.*
FROM functional.alltypessmall x
LEFT SEMI JOIN (
  VALUES (1 AS a, 3 AS b), (1, 3)
) y
  ON y.a = x.id
INNER JOIN (
  VALUES (1 AS a, 3 AS b), (1, 3)
) z
  ON z.b = y.b
---- RESULTS
'01/01/09',1,3,1,3
'01/01/09',1,3,1,3
---- TYPES
STRING, TINYINT, TINYINT, TINYINT, TINYINT
|
|
====
|
|
---- QUERY
# Outer query selects only a constant; the aggregate computed by the inline view
# is never referenced.
SELECT 1
FROM (
  SELECT count(*)
  FROM functional.alltypessmall
) x
---- TYPES
tinyint
---- RESULTS
1
|
|
====
|
|
---- QUERY
# Outer query selects only a constant; the distinct aggregate computed by the
# inline view is never referenced.
SELECT 1
FROM (
  SELECT count(DISTINCT tinyint_col)
  FROM functional.alltypessmall
) x
---- TYPES
tinyint
---- RESULTS
1
|
|
====
|
|
---- QUERY
# Outer count(*) over an inline view whose own aggregate output is never
# referenced.
SELECT count(*)
FROM (
  SELECT count(*)
  FROM functional.alltypessmall
) x
---- TYPES
bigint
---- RESULTS
1
|
|
====
|
|
---- QUERY
# select * over an inline view that computes an unaliased aggregate.
SELECT *
FROM (
  SELECT count(*)
  FROM functional.alltypessmall
) x
---- TYPES
bigint
---- RESULTS
100
|
|
====
|
|
---- QUERY
# Outer query selects the inline view's aggregate through its alias.
SELECT c
FROM (
  SELECT count(*) AS c
  FROM functional.alltypessmall
) x
---- TYPES
bigint
---- RESULTS
100
|
|
====
|
|
---- QUERY
# Outer aggregate applied to the inline view's aliased aggregate column.
SELECT count(c)
FROM (
  SELECT count(*) AS c
  FROM functional.alltypessmall
) x
---- TYPES
bigint
---- RESULTS
1
|
|
====
|
|
---- QUERY
# Outer count(1) over an inline view that aggregates a base table column.
SELECT count(1)
FROM (
  SELECT count(tinyint_col)
  FROM functional.alltypessmall
) x
---- TYPES
bigint
---- RESULTS
1
|
|
====
|
|
---- QUERY
# Outer count(1) over an inline view that aggregates with a group by; the grouping
# column is not part of the view's select list.
SELECT count(1)
FROM (
  SELECT count(*)
  FROM functional.alltypessmall
  GROUP BY tinyint_col
) x
---- TYPES
bigint
---- RESULTS
10
|
|
==== |