impala/testdata/workloads/functional-query/queries/QueryTest/subquery.test

select *
from (
       select y x
       from (
              select id y from hbasealltypessmall
            ) a
     ) b
---- TYPES
int
---- RESULTS
0
1
10
11
12
13
14
15
16
17
18
19
2
20
21
22
23
24
25
26
27
28
29
3
30
31
32
33
34
35
36
37
38
39
4
40
41
42
43
44
45
46
47
48
49
5
50
51
52
53
54
55
56
57
58
59
6
60
61
62
63
64
65
66
67
68
69
7
70
71
72
73
74
75
76
77
78
79
8
80
81
82
83
84
85
86
87
88
89
9
90
91
92
93
94
95
96
97
98
99
====
# subquery with predicate inside
select *
from (
       select * from hbasealltypessmall where string_col = '4'
     ) a
---- TYPES
int, boolean, double, float, bigint, int, smallint, tinyint, string, string, timestamp
---- RESULTS
14,true,40.4,4.400000095367432,40,4,4,4,'01/02/09','4',2009-01-02 00:14:00.510000000
24,true,40.4,4.400000095367432,40,4,4,4,'01/03/09','4',2009-01-03 00:24:00.960000000
29,false,40.4,4.400000095367432,40,4,4,4,'02/01/09','4',2009-02-01 00:04:00.600000000
39,false,40.4,4.400000095367432,40,4,4,4,'02/02/09','4',2009-02-02 00:14:00.510000000
4,true,40.4,4.400000095367432,40,4,4,4,'01/01/09','4',2009-01-01 00:04:00.600000000
49,false,40.4,4.400000095367432,40,4,4,4,'02/03/09','4',2009-02-03 00:24:00.960000000
54,true,40.4,4.400000095367432,40,4,4,4,'03/01/09','4',2009-03-01 00:04:00.600000000
64,true,40.4,4.400000095367432,40,4,4,4,'03/02/09','4',2009-03-02 00:14:00.510000000
74,true,40.4,4.400000095367432,40,4,4,4,'03/03/09','4',2009-03-03 00:24:00.960000000
79,false,40.4,4.400000095367432,40,4,4,4,'04/01/09','4',2009-04-01 00:04:00.600000000
89,false,40.4,4.400000095367432,40,4,4,4,'04/02/09','4',2009-04-02 00:14:00.510000000
99,false,40.4,4.400000095367432,40,4,4,4,'04/03/09','4',2009-04-03 00:24:00.960000000
====
# subquery with predicate push down
select *
from (
       select *
       from (
              select * from hbasealltypessmall
            ) x
     ) y
where string_col = '4'
---- TYPES
int, boolean, double, float, bigint, int, smallint, tinyint, string, string, timestamp
---- RESULTS
14,true,40.4,4.400000095367432,40,4,4,4,'01/02/09','4',2009-01-02 00:14:00.510000000
24,true,40.4,4.400000095367432,40,4,4,4,'01/03/09','4',2009-01-03 00:24:00.960000000
29,false,40.4,4.400000095367432,40,4,4,4,'02/01/09','4',2009-02-01 00:04:00.600000000
39,false,40.4,4.400000095367432,40,4,4,4,'02/02/09','4',2009-02-02 00:14:00.510000000
4,true,40.4,4.400000095367432,40,4,4,4,'01/01/09','4',2009-01-01 00:04:00.600000000
49,false,40.4,4.400000095367432,40,4,4,4,'02/03/09','4',2009-02-03 00:24:00.960000000
54,true,40.4,4.400000095367432,40,4,4,4,'03/01/09','4',2009-03-01 00:04:00.600000000
64,true,40.4,4.400000095367432,40,4,4,4,'03/02/09','4',2009-03-02 00:14:00.510000000
74,true,40.4,4.400000095367432,40,4,4,4,'03/03/09','4',2009-03-03 00:24:00.960000000
79,false,40.4,4.400000095367432,40,4,4,4,'04/01/09','4',2009-04-01 00:04:00.600000000
89,false,40.4,4.400000095367432,40,4,4,4,'04/02/09','4',2009-04-02 00:14:00.510000000
99,false,40.4,4.400000095367432,40,4,4,4,'04/03/09','4',2009-04-03 00:24:00.960000000
====
# join between three tables, extra join predicates, extra scan predicates, nulls in joins
# cols
# (alltypesagg.tinyint_col contains nulls instead of 0s)
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
from (
       select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
              a.int_col int_col, a.month month,
              b.float_col float_col, b.id id
       from (
              select *
              from alltypesagg$TABLE a
              where month=1
            ) a
            join alltypessmall$TABLE b
            on (a.smallint_col = b.id)
     ) x
join alltypessmall$TABLE c on (x.tinyint_col = c.id)
where x.day=1
and   x.int_col > 899
and   x.float_col > 4.5
and   c.string_col < '7'
and   x.int_col + x.float_col + c.string_col < 1000
---- TYPES
smallint, int, tinyint, int, int, float, string
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
31,31,1,1,931,6.599999904632568,'1'
32,32,2,2,932,7.699999809265137,'2'
33,33,3,3,933,8.800000190734863,'3'
34,34,4,4,934,9.899999618530273,'4'
41,41,1,1,941,6.599999904632568,'1'
42,42,2,2,942,7.699999809265137,'2'
43,43,3,3,943,8.800000190734863,'3'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
81,81,1,1,981,6.599999904632568,'1'
82,82,2,2,982,7.699999809265137,'2'
83,83,3,3,983,8.800000190734863,'3'
84,84,4,4,984,9.899999618530273,'4'
91,91,1,1,991,6.599999904632568,'1'
====
# Same join as above, but subquery on the RHS
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
from alltypessmall$TABLE c
     join
     (
       select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
              a.int_col int_col, a.month month,
              b.float_col float_col, b.id id
       from alltypessmall$TABLE b
            join
            (
              select *
              from alltypesagg$TABLE a
              where month=1
            ) a
            on (a.smallint_col = b.id)
     ) x
     on (x.tinyint_col = c.id)
where x.day=1
and   x.int_col > 899
and   x.float_col > 4.5
and   c.string_col < '7'
and   x.int_col + x.float_col + c.string_col < 1000
---- TYPES
smallint, int, tinyint, int, int, float, string
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
31,31,1,1,931,6.599999904632568,'1'
32,32,2,2,932,7.699999809265137,'2'
33,33,3,3,933,8.800000190734863,'3'
34,34,4,4,934,9.899999618530273,'4'
41,41,1,1,941,6.599999904632568,'1'
42,42,2,2,942,7.699999809265137,'2'
43,43,3,3,943,8.800000190734863,'3'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
81,81,1,1,981,6.599999904632568,'1'
82,82,2,2,982,7.699999809265137,'2'
83,83,3,3,983,8.800000190734863,'3'
84,84,4,4,984,9.899999618530273,'4'
91,91,1,1,991,6.599999904632568,'1'
====
# aggregate without group by
select *
from (
       select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col),
              sum(tinyint_col), avg(tinyint_col)
       from (
              select * from alltypesagg$TABLE
            ) a
     ) b
---- TYPES
bigint, bigint, tinyint, tinyint, bigint, double
---- RESULTS
10000,9000,1,9,45000,5
====
# aggregate with group-by, having
select *
from (
       select int_col % 7 c1, count(*) c2, avg(int_col) c3
       from (
              select * from alltypesagg$TABLE
            ) a
       group by 1
       having avg(int_col) > 500 or count(*) = 10
     ) b
where c1 is not null
and   c2 > 10
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
4,1430,501
5,1430,502
====
#
# So, we've this test to test multiple level of aggregate instead.
select c1, c3, m2
from (
       select c1, c3, max(c2) m2
       from (
              select c1, c2, c3
              from (
                     select int_col c1, tinyint_col c2, max(id) c3
                     from hbasealltypessmall
                     group by 1, 2
                     order by 1,2
                     limit 5
                   ) x
             ) x2
       group by c1, c3
       limit 10
     ) t
where c1 > 0
order by 2, 1 desc
limit 3
---- TYPES
int, int, tinyint
---- RESULTS
1,96,1
2,97,2
3,98,3
====
#Do not materialize the agg expr slot
select c1, c2
from (
       select int_col c1, tinyint_col c2, min(float_col) c3
       from hbasealltypessmall
       group by 1, 2
     ) x
---- TYPES
int, tinyint
---- RESULTS
0,0
1,1
2,2
3,3
4,4
5,5
6,6
7,7
8,8
9,9
====
select distinct *
from (
       select bool_col, tinyint_col, count(*)
       from alltypesagg$TABLE
       group by bool_col, tinyint_col
       having bool_col = true
     ) x
where tinyint_col < 6
---- TYPES
boolean, tinyint, bigint
---- RESULTS
true,2,1000
true,4,1000
====
# distinct w/ explicit select list
select *
from (
       select distinct bool_col, tinyint_col
       from (
              select * from alltypesagg$TABLE where tinyint_col < 7
            ) y
     ) x
where bool_col = true
---- TYPES
boolean, tinyint
---- RESULTS
true,2
true,4
true,6
====
# semi-join on string
select *
from (
       select d.*
       from DimTbl d left semi join JoinTbl j on (j.test_name = d.name)
     ) x
where x.name > 'Name1'
---- TYPES
bigint, string, int
---- RESULTS
1002,'Name2',94611
1003,'Name3',94612
1004,'Name4',94612
1005,'Name5',94613
1006,'Name6',94613
====
select j.*, d.*
from (
       select *
       from JoinTbl a
     ) j
     left outer join
     (
       select *
       from DimTbl b
     ) d
     on (j.test_name = d.name)
where j.test_id <= 1006
---- TYPES
bigint, string, int, int, bigint, string, int
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
1003,'Name3',94611,5000,1003,'Name3',94612
1004,'Name4',94611,5000,1004,'Name4',94612
1005,'Name5',94611,5000,1005,'Name5',94613
1006,'Name16',94612,15000,NULL,'NULL',NULL
1006,'Name16',94612,5000,NULL,'NULL',NULL
1006,'Name16',94616,15000,NULL,'NULL',NULL
1006,'Name16',94616,5000,NULL,'NULL',NULL
1006,'Name6',94616,15000,1006,'Name6',94613
1006,'Name6',94616,5000,1006,'Name6',94613
====
# TODO: If we apply predicate on d, the result will be incorrect. This is a general
# predicate evaluation issue.
#
select j.*, d.*
from (
       select *
       from JoinTbl a
     ) j
     left outer join
     (
       select *
       from DimTbl b
     ) d
     on (j.test_name = d.name)
where j.test_id <= 1006
---- TYPES
bigint, string, int, int, bigint, string, int
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
1003,'Name3',94611,5000,1003,'Name3',94612
1004,'Name4',94611,5000,1004,'Name4',94612
1005,'Name5',94611,5000,1005,'Name5',94613
1006,'Name16',94612,15000,NULL,'NULL',NULL
1006,'Name16',94612,5000,NULL,'NULL',NULL
1006,'Name16',94616,15000,NULL,'NULL',NULL
1006,'Name16',94616,5000,NULL,'NULL',NULL
1006,'Name6',94616,15000,1006,'Name6',94613
1006,'Name6',94616,5000,1006,'Name6',94613
====