Files
impala/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
2014-01-08 10:49:32 -08:00

854 lines
17 KiB
Plaintext

====
---- QUERY
# NOTE: every distinctpcsa/distinctpc test case below is commented out (each line
# is prefixed with '#'), including its '====' and '---- QUERY' markers. The only
# active statement in this section is the bool_col query at the end.
## Test the distinctpcsa aggregate function on all col types with group by
#select
# tinyint_col,
# distinctpcsa(id),
# distinctpcsa(bool_col),
# distinctpcsa(smallint_col),
# distinctpcsa(int_col),
# distinctpcsa(bigint_col),
# distinctpcsa(float_col),
# distinctpcsa(double_col),
# distinctpcsa(string_col),
# distinctpcsa(timestamp_col)
#from alltypesagg
#group by tinyint_col
#order by tinyint_col
#limit 100
#---- TYPES
#tinyint, string, string, string, string, string, string, string, string, string
#---- RESULTS
#1,'1101','82','87','142','136','137','124','117','1149'
#2,'1162','82','88','137','137','153','134','117','926'
#3,'1125','82','86','146','153','145','133','117','967'
#4,'1031','82','87','143','140','134','143','117','1031'
#5,'1175','82','87','146','140','156','145','117','1009'
#6,'1077','82','86','146','146','151','139','117','988'
#7,'1213','82','88','146','148','139','148','117','1101'
#8,'1137','82','87','145','139','156','133','117','1077'
#9,'998','82','87','140','143','148','130','117','1101'
#NULL,'1162','82','87','151','136','146','140','117','1031'
#====
#---- QUERY
## Test the distinctpcsa aggregate function on all col types without group by
#select
# distinctpcsa(id),
# distinctpcsa(bool_col),
# distinctpcsa(tinyint_col),
# distinctpcsa(smallint_col),
# distinctpcsa(int_col),
# distinctpcsa(bigint_col),
# distinctpcsa(float_col),
# distinctpcsa(double_col),
# distinctpcsa(string_col),
# distinctpcsa(timestamp_col)
#from alltypesagg
#---- TYPES
#string, string, string, string, string, string, string, string, string, string
#---- RESULTS
#'3744','82','88','172','1832','1089','1077','1009','988','9817'
#====
#---- QUERY
## Test the distinctpc aggregate function on all col types with group by
#select
# tinyint_col,
# distinctpc(id),
# distinctpc(bool_col),
# distinctpc(smallint_col),
# distinctpc(int_col),
# distinctpc(bigint_col),
# distinctpc(float_col),
# distinctpc(double_col),
# distinctpc(string_col),
# distinctpc(timestamp_col)
#from alltypesagg
#group by tinyint_col
#order by tinyint_col
#limit 100
#---- TYPES
#tinyint, string, string, string, string, string, string, string, string, string
#---- RESULTS
#1,'1089','1','11','120','70','99','96','82','1125'
#2,'1043','1','11','105','94','88','92','82','886'
#3,'1043','1','11','105','107','106','81','82','906'
#4,'1009','1','9','109','108','103','97','165','977'
#5,'1009','1','9','109','122','96','123','165','988'
#6,'977','1','12','101','112','98','119','165','956'
#7,'977','1','12','101','95','118','99','165','1227'
#8,'1043','1','8','107','85','100','124','82','1149'
#9,'1043','1','8','107','88','96','91','82','1125'
#NULL,'1089','1','6','119','86','119','93','82','998'
#====
#---- QUERY
## Test the distinctpc aggregate function on all col types without group by
#select
# distinctpc(id),
# distinctpc(bool_col),
# distinctpc(tinyint_col),
# distinctpc(smallint_col),
# distinctpc(int_col),
# distinctpc(bigint_col),
# distinctpc(float_col),
# distinctpc(double_col),
# distinctpc(string_col),
# distinctpc(timestamp_col)
#from alltypesagg
#---- TYPES
#string, string, string, string, string, string, string, string, string, string
#---- RESULTS
#'2647','3','3','122','1323','1295','1101','1031','1077','11930'
#====
#---- QUERY
## Test the distinctpc aggregate function on empty table
#select distinctpc(field) from EmptyTable
#---- TYPES
#string
#---- RESULTS
#'0'
#====
# no grouping exprs, cols contain nulls except for bool cols
# count(bool_col) is expected to equal the full row count (10000) because
# bool_col holds no NULLs.
select count(bool_col), min(bool_col), max(bool_col)
from alltypesagg
---- TYPES
bigint, boolean, boolean
---- RESULTS
10000,false,true
====
---- QUERY
# no grouping exprs, cols contain nulls
# count(*) counts every row while count(tinyint_col) skips NULLs, hence the
# expected 10000 vs 9000.
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from alltypesagg
---- TYPES
bigint, bigint, tinyint, tinyint, bigint, double
---- RESULTS
10000,9000,1,9,45000,5
====
---- QUERY
# count/min/max/sum/avg over smallint_col with no GROUP BY; 9900 of the 10000
# rows are expected to be non-NULL. Per TYPES, sum widens to bigint and avg is double.
select count(*), count(smallint_col), min(smallint_col), max(smallint_col), sum(smallint_col),
avg(smallint_col)
from alltypesagg
---- TYPES
bigint, bigint, smallint, smallint, bigint, double
---- RESULTS
10000,9900,1,99,495000,50
====
---- QUERY
# count/min/max/sum/avg over int_col with no GROUP BY; 9990 of the 10000 rows
# are expected to be non-NULL.
select count(*), count(int_col), min(int_col), max(int_col), sum(int_col), avg(int_col)
from alltypesagg
---- TYPES
bigint, bigint, int, int, bigint, double
---- RESULTS
10000,9990,1,999,4995000,500
====
---- QUERY
# count/min/max/sum/avg over bigint_col with no GROUP BY; 9990 of the 10000
# rows are expected to be non-NULL.
select count(*), count(bigint_col), min(bigint_col), max(bigint_col), sum(bigint_col),
avg(bigint_col)
from alltypesagg
---- TYPES
bigint, bigint, bigint, bigint, bigint, double
---- RESULTS
10000,9990,10,9990,49950000,5000
====
---- QUERY
# Aggregates over float_col with no GROUP BY. Per TYPES, min/max stay float
# while sum and avg are returned as double; the expected values are unrounded.
select count(*), count(float_col), min(float_col), max(float_col), sum(float_col),
avg(float_col)
from alltypesagg
---- TYPES
bigint, bigint, float, float, double, double
---- RESULTS
10000,9990,1.100000023841858,1098.900024414062,5494499.999767542,549.9999999767309
====
---- QUERY
# Aggregates over double_col with no GROUP BY. sum and avg are wrapped in
# round(..., 0), so the expected values are whole numbers
# (presumably to keep floating-point noise out of the expected output).
select count(*), count(double_col), min(double_col), max(double_col), round(sum(double_col), 0),
round(avg(double_col), 0)
from alltypesagg
---- TYPES
bigint, bigint, double, double, double, double
---- RESULTS
10000,9990,10.1,10089.9,50449500,5050
====
---- QUERY
# min/max over the string columns string_col and date_string_col, no GROUP BY.
# NOTE: the avg(timestamp_col) case that follows RESULTS is commented out
# (see its TODO) and is not an active test.
select count(*), min(string_col), max(string_col), min(date_string_col),
max(date_string_col)
from alltypesagg
---- TYPES
bigint, string, string, string, string
---- RESULTS
10000,'0','999','01/01/10','01/10/10'
#====
#---- QUERY
# TODO: figure out why I'm getting a diff on the avg(timestamp_col) expr
# 2010-01-05 20:47:11.705094575
#select min(timestamp_col), max(timestamp_col), avg(timestamp_col) from alltypesagg
#---- TYPES
#timestamp, timestamp, timestamp
#---- RESULTS
#2010-01-01 00:00:00,2010-01-10 18:02:05.100000000,2010-01-05 20:47:11.705086469
====
---- QUERY
# grouping by different data types, with NULLs
# "group by 1" refers to the first select-list item (tinyint_col); rows with a
# NULL tinyint_col form their own group (see the NULL,1000 row).
select tinyint_col, count(*) from alltypesagg group by 1
---- TYPES
tinyint, bigint
---- RESULTS
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,1000
====
---- QUERY
# grouping by a boolean column via ordinal, with min/max over the same column.
# The grouping expr IS in the select list, and no NULL group is expected:
# only the false and true groups appear in RESULTS.
select bool_col,min(bool_col),max(bool_col) from alltypesagg group by 1
---- TYPES
boolean,boolean,boolean
---- RESULTS
false,false,false
true,true,true
====
---- QUERY
# grouping by different data types, with NULLs, grouping expr missing from select list
# Ten counts are expected: nine non-NULL tinyint_col groups plus the NULL group.
select count(*) from alltypesagg group by tinyint_col
---- TYPES
bigint
---- RESULTS
1000
1000
1000
1000
1000
1000
1000
1000
1000
1000
====
---- QUERY
# grouping by an arithmetic expression (smallint_col % 10) via ordinal;
# the expression yields NULL for NULL input, producing the NULL,100 group.
select smallint_col % 10, count(*) from alltypesagg group by 1
---- TYPES
smallint, bigint
---- RESULTS
0,900
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,100
====
---- QUERY
# grouping expr (smallint_col % 10) missing from select list; only the
# per-group counts are returned (eleven groups, including the NULL group).
select count(*) from alltypesagg group by smallint_col % 10
---- TYPES
bigint
---- RESULTS
100
1000
1000
1000
1000
1000
1000
1000
1000
1000
900
====
---- QUERY
# grouping by int_col % 10 via ordinal; NULL int_col rows form the NULL,10 group.
select int_col % 10, count(*) from alltypesagg group by 1
---- TYPES
int, bigint
---- RESULTS
0,990
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,10
====
---- QUERY
# grouping expr (int_col % 10) missing from select list; only the per-group
# counts are returned (eleven groups, including the NULL group).
select count(*) from alltypesagg group by int_col % 10
---- TYPES
bigint
---- RESULTS
10
1000
1000
1000
1000
1000
1000
1000
1000
1000
990
====
---- QUERY
# Check that ALL inside aggregates is correct
# The expected counts are identical to those of plain count(*) grouped by the
# same expression (int_col % 10), i.e. the ALL keyword must not change the result.
select count(ALL *) from alltypesagg group by int_col % 10
---- TYPES
bigint
---- RESULTS
10
1000
1000
1000
1000
1000
1000
1000
1000
1000
990
====
---- QUERY
# grouping by bigint_col % 100 via ordinal; per RESULTS the remainders are
# multiples of 10, plus a NULL group for NULL bigint_col rows.
select bigint_col % 100, count(*) from alltypesagg group by 1
---- TYPES
bigint, bigint
---- RESULTS
0,990
10,1000
20,1000
30,1000
40,1000
50,1000
60,1000
70,1000
80,1000
90,1000
NULL,10
====
---- QUERY
# grouping expr (bigint_col % 100) missing from select list; only the
# per-group counts are returned (eleven groups, including the NULL group).
select count(*) from alltypesagg group by bigint_col % 100
---- TYPES
bigint
---- RESULTS
10
1000
1000
1000
1000
1000
1000
1000
1000
1000
990
====
---- QUERY
# grouping by a FLOAT column and an expression derived from it (ordinals 1, 2).
# Reads alltypes (not alltypesagg). Per TYPES, float_col * 2 is a double.
select float_col, float_col * 2, count(*) from alltypes group by 1, 2
---- TYPES
float, double, bigint
---- RESULTS
0,0,730
3.299999952316284,6.599999904632568,730
8.800000190734863,17.60000038146973,730
6.599999904632568,13.19999980926514,730
7.699999809265137,15.39999961853027,730
2.200000047683716,4.400000095367432,730
5.5,11,730
1.100000023841858,2.200000047683716,730
9.899999618530273,19.79999923706055,730
4.400000095367432,8.800000190734863,730
====
---- QUERY
# grouping expr (float_col) missing from select list; reads alltypes.
# Ten groups of 730 rows each are expected.
select count(*) from alltypes group by float_col
---- TYPES
bigint
---- RESULTS
730
730
730
730
730
730
730
730
730
730
====
---- QUERY
# The WHERE clause keeps only rows whose float_col is NULL, so a single NULL
# group is expected.
select float_col, count(*) from alltypesagg where float_col is null group by 1
---- TYPES
float, bigint
---- RESULTS
NULL,10
====
---- QUERY
# grouping by a DOUBLE column and an expression derived from it (ordinals 1, 2).
# Reads alltypes (not alltypesagg).
select double_col, double_col * 2, count(*) from alltypes group by 1, 2
---- TYPES
double, double, bigint
---- RESULTS
0,0,730
90.90000000000001,181.8,730
40.4,80.8,730
20.2,40.4,730
80.8,161.6,730
10.1,20.2,730
70.7,141.4,730
50.5,101,730
30.3,60.6,730
60.6,121.2,730
====
---- QUERY
# grouping expr (double_col) missing from select list; reads alltypes.
# Ten groups of 730 rows each are expected.
select count(*) from alltypes group by double_col
---- TYPES
bigint
---- RESULTS
730
730
730
730
730
730
730
730
730
730
====
---- QUERY
# The WHERE clause keeps only rows whose double_col is NULL, so a single NULL
# group is expected.
select double_col, count(*) from alltypesagg where double_col is null group by 1
---- TYPES
double, bigint
---- RESULTS
NULL,10
====
---- QUERY
# grouping by a string column (date_string_col) via ordinal; ten date values
# with 1000 rows each are expected and there is no NULL group.
select date_string_col, count(*) from alltypesagg group by 1
---- TYPES
string, bigint
---- RESULTS
'01/01/10',1000
'01/02/10',1000
'01/03/10',1000
'01/04/10',1000
'01/05/10',1000
'01/06/10',1000
'01/07/10',1000
'01/08/10',1000
'01/09/10',1000
'01/10/10',1000
====
---- QUERY
# grouping expr (date_string_col) missing from select list; only the
# per-group counts are returned.
select count(*) from alltypesagg group by date_string_col
---- TYPES
bigint
---- RESULTS
1000
1000
1000
1000
1000
1000
1000
1000
1000
1000
====
---- QUERY
# grouping by multiple exprs, with nulls
# Restricted to day = 1. NULL can appear as the group value of the first
# expr alone or of both exprs (see the NULL,... rows in RESULTS).
select tinyint_col % 3, smallint_col % 3, count(*) from alltypesagg
where day = 1 group by 1, 2
---- TYPES
tinyint, smallint, bigint
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
====
---- QUERY
# grouping exprs (tinyint_col % 3, smallint_col % 3) missing from select list;
# restricted to day = 1. Thirteen per-group counts are expected.
select count(*) from alltypesagg
where day = 1 group by tinyint_col % 3, smallint_col % 3
---- TYPES
bigint
---- RESULTS
10
120
120
120
30
30
30
90
90
90
90
90
90
====
---- QUERY
# same result as previous query
# The GROUP BY ordinals are listed in reverse (2, 1); the expected rows match
# those of the same select list grouped by 1, 2.
select tinyint_col % 3, smallint_col % 3, count(*) from alltypesagg where day = 1 group by 2, 1
---- TYPES
tinyint, smallint, bigint
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
====
---- QUERY
# grouping by five exprs of mixed types (four integer moduli plus a string
# column), filtered to two date_string_col values with an OR predicate.
select tinyint_col % 2, smallint_col % 2, int_col % 2, bigint_col % 2, date_string_col, count(*)
from alltypesagg
where date_string_col = '01/01/10' or date_string_col = '01/02/10'
group by 1, 2, 3, 4, 5
---- TYPES
tinyint, smallint, int, bigint, string, bigint
---- RESULTS
0,0,0,0,'01/01/10',400
0,0,0,0,'01/02/10',400
1,1,1,0,'01/01/10',500
1,1,1,0,'01/02/10',500
NULL,0,0,0,'01/01/10',90
NULL,0,0,0,'01/02/10',90
NULL,NULL,0,0,'01/01/10',9
NULL,NULL,0,0,'01/02/10',9
NULL,NULL,NULL,NULL,'01/01/10',1
NULL,NULL,NULL,NULL,'01/02/10',1
====
---- QUERY
# five grouping exprs, all missing from the select list; only the per-group
# counts are returned (ten groups across the two selected dates).
select count(*)
from alltypesagg
where date_string_col = '01/01/10' or date_string_col = '01/02/10'
group by tinyint_col % 2, smallint_col % 2, int_col % 2, bigint_col % 2, date_string_col
---- TYPES
bigint
---- RESULTS
1
1
400
400
500
500
9
9
90
90
====
---- QUERY
# no grouping cols, no matching rows
# With an empty input and no GROUP BY a single row is still expected:
# count(*) is 0 and min/max/sum/avg are NULL.
select count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
from alltypesagg
where tinyint_col = -1
---- TYPES
bigint, tinyint, tinyint, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# no grouping cols, predicate on smallint_col matches no rows:
# count(*) is 0 and min/max/sum/avg are NULL.
select count(*), min(smallint_col), max(smallint_col), sum(smallint_col), avg(smallint_col)
from alltypesagg
where smallint_col = -1
---- TYPES
bigint, smallint, smallint, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# no grouping cols, predicate on int_col matches no rows:
# count(*) is 0 and min/max/sum/avg are NULL.
select count(*), min(int_col), max(int_col), sum(int_col), avg(int_col)
from alltypesagg
where int_col = -1
---- TYPES
bigint, int, int, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# no grouping cols, predicate on bigint_col matches no rows:
# count(*) is 0 and min/max/sum/avg are NULL.
select count(*), min(bigint_col), max(bigint_col), sum(bigint_col), avg(bigint_col)
from alltypesagg
where bigint_col = -1
---- TYPES
bigint, bigint, bigint, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# no grouping cols, range predicate on float_col (< -1.0) matches no rows:
# count(*) is 0 and min/max/sum/avg are NULL.
select count(*), min(float_col), max(float_col), sum(float_col), avg(float_col)
from alltypesagg
where float_col < -1.0
---- TYPES
bigint, float, float, double, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# no grouping cols, range predicate on double_col (< -1.0) matches no rows:
# count(*) is 0 and min/max/sum/avg are NULL.
select count(*), min(double_col), max(double_col), sum(double_col), avg(double_col)
from alltypesagg
where double_col < -1.0
---- TYPES
bigint, double, double, double, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# HAVING clauses over all aggregation functions, plus compound HAVING clauses
# This query has no HAVING itself: it records count(*) and max(int_col) for
# every int_col % 7 group, including the NULL group whose max is NULL.
select int_col % 7, count(*), max(int_col) from alltypesagg group by 1
---- TYPES
int, bigint, int
---- RESULTS
0,1420,994
1,1430,995
2,1430,996
3,1430,997
4,1430,998
5,1430,999
6,1420,993
NULL,10,NULL
====
---- QUERY
# HAVING on an aggregate (max(int_col)) that is not in the select list.
# The NULL group is not expected in the output.
select int_col % 7, count(*) from alltypesagg group by 1 having max(int_col) > 991
---- TYPES
int, bigint
---- RESULTS
0,1420
1,1430
2,1430
3,1430
4,1430
5,1430
6,1420
====
---- QUERY
# compound HAVING (AND): the strict count(*) > 1420 additionally drops the
# groups whose count is exactly 1420.
select int_col % 7, count(*) from alltypesagg group by 1
having max(int_col) > 991 and count(*) > 1420
---- TYPES
int, bigint
---- RESULTS
1,1430
2,1430
3,1430
4,1430
5,1430
====
---- QUERY
# HAVING on min(int_col), which is not in the select list.
# Groups 1 through 6 are expected; group 0 and the NULL group are filtered out.
select int_col % 7, count(*) from alltypesagg group by 1
having min(int_col) < 7
---- TYPES
int, bigint
---- RESULTS
1,1430
2,1430
3,1430
4,1430
5,1430
6,1420
====
---- QUERY
# compound HAVING (AND) over min(int_col) and count(*); the strict
# count(*) > 1420 additionally drops the group whose count is exactly 1420.
select int_col % 7, count(*) from alltypesagg group by 1
having min(int_col) < 7 and count(*) > 1420
---- TYPES
int, bigint
---- RESULTS
1,1430
2,1430
3,1430
4,1430
5,1430
====
---- QUERY
# No HAVING: records count(*) and sum(int_col) for every int_col % 7 group,
# including the NULL group whose sum is NULL.
select int_col % 7, count(*), sum(int_col) from alltypesagg group by 1
---- TYPES
int, bigint, bigint
---- RESULTS
0,1420,710710
1,1430,712140
2,1430,713570
3,1430,715000
4,1430,716430
5,1430,717860
6,1420,709290
NULL,10,NULL
====
---- QUERY
# HAVING on sum(int_col) with an inclusive bound: group 3, whose sum is
# exactly 715000, is expected in the output.
select int_col % 7, count(*), sum(int_col) from alltypesagg group by 1
having sum(int_col) >= 715000
---- TYPES
int, bigint, bigint
---- RESULTS
3,1430,715000
4,1430,716430
5,1430,717860
====
---- QUERY
# compound HAVING (OR): a group qualifies if either its sum reaches 715000 or
# its count exceeds 1420; groups 1 and 2 qualify through the count alone.
select int_col % 7, count(*), sum(int_col) from alltypesagg group by 1
having sum(int_col) >= 715000 or count(*) > 1420
---- TYPES
int, bigint, bigint
---- RESULTS
1,1430,712140
2,1430,713570
3,1430,715000
4,1430,716430
5,1430,717860
====
---- QUERY
# HAVING with an IS NULL test on an aggregate: only the NULL group, whose
# sum(int_col) is NULL, is expected.
select int_col % 7, count(*), sum(int_col) from alltypesagg group by 1
having sum(int_col) is null
---- TYPES
int, bigint, bigint
---- RESULTS
NULL,10,NULL
====
---- QUERY
# No HAVING: records count(*) and avg(int_col) for every int_col % 7 group,
# including the NULL group whose avg is NULL.
select int_col % 7, count(*), avg(int_col) from alltypesagg group by 1
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
1,1430,498
2,1430,499
3,1430,500
4,1430,501
5,1430,502
6,1420,499.5
NULL,10,NULL
====
---- QUERY
# HAVING on avg(int_col) with a strict bound: group 3, whose avg is exactly
# 500, is not expected in the output.
select int_col % 7, count(*), avg(int_col) from alltypesagg group by 1
having avg(int_col) > 500
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
4,1430,501
5,1430,502
====
---- QUERY
# compound HAVING (OR): the NULL group is expected in the output because it
# satisfies count(*) = 10 even though its avg is NULL.
select int_col % 7, count(*), avg(int_col) from alltypesagg group by 1
having avg(int_col) > 500 or count(*) = 10
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
4,1430,501
5,1430,502
NULL,10,NULL
====
---- QUERY
# HAVING on the grouping column itself (no aggregate in the predicate),
# compared against a string literal cast to timestamp. Every expected group
# has a count of 1.
select timestamp_col, count(*) from alltypesagg
group by timestamp_col having timestamp_col < cast('2010-01-01 01:05:20' as timestamp)
---- TYPES
timestamp, bigint
---- RESULTS
2010-01-01 00:00:00,1
2010-01-01 00:01:00,1
2010-01-01 00:02:00.100000000,1
2010-01-01 00:03:00.300000000,1
2010-01-01 00:04:00.600000000,1
2010-01-01 00:05:00.100000000,1
2010-01-01 00:06:00.150000000,1
2010-01-01 00:07:00.210000000,1
2010-01-01 00:08:00.280000000,1
2010-01-01 00:09:00.360000000,1
2010-01-01 00:10:00.450000000,1
2010-01-01 00:11:00.550000000,1
2010-01-01 00:12:00.660000000,1
2010-01-01 00:13:00.780000000,1
2010-01-01 00:14:00.910000000,1
2010-01-01 00:15:01.500000000,1
2010-01-01 00:16:01.200000000,1
2010-01-01 00:17:01.360000000,1
2010-01-01 00:18:01.530000000,1
2010-01-01 00:19:01.710000000,1
2010-01-01 00:20:01.900000000,1
2010-01-01 00:21:02.100000000,1
2010-01-01 00:22:02.310000000,1
2010-01-01 00:23:02.530000000,1
2010-01-01 00:24:02.760000000,1
2010-01-01 00:25:03,1
2010-01-01 00:26:03.250000000,1
2010-01-01 00:27:03.510000000,1
2010-01-01 00:28:03.780000000,1
2010-01-01 00:29:04.600000000,1
2010-01-01 00:30:04.350000000,1
2010-01-01 00:31:04.650000000,1
2010-01-01 00:32:04.960000000,1
2010-01-01 00:33:05.280000000,1
2010-01-01 00:34:05.610000000,1
2010-01-01 00:35:05.950000000,1
2010-01-01 00:36:06.300000000,1
2010-01-01 00:37:06.660000000,1
2010-01-01 00:38:07.300000000,1
2010-01-01 00:39:07.410000000,1
2010-01-01 00:40:07.800000000,1
2010-01-01 00:41:08.200000000,1
2010-01-01 00:42:08.610000000,1
2010-01-01 00:43:09.300000000,1
2010-01-01 00:44:09.460000000,1
2010-01-01 00:45:09.900000000,1
2010-01-01 00:46:10.350000000,1
2010-01-01 00:47:10.810000000,1
2010-01-01 00:48:11.280000000,1
2010-01-01 00:49:11.760000000,1
2010-01-01 00:50:12.250000000,1
2010-01-01 00:51:12.750000000,1
2010-01-01 00:52:13.260000000,1
2010-01-01 00:53:13.780000000,1
2010-01-01 00:54:14.310000000,1
2010-01-01 00:55:14.850000000,1
2010-01-01 00:56:15.400000000,1
2010-01-01 00:57:15.960000000,1
2010-01-01 00:58:16.530000000,1
2010-01-01 00:59:17.110000000,1
2010-01-01 01:00:17.700000000,1
2010-01-01 01:01:18.300000000,1
2010-01-01 01:02:18.910000000,1
2010-01-01 01:03:19.530000000,1
2010-01-01 01:04:20.160000000,1
====
---- QUERY
# Test NULLs in aggregate functions
# Every aggregate receives the NULL literal: count(NULL) is expected to be 0
# and the others NULL. Per TYPES, min/max/sum are reported with type NULL
# while count is bigint and avg is double.
select count(NULL), min(NULL), max(NULL), sum(NULL), avg(NULL) from alltypesagg
---- TYPES
bigint, NULL, NULL, NULL, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# Test ignored distinct in MIN and MAX with NULLs
# DISTINCT does not change the result of MIN/MAX; with the NULL literal as the
# argument both aggregates are expected to return NULL, typed NULL.
select min(distinct NULL), max(distinct NULL) from alltypesagg
---- TYPES
NULL, NULL
---- RESULTS
NULL,NULL
====
---- QUERY
# TODO: Fix count(distinct null) to return 0 instead of 1
# The expected result below (1) pins the current behavior described in the
# TODO, not the desired one; update it to 0 when the TODO is resolved.
select count(distinct NULL) from alltypesagg
---- TYPES
bigint
---- RESULTS
1
====