Files
impala/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
Lenni Kuff 30dbf59ef2 Final changes to enable Python test infrastructure and tests
With this change the Python tests will now be called as part of buildall and
the corresponding Java tests have been disabled. The new tests can also be
invoked by calling ./tests/run-tests.sh directly.

This includes a fix from Nong for a bug that caused wrong results for limit on
non-io manager formats.
2014-01-08 10:46:57 -08:00

736 lines
16 KiB
Plaintext

====
---- QUERY
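## NOTE: the distinctpcsa/distinctpc tests below are commented out. The QUERY
## header above therefore belongs to the first live query further down
## (select count(bool_col), ...).
## Test the distinctpcsa aggregate function on all col types with group by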
#select
# tinyint_col,
# distinctpcsa(id),
# distinctpcsa(bool_col),
# distinctpcsa(smallint_col),
# distinctpcsa(int_col),
# distinctpcsa(bigint_col),
# distinctpcsa(float_col),
# distinctpcsa(double_col),
# distinctpcsa(string_col),
# distinctpcsa(timestamp_col)
#from alltypesagg
#group by tinyint_col
#order by tinyint_col
#limit 100
#---- TYPES
#tinyint, string, string, string, string, string, string, string, string, string
#---- RESULTS
#1,'1101','82','87','142','136','137','124','117','1149'
#2,'1162','82','88','137','137','153','134','117','926'
#3,'1125','82','86','146','153','145','133','117','967'
#4,'1031','82','87','143','140','134','143','117','1031'
#5,'1175','82','87','146','140','156','145','117','1009'
#6,'1077','82','86','146','146','151','139','117','988'
#7,'1213','82','88','146','148','139','148','117','1101'
#8,'1137','82','87','145','139','156','133','117','1077'
#9,'998','82','87','140','143','148','130','117','1101'
#NULL,'1162','82','87','151','136','146','140','117','1031'
#====
#---- QUERY
## Test the distinctpcsa aggregate function on all col types without group by
#select
# distinctpcsa(id),
# distinctpcsa(bool_col),
# distinctpcsa(tinyint_col),
# distinctpcsa(smallint_col),
# distinctpcsa(int_col),
# distinctpcsa(bigint_col),
# distinctpcsa(float_col),
# distinctpcsa(double_col),
# distinctpcsa(string_col),
# distinctpcsa(timestamp_col)
#from alltypesagg
#---- TYPES
#string, string, string, string, string, string, string, string, string, string
#---- RESULTS
#'3744','82','88','172','1832','1089','1077','1009','988','9817'
#====
#---- QUERY
## Test the distinctpc aggregate function on all col types with group by
#select
# tinyint_col,
# distinctpc(id),
# distinctpc(bool_col),
# distinctpc(smallint_col),
# distinctpc(int_col),
# distinctpc(bigint_col),
# distinctpc(float_col),
# distinctpc(double_col),
# distinctpc(string_col),
# distinctpc(timestamp_col)
#from alltypesagg
#group by tinyint_col
#order by tinyint_col
#limit 100
#---- TYPES
#tinyint, string, string, string, string, string, string, string, string, string
#---- RESULTS
#1,'1089','1','11','120','70','99','96','82','1125'
#2,'1043','1','11','105','94','88','92','82','886'
#3,'1043','1','11','105','107','106','81','82','906'
#4,'1009','1','9','109','108','103','97','165','977'
#5,'1009','1','9','109','122','96','123','165','988'
#6,'977','1','12','101','112','98','119','165','956'
#7,'977','1','12','101','95','118','99','165','1227'
#8,'1043','1','8','107','85','100','124','82','1149'
#9,'1043','1','8','107','88','96','91','82','1125'
#NULL,'1089','1','6','119','86','119','93','82','998'
#====
#---- QUERY
## Test the distinctpc aggregate function on all col types without group by
#select
# distinctpc(id),
# distinctpc(bool_col),
# distinctpc(tinyint_col),
# distinctpc(smallint_col),
# distinctpc(int_col),
# distinctpc(bigint_col),
# distinctpc(float_col),
# distinctpc(double_col),
# distinctpc(string_col),
# distinctpc(timestamp_col)
#from alltypesagg
#---- TYPES
#string, string, string, string, string, string, string, string, string, string
#---- RESULTS
#'2647','3','3','122','1323','1295','1101','1031','1077','11930'
#====
#---- QUERY
## Test the distinctpc aggregate function on empty table
#select distinctpc(field) from EmptyTable
#---- TYPES
#string
#---- RESULTS
#'0'
#====
# No grouping exprs. Per the original note, the columns contain NULLs except
# for the bool cols, so count(bool_col) is expected to equal the 10000 that
# count(*) returns in the tests below.
# NOTE(review): the placeholder suffix on the table name looks like something
# the test framework substitutes per run -- confirm.
select count(bool_col), min(bool_col), max(bool_col)
from alltypesagg$TABLE
---- TYPES
bigint, boolean, boolean
---- RESULTS
10000,false,true
====
---- QUERY
# No grouping exprs; tinyint_col contains NULLs, so count(tinyint_col) (9000)
# is expected to be lower than count(*) (10000).
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from alltypesagg$TABLE
---- TYPES
bigint, bigint, tinyint, tinyint, bigint, double
---- RESULTS
10000,9000,1,9,45000,5
====
---- QUERY
# Same set of aggregates over smallint_col; 9900 of the 10000 rows are expected
# to be non-NULL.
select count(*), count(smallint_col), min(smallint_col), max(smallint_col), sum(smallint_col),
avg(smallint_col)
from alltypesagg$TABLE
---- TYPES
bigint, bigint, smallint, smallint, bigint, double
---- RESULTS
10000,9900,1,99,495000,50
====
---- QUERY
# Same set of aggregates over int_col; 9990 of the 10000 rows are expected to
# be non-NULL.
select count(*), count(int_col), min(int_col), max(int_col), sum(int_col), avg(int_col)
from alltypesagg$TABLE
---- TYPES
bigint, bigint, int, int, bigint, double
---- RESULTS
10000,9990,1,999,4995000,500
====
---- QUERY
# Same set of aggregates over bigint_col; 9990 of the 10000 rows are expected
# to be non-NULL.
select count(*), count(bigint_col), min(bigint_col), max(bigint_col), sum(bigint_col),
avg(bigint_col)
from alltypesagg$TABLE
---- TYPES
bigint, bigint, bigint, bigint, bigint, double
---- RESULTS
10000,9990,10,9990,49950000,5000
====
---- QUERY
# Same set of aggregates over float_col. Per TYPES, min/max are float while
# sum/avg are double; the expected values are compared unrounded.
select count(*), count(float_col), min(float_col), max(float_col), sum(float_col),
avg(float_col)
from alltypesagg$TABLE
---- TYPES
bigint, bigint, float, float, double, double
---- RESULTS
10000,9990,1.100000023841858,1098.900024414062,5494499.999767542,549.9999999767309
====
---- QUERY
# Same set of aggregates over double_col. sum and avg are wrapped in
# round(..., 0), so the expected values are whole numbers.
# NOTE(review): presumably the rounding avoids floating-point noise in the
# comparison -- confirm.
select count(*), count(double_col), min(double_col), max(double_col), round(sum(double_col), 0),
round(avg(double_col), 0)
from alltypesagg$TABLE
---- TYPES
bigint, bigint, double, double, double, double
---- RESULTS
10000,9990,10.1,10089.9,50449500,5050
====
---- QUERY
# count(*) plus min/max over the two string columns (string_col and
# date_string_col), without grouping exprs.
select count(*), min(string_col), max(string_col), min(date_string_col),
max(date_string_col)
from alltypesagg$TABLE
---- TYPES
bigint, string, string, string, string
---- RESULTS
10000,'0','999','01/01/10','01/10/10'
#====
#---- QUERY
# TODO: this test is disabled; figure out why avg(timestamp_col) differs from the
# expected value in RESULTS below. Value seen instead (presumably):
# 2010-01-05 20:47:11.705094575
#select min(timestamp_col), max(timestamp_col), avg(timestamp_col) from alltypesagg$TABLE
#---- TYPES
#timestamp, timestamp, timestamp
#---- RESULTS
#2010-01-01 00:00:00,2010-01-10 18:02:05.100000000,2010-01-05 20:47:11.705086469
====
---- QUERY
# grouping by different data types, with NULLs
# tinyint_col is referenced by select-list ordinal (group by 1); NULL is
# expected to form its own group.
select tinyint_col, count(*) from alltypesagg$TABLE group by 1
---- TYPES
tinyint, bigint
---- RESULTS
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,1000
====
---- QUERY
# Grouping by a boolean column, referenced by select-list ordinal. The grouping
# expr is present in the select list, and min/max are taken over the grouping
# column itself. Only two groups are expected (no NULL group).
select bool_col,min(bool_col),max(bool_col) from alltypesagg$TABLE group by 1
---- TYPES
boolean,boolean,boolean
---- RESULTS
false,false,false
true,true,true
====
---- QUERY
# Same grouping as "select tinyint_col, count(*) ... group by 1", but with the
# grouping expr missing from the select list; ten groups are expected
# (nine values plus NULL).
select count(*) from alltypesagg$TABLE group by tinyint_col
---- TYPES
bigint
---- RESULTS
1000
1000
1000
1000
1000
1000
1000
1000
1000
1000
====
---- QUERY
# Grouping by an arithmetic expr (smallint_col % 10) referenced by select-list
# ordinal; rows with a NULL smallint_col are expected to form a NULL group.
select smallint_col % 10, count(*) from alltypesagg$TABLE group by 1
---- TYPES
smallint, bigint
---- RESULTS
0,900
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,100
====
---- QUERY
# Same grouping expr (smallint_col % 10), spelled out in GROUP BY and missing
# from the select list, so only the per-group counts are returned.
# NOTE(review): the expected rows are listed in string order (100, 1000, ...,
# 900), presumably because the verifier sorts rows before comparing -- confirm.
select count(*) from alltypesagg$TABLE group by smallint_col % 10
---- TYPES
bigint
---- RESULTS
100
1000
1000
1000
1000
1000
1000
1000
1000
1000
900
====
---- QUERY
# Grouping by int_col % 10 referenced by select-list ordinal; the 10 rows with
# a NULL int_col are expected to form a NULL group.
select int_col % 10, count(*) from alltypesagg$TABLE group by 1
---- TYPES
int, bigint
---- RESULTS
0,990
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,10
====
---- QUERY
# Same grouping expr (int_col % 10), spelled out in GROUP BY and missing from
# the select list; only the per-group counts are returned.
select count(*) from alltypesagg$TABLE group by int_col % 10
---- TYPES
bigint
---- RESULTS
10
1000
1000
1000
1000
1000
1000
1000
1000
1000
990
====
---- QUERY
# Grouping by bigint_col % 100 referenced by select-list ordinal; the expected
# group keys are multiples of 10 plus a NULL group.
select bigint_col % 100, count(*) from alltypesagg$TABLE group by 1
---- TYPES
bigint, bigint
---- RESULTS
0,990
10,1000
20,1000
30,1000
40,1000
50,1000
60,1000
70,1000
80,1000
90,1000
NULL,10
====
---- QUERY
# Same grouping expr (bigint_col % 100), spelled out in GROUP BY and missing
# from the select list; only the per-group counts are returned.
select count(*) from alltypesagg$TABLE group by bigint_col % 100
---- TYPES
bigint
---- RESULTS
10
1000
1000
1000
1000
1000
1000
1000
1000
1000
990
====
---- QUERY
# Grouping by a string column (date_string_col) referenced by select-list
# ordinal; ten distinct values with 1000 rows each are expected.
select date_string_col, count(*) from alltypesagg$TABLE group by 1
---- TYPES
string, bigint
---- RESULTS
'01/01/10',1000
'01/02/10',1000
'01/03/10',1000
'01/04/10',1000
'01/05/10',1000
'01/06/10',1000
'01/07/10',1000
'01/08/10',1000
'01/09/10',1000
'01/10/10',1000
====
---- QUERY
# Same grouping by date_string_col, named in GROUP BY and missing from the
# select list; only the per-group counts are returned.
select count(*) from alltypesagg$TABLE group by date_string_col
---- TYPES
bigint
---- RESULTS
1000
1000
1000
1000
1000
1000
1000
1000
1000
1000
====
---- QUERY
# grouping by multiple exprs, with nulls
# Rows are restricted to day = 1; both grouping exprs are referenced by
# select-list ordinal (group by 1, 2). NULL can appear in either key.
select tinyint_col % 3, smallint_col % 3, count(*) from alltypesagg$TABLE
where day = 1 group by 1, 2
---- TYPES
tinyint, smallint, bigint
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
====
---- QUERY
# Same two grouping exprs over day = 1, spelled out in GROUP BY and missing
# from the select list; the 13 expected rows are the per-group counts only.
select count(*) from alltypesagg$TABLE
where day = 1 group by tinyint_col % 3, smallint_col % 3
---- TYPES
bigint
---- RESULTS
10
120
120
120
30
30
30
90
90
90
90
90
90
====
---- QUERY
# Grouping ordinals reversed (group by 2, 1). The expected rows are identical
# to those of the same select with "group by 1, 2" (not to the count-only
# query directly before this one).
select tinyint_col % 3, smallint_col % 3, count(*) from alltypesagg$TABLE where day = 1 group by 2, 1
---- TYPES
tinyint, smallint, bigint
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
====
---- QUERY
# Five grouping exprs of mixed types (tinyint, smallint, int, bigint, string),
# all referenced by select-list ordinal. Rows are limited to two
# date_string_col values via an OR predicate.
select tinyint_col % 2, smallint_col % 2, int_col % 2, bigint_col % 2, date_string_col, count(*)
from alltypesagg$TABLE
where date_string_col = '01/01/10' or date_string_col = '01/02/10'
group by 1, 2, 3, 4, 5
---- TYPES
tinyint, smallint, int, bigint, string, bigint
---- RESULTS
0,0,0,0,'01/01/10',400
0,0,0,0,'01/02/10',400
1,1,1,0,'01/01/10',500
1,1,1,0,'01/02/10',500
NULL,0,0,0,'01/01/10',90
NULL,0,0,0,'01/02/10',90
NULL,NULL,0,0,'01/01/10',9
NULL,NULL,0,0,'01/02/10',9
NULL,NULL,NULL,NULL,'01/01/10',1
NULL,NULL,NULL,NULL,'01/02/10',1
====
---- QUERY
# Same five grouping exprs and predicate, spelled out in GROUP BY and missing
# from the select list; the 10 expected rows are the per-group counts only.
select count(*)
from alltypesagg$TABLE
where date_string_col = '01/01/10' or date_string_col = '01/02/10'
group by tinyint_col % 2, smallint_col % 2, int_col % 2, bigint_col % 2, date_string_col
---- TYPES
bigint
---- RESULTS
1
1
400
400
500
500
9
9
90
90
====
---- QUERY
# no grouping cols, no matching rows
# A single row is still expected: count(*) is 0 and min/max/sum/avg are NULL.
select count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
from alltypesagg$TABLE
where tinyint_col = -1
---- TYPES
bigint, tinyint, tinyint, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# No grouping cols and no matching rows, for smallint_col: count(*) is expected
# to be 0 and the remaining aggregates NULL.
select count(*), min(smallint_col), max(smallint_col), sum(smallint_col), avg(smallint_col)
from alltypesagg$TABLE
where smallint_col = -1
---- TYPES
bigint, smallint, smallint, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# No grouping cols and no matching rows, for int_col: count(*) is expected to
# be 0 and the remaining aggregates NULL.
select count(*), min(int_col), max(int_col), sum(int_col), avg(int_col)
from alltypesagg$TABLE
where int_col = -1
---- TYPES
bigint, int, int, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# No grouping cols and no matching rows, for bigint_col: count(*) is expected
# to be 0 and the remaining aggregates NULL.
select count(*), min(bigint_col), max(bigint_col), sum(bigint_col), avg(bigint_col)
from alltypesagg$TABLE
where bigint_col = -1
---- TYPES
bigint, bigint, bigint, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# No grouping cols and no matching rows, for float_col. Unlike the integer
# variants, the predicate is a range comparison (< -1.0), not an equality.
select count(*), min(float_col), max(float_col), sum(float_col), avg(float_col)
from alltypesagg$TABLE
where float_col < -1.0
---- TYPES
bigint, float, float, double, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# No grouping cols and no matching rows, for double_col, again using a range
# predicate (< -1.0): count(*) is expected to be 0 and the rest NULL.
select count(*), min(double_col), max(double_col), sum(double_col), avg(double_col)
from alltypesagg$TABLE
where double_col < -1.0
---- TYPES
bigint, double, double, double, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# HAVING clauses over all aggregation functions, plus compound HAVING clauses
# This first query has no HAVING clause itself: it lists count(*) and
# max(int_col) per int_col % 7 group, presumably as the baseline that the
# HAVING tests below filter. The NULL group has a NULL max.
select int_col % 7, count(*), max(int_col) from alltypesagg$TABLE group by 1
---- TYPES
int, bigint, int
---- RESULTS
0,1420,994
1,1430,995
2,1430,996
3,1430,997
4,1430,998
5,1430,999
6,1420,993
NULL,10,NULL
====
---- QUERY
# HAVING on max(int_col), an aggregate that is not in the select list. All
# non-NULL groups are expected to pass; the NULL group is filtered out.
select int_col % 7, count(*) from alltypesagg$TABLE group by 1 having max(int_col) > 991
---- TYPES
int, bigint
---- RESULTS
0,1420
1,1430
2,1430
3,1430
4,1430
5,1430
6,1420
====
---- QUERY
# Compound HAVING (AND): max(int_col) > 991 combined with count(*) > 1420,
# which additionally drops the two groups whose count is exactly 1420.
select int_col % 7, count(*) from alltypesagg$TABLE group by 1
having max(int_col) > 991 and count(*) > 1420
---- TYPES
int, bigint
---- RESULTS
1,1430
2,1430
3,1430
4,1430
5,1430
====
---- QUERY
# HAVING on min(int_col), an aggregate that is not in the select list. Group 0
# and the NULL group are expected to be filtered out.
select int_col % 7, count(*) from alltypesagg$TABLE group by 1
having min(int_col) < 7
---- TYPES
int, bigint
---- RESULTS
1,1430
2,1430
3,1430
4,1430
5,1430
6,1420
====
---- QUERY
# Compound HAVING (AND): min(int_col) < 7 combined with count(*) > 1420, which
# additionally drops group 6 (count 1420).
select int_col % 7, count(*) from alltypesagg$TABLE group by 1
having min(int_col) < 7 and count(*) > 1420
---- TYPES
int, bigint
---- RESULTS
1,1430
2,1430
3,1430
4,1430
5,1430
====
---- QUERY
# No HAVING clause: lists count(*) and sum(int_col) per int_col % 7 group,
# presumably as the baseline for the sum-based HAVING tests that follow.
# The NULL group has a NULL sum.
select int_col % 7, count(*), sum(int_col) from alltypesagg$TABLE group by 1
---- TYPES
int, bigint, bigint
---- RESULTS
0,1420,710710
1,1430,712140
2,1430,713570
3,1430,715000
4,1430,716430
5,1430,717860
6,1420,709290
NULL,10,NULL
====
---- QUERY
# HAVING on sum(int_col) with an inclusive bound: group 3, whose sum is exactly
# 715000, is expected to be kept.
select int_col % 7, count(*), sum(int_col) from alltypesagg$TABLE group by 1
having sum(int_col) >= 715000
---- TYPES
int, bigint, bigint
---- RESULTS
3,1430,715000
4,1430,716430
5,1430,717860
====
---- QUERY
# Compound HAVING (OR): groups 1 and 2 fail the sum bound but are expected to
# pass through the count(*) > 1420 branch.
select int_col % 7, count(*), sum(int_col) from alltypesagg$TABLE group by 1
having sum(int_col) >= 715000 or count(*) > 1420
---- TYPES
int, bigint, bigint
---- RESULTS
1,1430,712140
2,1430,713570
3,1430,715000
4,1430,716430
5,1430,717860
====
---- QUERY
# HAVING with an IS NULL predicate on an aggregate: only the NULL group, whose
# sum(int_col) is NULL, is expected to remain.
select int_col % 7, count(*), sum(int_col) from alltypesagg$TABLE group by 1
having sum(int_col) is null
---- TYPES
int, bigint, bigint
---- RESULTS
NULL,10,NULL
====
---- QUERY
# No HAVING clause: lists count(*) and avg(int_col) per int_col % 7 group,
# presumably as the baseline for the avg-based HAVING tests that follow.
# The NULL group has a NULL avg.
select int_col % 7, count(*), avg(int_col) from alltypesagg$TABLE group by 1
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
1,1430,498
2,1430,499
3,1430,500
4,1430,501
5,1430,502
6,1420,499.5
NULL,10,NULL
====
---- QUERY
# HAVING on avg(int_col) with a strict bound: group 3, whose avg is exactly
# 500, is expected to be filtered out.
select int_col % 7, count(*), avg(int_col) from alltypesagg$TABLE group by 1
having avg(int_col) > 500
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
4,1430,501
5,1430,502
====
---- QUERY
# Compound HAVING (OR): the NULL group has a NULL avg but is expected to be
# kept through the count(*) = 10 branch.
select int_col % 7, count(*), avg(int_col) from alltypesagg$TABLE group by 1
having avg(int_col) > 500 or count(*) = 10
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
4,1430,501
5,1430,502
NULL,10,NULL
====
---- QUERY
# Grouping by a timestamp column, with a HAVING predicate on the grouping
# column itself (not on an aggregate), compared against a string cast to
# timestamp. Every expected timestamp is distinct (count 1), and the bound is
# strict: all 65 expected rows are earlier than 2010-01-01 01:05:20.
select timestamp_col, count(*) from alltypesagg$TABLE
group by timestamp_col having timestamp_col < cast('2010-01-01 01:05:20' as timestamp)
---- TYPES
timestamp, bigint
---- RESULTS
2010-01-01 00:00:00,1
2010-01-01 00:01:00,1
2010-01-01 00:02:00.100000000,1
2010-01-01 00:03:00.300000000,1
2010-01-01 00:04:00.600000000,1
2010-01-01 00:05:00.100000000,1
2010-01-01 00:06:00.150000000,1
2010-01-01 00:07:00.210000000,1
2010-01-01 00:08:00.280000000,1
2010-01-01 00:09:00.360000000,1
2010-01-01 00:10:00.450000000,1
2010-01-01 00:11:00.550000000,1
2010-01-01 00:12:00.660000000,1
2010-01-01 00:13:00.780000000,1
2010-01-01 00:14:00.910000000,1
2010-01-01 00:15:01.500000000,1
2010-01-01 00:16:01.200000000,1
2010-01-01 00:17:01.360000000,1
2010-01-01 00:18:01.530000000,1
2010-01-01 00:19:01.710000000,1
2010-01-01 00:20:01.900000000,1
2010-01-01 00:21:02.100000000,1
2010-01-01 00:22:02.310000000,1
2010-01-01 00:23:02.530000000,1
2010-01-01 00:24:02.760000000,1
2010-01-01 00:25:03,1
2010-01-01 00:26:03.250000000,1
2010-01-01 00:27:03.510000000,1
2010-01-01 00:28:03.780000000,1
2010-01-01 00:29:04.600000000,1
2010-01-01 00:30:04.350000000,1
2010-01-01 00:31:04.650000000,1
2010-01-01 00:32:04.960000000,1
2010-01-01 00:33:05.280000000,1
2010-01-01 00:34:05.610000000,1
2010-01-01 00:35:05.950000000,1
2010-01-01 00:36:06.300000000,1
2010-01-01 00:37:06.660000000,1
2010-01-01 00:38:07.300000000,1
2010-01-01 00:39:07.410000000,1
2010-01-01 00:40:07.800000000,1
2010-01-01 00:41:08.200000000,1
2010-01-01 00:42:08.610000000,1
2010-01-01 00:43:09.300000000,1
2010-01-01 00:44:09.460000000,1
2010-01-01 00:45:09.900000000,1
2010-01-01 00:46:10.350000000,1
2010-01-01 00:47:10.810000000,1
2010-01-01 00:48:11.280000000,1
2010-01-01 00:49:11.760000000,1
2010-01-01 00:50:12.250000000,1
2010-01-01 00:51:12.750000000,1
2010-01-01 00:52:13.260000000,1
2010-01-01 00:53:13.780000000,1
2010-01-01 00:54:14.310000000,1
2010-01-01 00:55:14.850000000,1
2010-01-01 00:56:15.400000000,1
2010-01-01 00:57:15.960000000,1
2010-01-01 00:58:16.530000000,1
2010-01-01 00:59:17.110000000,1
2010-01-01 01:00:17.700000000,1
2010-01-01 01:01:18.300000000,1
2010-01-01 01:02:18.910000000,1
2010-01-01 01:03:19.530000000,1
2010-01-01 01:04:20.160000000,1
====