Files
impala/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
Lenni Kuff ef48f65e76 Add test framework for running Impala query tests via Python
This is the first set of changes required to start getting our functional test
infrastructure moved from JUnit to Python. After investigating a number of
options, I decided to go with a Python test executor named py.test
(http://pytest.org/). It is very flexible, open source (MIT licensed), and will
enable us to do some cool things like parallel test execution.

As part of this change, we now use our "test vectors" for query test execution.
This will be very nice because it means that if you load the "core" dataset, you know you
will be able to run the "core" query tests (specified by --exploration_strategy
when running the tests).

You will see that now each combination of table format + query exec options is
treated like an individual test case. This will make it much easier to debug
exactly where something failed.

These new tests can be run using the script at tests/run-tests.sh
2014-01-08 10:46:50 -08:00

736 lines
16 KiB
Plaintext

====
---- QUERY
## Test the distinctpcsa aggregate function on all col types with group by
#select
# tinyint_col,
# distinctpcsa(id),
# distinctpcsa(bool_col),
# distinctpcsa(smallint_col),
# distinctpcsa(int_col),
# distinctpcsa(bigint_col),
# distinctpcsa(float_col),
# distinctpcsa(double_col),
# distinctpcsa(string_col),
# distinctpcsa(timestamp_col)
#from alltypesagg
#group by tinyint_col
#order by tinyint_col
#limit 100
#---- TYPES
#tinyint, string, string, string, string, string, string, string, string, string
#---- RESULTS
#1,'1101','82','87','142','136','137','124','117','1149'
#2,'1162','82','88','137','137','153','134','117','926'
#3,'1125','82','86','146','153','145','133','117','967'
#4,'1031','82','87','143','140','134','143','117','1031'
#5,'1175','82','87','146','140','156','145','117','1009'
#6,'1077','82','86','146','146','151','139','117','988'
#7,'1213','82','88','146','148','139','148','117','1101'
#8,'1137','82','87','145','139','156','133','117','1077'
#9,'998','82','87','140','143','148','130','117','1101'
#NULL,'1162','82','87','151','136','146','140','117','1031'
#====
#---- QUERY
## Test the distinctpcsa aggregate function on all col types without group by
#select
# distinctpcsa(id),
# distinctpcsa(bool_col),
# distinctpcsa(tinyint_col),
# distinctpcsa(smallint_col),
# distinctpcsa(int_col),
# distinctpcsa(bigint_col),
# distinctpcsa(float_col),
# distinctpcsa(double_col),
# distinctpcsa(string_col),
# distinctpcsa(timestamp_col)
#from alltypesagg
#---- TYPES
#string, string, string, string, string, string, string, string, string, string
#---- RESULTS
#'3744','82','88','172','1832','1089','1077','1009','988','9817'
#====
#---- QUERY
## Test the distinctpc aggregate function on all col types with group by
#select
# tinyint_col,
# distinctpc(id),
# distinctpc(bool_col),
# distinctpc(smallint_col),
# distinctpc(int_col),
# distinctpc(bigint_col),
# distinctpc(float_col),
# distinctpc(double_col),
# distinctpc(string_col),
# distinctpc(timestamp_col)
#from alltypesagg
#group by tinyint_col
#order by tinyint_col
#limit 100
#---- TYPES
#tinyint, string, string, string, string, string, string, string, string, string
#---- RESULTS
#1,'1089','1','11','120','70','99','96','82','1125'
#2,'1043','1','11','105','94','88','92','82','886'
#3,'1043','1','11','105','107','106','81','82','906'
#4,'1009','1','9','109','108','103','97','165','977'
#5,'1009','1','9','109','122','96','123','165','988'
#6,'977','1','12','101','112','98','119','165','956'
#7,'977','1','12','101','95','118','99','165','1227'
#8,'1043','1','8','107','85','100','124','82','1149'
#9,'1043','1','8','107','88','96','91','82','1125'
#NULL,'1089','1','6','119','86','119','93','82','998'
#====
#---- QUERY
## Test the distinctpc aggregate function on all col types without group by
#select
# distinctpc(id),
# distinctpc(bool_col),
# distinctpc(tinyint_col),
# distinctpc(smallint_col),
# distinctpc(int_col),
# distinctpc(bigint_col),
# distinctpc(float_col),
# distinctpc(double_col),
# distinctpc(string_col),
# distinctpc(timestamp_col)
#from alltypesagg
#---- TYPES
#string, string, string, string, string, string, string, string, string, string
#---- RESULTS
#'2647','3','3','122','1323','1295','1101','1031','1077','11930'
#====
#---- QUERY
## Test the distinctpc aggregate function on empty table
#select distinctpc(field) from EmptyTable
#---- TYPES
#string
#---- RESULTS
#'0'
#====
# no grouping exprs, cols contain nulls except for bool cols
select count(bool_col), min(bool_col), max(bool_col)
from alltypesagg$TABLE
---- TYPES
bigint, boolean, boolean
---- RESULTS
10000,false,true
====
---- QUERY
# no grouping exprs, cols contain nulls
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col), sum(tinyint_col),
avg(tinyint_col)
from alltypesagg$TABLE
---- TYPES
bigint, bigint, tinyint, tinyint, bigint, double
---- RESULTS
10000,9000,1,9,45000,5
====
---- QUERY
select count(*), count(smallint_col), min(smallint_col), max(smallint_col), sum(smallint_col),
avg(smallint_col)
from alltypesagg$TABLE
---- TYPES
bigint, bigint, smallint, smallint, bigint, double
---- RESULTS
10000,9900,1,99,495000,50
====
---- QUERY
select count(*), count(int_col), min(int_col), max(int_col), sum(int_col), avg(int_col)
from alltypesagg$TABLE
---- TYPES
bigint, bigint, int, int, bigint, double
---- RESULTS
10000,9990,1,999,4995000,500
====
---- QUERY
select count(*), count(bigint_col), min(bigint_col), max(bigint_col), sum(bigint_col),
avg(bigint_col)
from alltypesagg$TABLE
---- TYPES
bigint, bigint, bigint, bigint, bigint, double
---- RESULTS
10000,9990,10,9990,49950000,5000
====
---- QUERY
select count(*), count(float_col), min(float_col), max(float_col), sum(float_col),
avg(float_col)
from alltypesagg$TABLE
---- TYPES
bigint, bigint, float, float, double, double
---- RESULTS
10000,9990,1.100000023841858,1098.900024414062,5494499.999767542,549.9999999767309
====
---- QUERY
select count(*), count(double_col), min(double_col), max(double_col), sum(double_col),
avg(double_col)
from alltypesagg$TABLE
---- TYPES
bigint, bigint, double, double, double, double
---- RESULTS
10000,9990,10.1,10089.9,50449500,5050
====
---- QUERY
select count(*), min(string_col), max(string_col), min(date_string_col),
max(date_string_col)
from alltypesagg$TABLE
---- TYPES
bigint, string, string, string, string
---- RESULTS
10000,'0','999','01/01/10','01/10/10'
#====
#---- QUERY
# TODO: figure out why I'm getting a diff on the avg(timestamp_col) expr
# 2010-01-05 20:47:11.705094575
#select min(timestamp_col), max(timestamp_col), avg(timestamp_col) from alltypesagg$TABLE
#---- TYPES
#timestamp, timestamp, timestamp
#---- RESULTS
#2010-01-01 00:00:00,2010-01-10 18:02:05.100000000,2010-01-05 20:47:11.705086469
====
---- QUERY
# grouping by different data types, with NULLs
select tinyint_col, count(*) from alltypesagg$TABLE group by 1
---- TYPES
tinyint, bigint
---- RESULTS
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,1000
====
---- QUERY
# grouping by a boolean column; bool_col contains no NULLs, so there is no NULL group
select bool_col,min(bool_col),max(bool_col) from alltypesagg$TABLE group by 1
---- TYPES
boolean,boolean,boolean
---- RESULTS
false,false,false
true,true,true
====
---- QUERY
# grouping expr (tinyint_col) missing from select list; expect one count per
# tinyint_col group, including the NULL group
select count(*) from alltypesagg$TABLE group by tinyint_col
---- TYPES
bigint
---- RESULTS
1000
1000
1000
1000
1000
1000
1000
1000
1000
1000
====
---- QUERY
select smallint_col % 10, count(*) from alltypesagg$TABLE group by 1
---- TYPES
smallint, bigint
---- RESULTS
0,900
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,100
====
---- QUERY
# grouping by smallint_col % 10 with the grouping expr left out of the select
# list, so only the per-group counts are returned
select count(*) from alltypesagg$TABLE group by smallint_col % 10
---- TYPES
bigint
---- RESULTS
100
1000
1000
1000
1000
1000
1000
1000
1000
1000
900
====
---- QUERY
select int_col % 10, count(*) from alltypesagg$TABLE group by 1
---- TYPES
int, bigint
---- RESULTS
0,990
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,10
====
---- QUERY
select count(*) from alltypesagg$TABLE group by int_col % 10
---- TYPES
bigint
---- RESULTS
10
1000
1000
1000
1000
1000
1000
1000
1000
1000
990
====
---- QUERY
select bigint_col % 100, count(*) from alltypesagg$TABLE group by 1
---- TYPES
bigint, bigint
---- RESULTS
0,990
10,1000
20,1000
30,1000
40,1000
50,1000
60,1000
70,1000
80,1000
90,1000
NULL,10
====
---- QUERY
select count(*) from alltypesagg$TABLE group by bigint_col % 100
---- TYPES
bigint
---- RESULTS
10
1000
1000
1000
1000
1000
1000
1000
1000
1000
990
====
---- QUERY
select date_string_col, count(*) from alltypesagg$TABLE group by 1
---- TYPES
string, bigint
---- RESULTS
'01/01/10',1000
'01/02/10',1000
'01/03/10',1000
'01/04/10',1000
'01/05/10',1000
'01/06/10',1000
'01/07/10',1000
'01/08/10',1000
'01/09/10',1000
'01/10/10',1000
====
---- QUERY
select count(*) from alltypesagg$TABLE group by date_string_col
---- TYPES
bigint
---- RESULTS
1000
1000
1000
1000
1000
1000
1000
1000
1000
1000
====
---- QUERY
# grouping by multiple exprs, with nulls
select tinyint_col % 3, smallint_col % 3, count(*) from alltypesagg$TABLE
where day = 1 group by 1, 2
---- TYPES
tinyint, smallint, bigint
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
====
---- QUERY
select count(*) from alltypesagg$TABLE
where day = 1 group by tinyint_col % 3, smallint_col % 3
---- TYPES
bigint
---- RESULTS
10
120
120
120
30
30
30
90
90
90
90
90
90
====
---- QUERY
# same result as the 'group by 1, 2' query above; only the order of the grouping exprs differs
select tinyint_col % 3, smallint_col % 3, count(*) from alltypesagg$TABLE where day = 1 group by 2, 1
---- TYPES
tinyint, smallint, bigint
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
====
---- QUERY
select tinyint_col % 2, smallint_col % 2, int_col % 2, bigint_col % 2, date_string_col, count(*)
from alltypesagg$TABLE
where date_string_col = '01/01/10' or date_string_col = '01/02/10'
group by 1, 2, 3, 4, 5
---- TYPES
tinyint, smallint, int, bigint, string, bigint
---- RESULTS
0,0,0,0,'01/01/10',400
0,0,0,0,'01/02/10',400
1,1,1,0,'01/01/10',500
1,1,1,0,'01/02/10',500
NULL,0,0,0,'01/01/10',90
NULL,0,0,0,'01/02/10',90
NULL,NULL,0,0,'01/01/10',9
NULL,NULL,0,0,'01/02/10',9
NULL,NULL,NULL,NULL,'01/01/10',1
NULL,NULL,NULL,NULL,'01/02/10',1
====
---- QUERY
select count(*)
from alltypesagg$TABLE
where date_string_col = '01/01/10' or date_string_col = '01/02/10'
group by tinyint_col % 2, smallint_col % 2, int_col % 2, bigint_col % 2, date_string_col
---- TYPES
bigint
---- RESULTS
1
1
400
400
500
500
9
9
90
90
====
---- QUERY
# no grouping cols, no matching rows
select count(*), min(tinyint_col), max(tinyint_col), sum(tinyint_col), avg(tinyint_col)
from alltypesagg$TABLE
where tinyint_col = -1
---- TYPES
bigint, tinyint, tinyint, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
select count(*), min(smallint_col), max(smallint_col), sum(smallint_col), avg(smallint_col)
from alltypesagg$TABLE
where smallint_col = -1
---- TYPES
bigint, smallint, smallint, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
select count(*), min(int_col), max(int_col), sum(int_col), avg(int_col)
from alltypesagg$TABLE
where int_col = -1
---- TYPES
bigint, int, int, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
select count(*), min(bigint_col), max(bigint_col), sum(bigint_col), avg(bigint_col)
from alltypesagg$TABLE
where bigint_col = -1
---- TYPES
bigint, bigint, bigint, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
select count(*), min(float_col), max(float_col), sum(float_col), avg(float_col)
from alltypesagg$TABLE
where float_col < -1.0
---- TYPES
bigint, float, float, double, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
select count(*), min(double_col), max(double_col), sum(double_col), avg(double_col)
from alltypesagg$TABLE
where double_col < -1.0
---- TYPES
bigint, double, double, double, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
---- QUERY
# HAVING clauses over all aggregation functions, plus compound HAVING clauses
select int_col % 7, count(*), max(int_col) from alltypesagg$TABLE group by 1
---- TYPES
int, bigint, int
---- RESULTS
0,1420,994
1,1430,995
2,1430,996
3,1430,997
4,1430,998
5,1430,999
6,1420,993
NULL,10,NULL
====
---- QUERY
# HAVING on max(): the NULL group has max(int_col) = NULL and is filtered out
select int_col % 7, count(*) from alltypesagg$TABLE group by 1 having max(int_col) > 991
---- TYPES
int, bigint
---- RESULTS
0,1420
1,1430
2,1430
3,1430
4,1430
5,1430
6,1420
====
---- QUERY
# compound HAVING (AND): count(*) > 1420 additionally removes the groups that
# have exactly 1420 rows
select int_col % 7, count(*) from alltypesagg$TABLE group by 1
having max(int_col) > 991 and count(*) > 1420
---- TYPES
int, bigint
---- RESULTS
1,1430
2,1430
3,1430
4,1430
5,1430
====
---- QUERY
# HAVING on min(): keeps only groups that contain a value below 7; group 0 and
# the NULL group are filtered out
select int_col % 7, count(*) from alltypesagg$TABLE group by 1
having min(int_col) < 7
---- TYPES
int, bigint
---- RESULTS
1,1430
2,1430
3,1430
4,1430
5,1430
6,1420
====
---- QUERY
# compound HAVING (AND) over min() and count(*): both predicates must hold, so
# group 6 (1420 rows) is dropped as well
select int_col % 7, count(*) from alltypesagg$TABLE group by 1
having min(int_col) < 7 and count(*) > 1420
---- TYPES
int, bigint
---- RESULTS
1,1430
2,1430
3,1430
4,1430
5,1430
====
---- QUERY
select int_col % 7, count(*), sum(int_col) from alltypesagg$TABLE group by 1
---- TYPES
int, bigint, bigint
---- RESULTS
0,1420,710710
1,1430,712140
2,1430,713570
3,1430,715000
4,1430,716430
5,1430,717860
6,1420,709290
NULL,10,NULL
====
---- QUERY
# HAVING on sum() with an inclusive bound: the group whose sum is exactly
# 715000 is kept
select int_col % 7, count(*), sum(int_col) from alltypesagg$TABLE group by 1
having sum(int_col) >= 715000
---- TYPES
int, bigint, bigint
---- RESULTS
3,1430,715000
4,1430,716430
5,1430,717860
====
---- QUERY
# compound HAVING (OR): a group qualifies on either its sum or its row count
select int_col % 7, count(*), sum(int_col) from alltypesagg$TABLE group by 1
having sum(int_col) >= 715000 or count(*) > 1420
---- TYPES
int, bigint, bigint
---- RESULTS
1,1430,712140
2,1430,713570
3,1430,715000
4,1430,716430
5,1430,717860
====
---- QUERY
# HAVING with IS NULL: selects only the group whose sum(int_col) is NULL
select int_col % 7, count(*), sum(int_col) from alltypesagg$TABLE group by 1
having sum(int_col) is null
---- TYPES
int, bigint, bigint
---- RESULTS
NULL,10,NULL
====
---- QUERY
select int_col % 7, count(*), avg(int_col) from alltypesagg$TABLE group by 1
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
1,1430,498
2,1430,499
3,1430,500
4,1430,501
5,1430,502
6,1420,499.5
NULL,10,NULL
====
---- QUERY
# HAVING on avg() with a strict bound: the group whose avg is exactly 500 is
# excluded
select int_col % 7, count(*), avg(int_col) from alltypesagg$TABLE group by 1
having avg(int_col) > 500
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
4,1430,501
5,1430,502
====
---- QUERY
# compound HAVING (OR): count(*) = 10 brings back the NULL group even though
# its avg(int_col) is NULL
select int_col % 7, count(*), avg(int_col) from alltypesagg$TABLE group by 1
having avg(int_col) > 500 or count(*) = 10
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
4,1430,501
5,1430,502
NULL,10,NULL
====
---- QUERY
select timestamp_col, count(*) from alltypesagg$TABLE
group by timestamp_col having timestamp_col < cast('2010-01-01 01:05:20' as timestamp)
---- TYPES
timestamp, bigint
---- RESULTS
2010-01-01 00:00:00,1
2010-01-01 00:01:00,1
2010-01-01 00:02:00.100000000,1
2010-01-01 00:03:00.300000000,1
2010-01-01 00:04:00.600000000,1
2010-01-01 00:05:00.100000000,1
2010-01-01 00:06:00.150000000,1
2010-01-01 00:07:00.210000000,1
2010-01-01 00:08:00.280000000,1
2010-01-01 00:09:00.360000000,1
2010-01-01 00:10:00.450000000,1
2010-01-01 00:11:00.550000000,1
2010-01-01 00:12:00.660000000,1
2010-01-01 00:13:00.780000000,1
2010-01-01 00:14:00.910000000,1
2010-01-01 00:15:01.500000000,1
2010-01-01 00:16:01.200000000,1
2010-01-01 00:17:01.360000000,1
2010-01-01 00:18:01.530000000,1
2010-01-01 00:19:01.710000000,1
2010-01-01 00:20:01.900000000,1
2010-01-01 00:21:02.100000000,1
2010-01-01 00:22:02.310000000,1
2010-01-01 00:23:02.530000000,1
2010-01-01 00:24:02.760000000,1
2010-01-01 00:25:03,1
2010-01-01 00:26:03.250000000,1
2010-01-01 00:27:03.510000000,1
2010-01-01 00:28:03.780000000,1
2010-01-01 00:29:04.600000000,1
2010-01-01 00:30:04.350000000,1
2010-01-01 00:31:04.650000000,1
2010-01-01 00:32:04.960000000,1
2010-01-01 00:33:05.280000000,1
2010-01-01 00:34:05.610000000,1
2010-01-01 00:35:05.950000000,1
2010-01-01 00:36:06.300000000,1
2010-01-01 00:37:06.660000000,1
2010-01-01 00:38:07.300000000,1
2010-01-01 00:39:07.410000000,1
2010-01-01 00:40:07.800000000,1
2010-01-01 00:41:08.200000000,1
2010-01-01 00:42:08.610000000,1
2010-01-01 00:43:09.300000000,1
2010-01-01 00:44:09.460000000,1
2010-01-01 00:45:09.900000000,1
2010-01-01 00:46:10.350000000,1
2010-01-01 00:47:10.810000000,1
2010-01-01 00:48:11.280000000,1
2010-01-01 00:49:11.760000000,1
2010-01-01 00:50:12.250000000,1
2010-01-01 00:51:12.750000000,1
2010-01-01 00:52:13.260000000,1
2010-01-01 00:53:13.780000000,1
2010-01-01 00:54:14.310000000,1
2010-01-01 00:55:14.850000000,1
2010-01-01 00:56:15.400000000,1
2010-01-01 00:57:15.960000000,1
2010-01-01 00:58:16.530000000,1
2010-01-01 00:59:17.110000000,1
2010-01-01 01:00:17.700000000,1
2010-01-01 01:01:18.300000000,1
2010-01-01 01:02:18.910000000,1
2010-01-01 01:03:19.530000000,1
2010-01-01 01:04:20.160000000,1
====