Files
impala/testdata/workloads/functional-query/queries/QueryTest/aggregation.test
Lenni Kuff 04edc8f534 Update benchmark tests to run against generic workload, data loading with scale factor, +more
This change updates the run-benchmark script to enable it to target one or more
workloads. Now benchmarks can be run like:

./run-benchmark --workloads=hive-benchmark,tpch

We look up the workload in the workloads directory, then read the associated
query .test files and start executing them.

To ensure the queries are not duplicated between benchmark and query tests, I
moved all existing queries (under fe/src/test/resources/*) to the workloads
directory. You do NOT need to look through all the .test files; I've just moved
them. The one new file is 'hive-benchmark.test', which contains the hive
benchmark queries.

Also added support for generating schema for different scale factors as well as
executing against these scale factors. For example, let's say we have a dataset
with a scale factor called "SF3". We would first generate the schema using:

./generate_schema_statements --workload=<workload> --scale_factor="SF3"
This will create tables whose names are distinct from those of the other scale factors.

Run the generated .sql file to load the data. Alternatively, the data can be loaded
by running a new python script:
./bin/load-data.py -w <workload1>,<workload2> -e <exploration strategy> -s [scale factor]
For example: ./bin/load-data.py -w tpch -e core -s SF3

Then run against this:
./run-benchmark --workloads=<workload> --scale_factor=SF3

This changeset also includes a few other minor tweaks to some of the test
scripts.

Change-Id: Ife8a8d91567d75c9612be37bec96c1e7780f50d6
2014-01-08 10:44:22 -08:00

567 lines
11 KiB
Plaintext

# Aggregates without any grouping exprs over columns that contain NULLs.
# count(col) is expected to be lower than count(*) for the numeric columns.
# tinyint_col
SELECT
  COUNT(*),
  COUNT(tinyint_col),
  MIN(tinyint_col),
  MAX(tinyint_col),
  SUM(tinyint_col),
  AVG(tinyint_col)
FROM alltypesagg$TABLE
---- TYPES
bigint, bigint, tinyint, tinyint, bigint, double
---- RESULTS
10000,9000,1,9,45000,5
====
# smallint_col
SELECT
  COUNT(*),
  COUNT(smallint_col),
  MIN(smallint_col),
  MAX(smallint_col),
  SUM(smallint_col),
  AVG(smallint_col)
FROM alltypesagg$TABLE
---- TYPES
bigint, bigint, smallint, smallint, bigint, double
---- RESULTS
10000,9900,1,99,495000,50
====
# int_col
SELECT
  COUNT(*),
  COUNT(int_col),
  MIN(int_col),
  MAX(int_col),
  SUM(int_col),
  AVG(int_col)
FROM alltypesagg$TABLE
---- TYPES
bigint, bigint, int, int, bigint, double
---- RESULTS
10000,9990,1,999,4995000,500
====
# bigint_col
SELECT
  COUNT(*),
  COUNT(bigint_col),
  MIN(bigint_col),
  MAX(bigint_col),
  SUM(bigint_col),
  AVG(bigint_col)
FROM alltypesagg$TABLE
---- TYPES
bigint, bigint, bigint, bigint, bigint, double
---- RESULTS
10000,9990,10,9990,49950000,5000
====
# float_col: min/max are typed float, sum/avg are typed double
SELECT
  COUNT(*),
  COUNT(float_col),
  MIN(float_col),
  MAX(float_col),
  SUM(float_col),
  AVG(float_col)
FROM alltypesagg$TABLE
---- TYPES
bigint, bigint, float, float, double, double
---- RESULTS
10000,9990,1.100000023841858,1098.900024414062,5494499.999767542,549.9999999767309
====
# double_col
SELECT
  COUNT(*),
  COUNT(double_col),
  MIN(double_col),
  MAX(double_col),
  SUM(double_col),
  AVG(double_col)
FROM alltypesagg$TABLE
---- TYPES
bigint, bigint, double, double, double, double
---- RESULTS
10000,9990,10.1,10089.9,50449500,5050
====
# min/max over the two string columns
SELECT
  COUNT(*),
  MIN(string_col),
  MAX(string_col),
  MIN(date_string_col),
  MAX(date_string_col)
FROM alltypesagg$TABLE
---- TYPES
bigint, string, string, string, string
---- RESULTS
10000,'0','999','01/01/10','01/10/10'
====
# min/max/avg over the timestamp column; avg is expected to be typed timestamp
SELECT
  MIN(timestamp_col),
  MAX(timestamp_col),
  AVG(timestamp_col)
FROM alltypesagg$TABLE
---- TYPES
timestamp, timestamp, timestamp
---- RESULTS
2010-01-01 00:00:00,2010-01-10 18:02:05.100000000,2010-01-05 20:47:11.705086469
====
# Grouping by different data types, with NULLs.
# tinyint_col, referenced by select-list ordinal; NULL forms its own group.
SELECT
  tinyint_col,
  COUNT(*)
FROM alltypesagg$TABLE
GROUP BY 1
---- TYPES
tinyint, bigint
---- RESULTS
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,1000
====
# Same grouping (with NULLs), but the grouping expr is missing from the select list.
SELECT COUNT(*)
FROM alltypesagg$TABLE
GROUP BY tinyint_col
---- TYPES
bigint
---- RESULTS
1000
1000
1000
1000
1000
1000
1000
1000
1000
1000
====
# Group by an arithmetic expr over smallint_col (by ordinal); NULL inputs form a NULL group.
SELECT
  smallint_col % 10,
  COUNT(*)
FROM alltypesagg$TABLE
GROUP BY 1
---- TYPES
smallint, bigint
---- RESULTS
0,900
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,100
====
# Same grouping expr, spelled out in GROUP BY and absent from the select list.
SELECT COUNT(*)
FROM alltypesagg$TABLE
GROUP BY smallint_col % 10
---- TYPES
bigint
---- RESULTS
100
1000
1000
1000
1000
1000
1000
1000
1000
1000
900
====
# Group by an arithmetic expr over int_col (by ordinal); NULL inputs form a NULL group.
SELECT
  int_col % 10,
  COUNT(*)
FROM alltypesagg$TABLE
GROUP BY 1
---- TYPES
int, bigint
---- RESULTS
0,990
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,10
====
# Same grouping expr, spelled out in GROUP BY and absent from the select list.
SELECT COUNT(*)
FROM alltypesagg$TABLE
GROUP BY int_col % 10
---- TYPES
bigint
---- RESULTS
10
1000
1000
1000
1000
1000
1000
1000
1000
1000
990
====
# Group by an arithmetic expr over bigint_col (by ordinal); NULL inputs form a NULL group.
SELECT
  bigint_col % 100,
  COUNT(*)
FROM alltypesagg$TABLE
GROUP BY 1
---- TYPES
bigint, bigint
---- RESULTS
0,990
10,1000
20,1000
30,1000
40,1000
50,1000
60,1000
70,1000
80,1000
90,1000
NULL,10
====
# Same grouping expr, spelled out in GROUP BY and absent from the select list.
SELECT COUNT(*)
FROM alltypesagg$TABLE
GROUP BY bigint_col % 100
---- TYPES
bigint
---- RESULTS
10
1000
1000
1000
1000
1000
1000
1000
1000
1000
990
====
# Group by a string column (by ordinal); no NULL group is expected here.
SELECT
  date_string_col,
  COUNT(*)
FROM alltypesagg$TABLE
GROUP BY 1
---- TYPES
string, bigint
---- RESULTS
'01/01/10',1000
'01/02/10',1000
'01/03/10',1000
'01/04/10',1000
'01/05/10',1000
'01/06/10',1000
'01/07/10',1000
'01/08/10',1000
'01/09/10',1000
'01/10/10',1000
====
# Same grouping column, named in GROUP BY and absent from the select list.
SELECT COUNT(*)
FROM alltypesagg$TABLE
GROUP BY date_string_col
---- TYPES
bigint
---- RESULTS
1000
1000
1000
1000
1000
1000
1000
1000
1000
1000
====
# Grouping by multiple exprs, with NULLs (restricted to day = 1).
SELECT
  tinyint_col % 3,
  smallint_col % 3,
  COUNT(*)
FROM alltypesagg$TABLE
WHERE day = 1
GROUP BY 1, 2
---- TYPES
tinyint, smallint, bigint
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
====
# Same two grouping exprs, spelled out in GROUP BY and absent from the select list.
SELECT COUNT(*)
FROM alltypesagg$TABLE
WHERE day = 1
GROUP BY tinyint_col % 3, smallint_col % 3
---- TYPES
bigint
---- RESULTS
10
120
120
120
30
30
30
90
90
90
90
90
90
====
# Ordinals listed in reverse order (2, 1); expected rows are identical to the
# first query of this group (GROUP BY 1, 2).
SELECT
  tinyint_col % 3,
  smallint_col % 3,
  COUNT(*)
FROM alltypesagg$TABLE
WHERE day = 1
GROUP BY 2, 1
---- TYPES
tinyint, smallint, bigint
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
====
# Five grouping exprs of mixed types (by ordinal), restricted to two date_string_col values.
SELECT
  tinyint_col % 2,
  smallint_col % 2,
  int_col % 2,
  bigint_col % 2,
  date_string_col,
  COUNT(*)
FROM alltypesagg$TABLE
WHERE date_string_col = '01/01/10'
   OR date_string_col = '01/02/10'
GROUP BY 1, 2, 3, 4, 5
---- TYPES
tinyint, smallint, int, bigint, string, bigint
---- RESULTS
0,0,0,0,'01/01/10',400
0,0,0,0,'01/02/10',400
1,1,1,0,'01/01/10',500
1,1,1,0,'01/02/10',500
NULL,0,0,0,'01/01/10',90
NULL,0,0,0,'01/02/10',90
NULL,NULL,0,0,'01/01/10',9
NULL,NULL,0,0,'01/02/10',9
NULL,NULL,NULL,NULL,'01/01/10',1
NULL,NULL,NULL,NULL,'01/02/10',1
====
# Same five grouping exprs, spelled out in GROUP BY and absent from the select list.
SELECT COUNT(*)
FROM alltypesagg$TABLE
WHERE date_string_col = '01/01/10'
   OR date_string_col = '01/02/10'
GROUP BY
  tinyint_col % 2,
  smallint_col % 2,
  int_col % 2,
  bigint_col % 2,
  date_string_col
---- TYPES
bigint
---- RESULTS
1
1
400
400
500
500
9
9
90
90
====
# No grouping cols and no matching rows: a single row is still expected, with
# count(*) = 0 and NULL for every other aggregate.
# tinyint_col
SELECT
  COUNT(*),
  MIN(tinyint_col),
  MAX(tinyint_col),
  SUM(tinyint_col),
  AVG(tinyint_col)
FROM alltypesagg$TABLE
WHERE tinyint_col = -1
---- TYPES
bigint, tinyint, tinyint, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
# smallint_col
SELECT
  COUNT(*),
  MIN(smallint_col),
  MAX(smallint_col),
  SUM(smallint_col),
  AVG(smallint_col)
FROM alltypesagg$TABLE
WHERE smallint_col = -1
---- TYPES
bigint, smallint, smallint, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
# int_col
SELECT
  COUNT(*),
  MIN(int_col),
  MAX(int_col),
  SUM(int_col),
  AVG(int_col)
FROM alltypesagg$TABLE
WHERE int_col = -1
---- TYPES
bigint, int, int, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
# bigint_col
SELECT
  COUNT(*),
  MIN(bigint_col),
  MAX(bigint_col),
  SUM(bigint_col),
  AVG(bigint_col)
FROM alltypesagg$TABLE
WHERE bigint_col = -1
---- TYPES
bigint, bigint, bigint, bigint, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
# float_col (range predicate instead of equality)
SELECT
  COUNT(*),
  MIN(float_col),
  MAX(float_col),
  SUM(float_col),
  AVG(float_col)
FROM alltypesagg$TABLE
WHERE float_col < -1.0
---- TYPES
bigint, float, float, double, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
# double_col (range predicate instead of equality)
SELECT
  COUNT(*),
  MIN(double_col),
  MAX(double_col),
  SUM(double_col),
  AVG(double_col)
FROM alltypesagg$TABLE
WHERE double_col < -1.0
---- TYPES
bigint, double, double, double, double
---- RESULTS
0,NULL,NULL,NULL,NULL
====
# HAVING clauses over aggregation functions, plus compound HAVING clauses.
# Baseline without HAVING: per-group count and max for int_col % 7.
SELECT
  int_col % 7,
  COUNT(*),
  MAX(int_col)
FROM alltypesagg$TABLE
GROUP BY 1
---- TYPES
int, bigint, int
---- RESULTS
0,1420,994
1,1430,995
2,1430,996
3,1430,997
4,1430,998
5,1430,999
6,1420,993
NULL,10,NULL
====
# HAVING on max(), which is not in the select list; the NULL group is not expected.
SELECT
  int_col % 7,
  COUNT(*)
FROM alltypesagg$TABLE
GROUP BY 1
HAVING MAX(int_col) > 991
---- TYPES
int, bigint
---- RESULTS
0,1420
1,1430
2,1430
3,1430
4,1430
5,1430
6,1420
====
# Compound HAVING: max() AND count(*).
SELECT
  int_col % 7,
  COUNT(*)
FROM alltypesagg$TABLE
GROUP BY 1
HAVING MAX(int_col) > 991
   AND COUNT(*) > 1420
---- TYPES
int, bigint
---- RESULTS
1,1430
2,1430
3,1430
4,1430
5,1430
====
# HAVING on min(), which is not in the select list.
SELECT
  int_col % 7,
  COUNT(*)
FROM alltypesagg$TABLE
GROUP BY 1
HAVING MIN(int_col) < 7
---- TYPES
int, bigint
---- RESULTS
1,1430
2,1430
3,1430
4,1430
5,1430
6,1420
====
# Compound HAVING: min() AND count(*).
SELECT
  int_col % 7,
  COUNT(*)
FROM alltypesagg$TABLE
GROUP BY 1
HAVING MIN(int_col) < 7
   AND COUNT(*) > 1420
---- TYPES
int, bigint
---- RESULTS
1,1430
2,1430
3,1430
4,1430
5,1430
====
# Baseline without HAVING: per-group count and sum for int_col % 7.
SELECT
  int_col % 7,
  COUNT(*),
  SUM(int_col)
FROM alltypesagg$TABLE
GROUP BY 1
---- TYPES
int, bigint, bigint
---- RESULTS
0,1420,710710
1,1430,712140
2,1430,713570
3,1430,715000
4,1430,716430
5,1430,717860
6,1420,709290
NULL,10,NULL
====
# HAVING on sum() with an inclusive lower bound.
SELECT
  int_col % 7,
  COUNT(*),
  SUM(int_col)
FROM alltypesagg$TABLE
GROUP BY 1
HAVING SUM(int_col) >= 715000
---- TYPES
int, bigint, bigint
---- RESULTS
3,1430,715000
4,1430,716430
5,1430,717860
====
# Compound HAVING: sum() OR count(*).
SELECT
  int_col % 7,
  COUNT(*),
  SUM(int_col)
FROM alltypesagg$TABLE
GROUP BY 1
HAVING SUM(int_col) >= 715000
    OR COUNT(*) > 1420
---- TYPES
int, bigint, bigint
---- RESULTS
1,1430,712140
2,1430,713570
3,1430,715000
4,1430,716430
5,1430,717860
====
# HAVING with IS NULL on sum(); only the NULL group is expected.
SELECT
  int_col % 7,
  COUNT(*),
  SUM(int_col)
FROM alltypesagg$TABLE
GROUP BY 1
HAVING SUM(int_col) IS NULL
---- TYPES
int, bigint, bigint
---- RESULTS
NULL,10,NULL
====
# Baseline without HAVING: per-group count and avg for int_col % 7.
SELECT
  int_col % 7,
  COUNT(*),
  AVG(int_col)
FROM alltypesagg$TABLE
GROUP BY 1
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
1,1430,498
2,1430,499
3,1430,500
4,1430,501
5,1430,502
6,1420,499.5
NULL,10,NULL
====
# HAVING on avg() with a strict bound; the group whose avg is exactly 500 is not expected.
SELECT
  int_col % 7,
  COUNT(*),
  AVG(int_col)
FROM alltypesagg$TABLE
GROUP BY 1
HAVING AVG(int_col) > 500
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
4,1430,501
5,1430,502
====
# Compound HAVING: avg() OR count(*); the count(*) = 10 disjunct adds the NULL group.
SELECT
  int_col % 7,
  COUNT(*),
  AVG(int_col)
FROM alltypesagg$TABLE
GROUP BY 1
HAVING AVG(int_col) > 500
    OR COUNT(*) = 10
---- TYPES
int, bigint, double
---- RESULTS
0,1420,500.5
4,1430,501
5,1430,502
NULL,10,NULL
====
# HAVING applied to the grouping column itself (a timestamp), compared against
# a string literal cast to timestamp. Every expected group has count 1.
SELECT
  timestamp_col,
  COUNT(*)
FROM alltypesagg$TABLE
GROUP BY timestamp_col
HAVING timestamp_col < CAST('2010-01-01 01:05:20' AS TIMESTAMP)
---- TYPES
timestamp, bigint
---- RESULTS
2010-01-01 00:00:00,1
2010-01-01 00:01:00,1
2010-01-01 00:02:00.100000000,1
2010-01-01 00:03:00.300000000,1
2010-01-01 00:04:00.600000000,1
2010-01-01 00:05:00.100000000,1
2010-01-01 00:06:00.150000000,1
2010-01-01 00:07:00.210000000,1
2010-01-01 00:08:00.280000000,1
2010-01-01 00:09:00.360000000,1
2010-01-01 00:10:00.450000000,1
2010-01-01 00:11:00.550000000,1
2010-01-01 00:12:00.660000000,1
2010-01-01 00:13:00.780000000,1
2010-01-01 00:14:00.910000000,1
2010-01-01 00:15:01.500000000,1
2010-01-01 00:16:01.200000000,1
2010-01-01 00:17:01.360000000,1
2010-01-01 00:18:01.530000000,1
2010-01-01 00:19:01.710000000,1
2010-01-01 00:20:01.900000000,1
2010-01-01 00:21:02.100000000,1
2010-01-01 00:22:02.310000000,1
2010-01-01 00:23:02.530000000,1
2010-01-01 00:24:02.760000000,1
2010-01-01 00:25:03,1
2010-01-01 00:26:03.250000000,1
2010-01-01 00:27:03.510000000,1
2010-01-01 00:28:03.780000000,1
2010-01-01 00:29:04.600000000,1
2010-01-01 00:30:04.350000000,1
2010-01-01 00:31:04.650000000,1
2010-01-01 00:32:04.960000000,1
2010-01-01 00:33:05.280000000,1
2010-01-01 00:34:05.610000000,1
2010-01-01 00:35:05.950000000,1
2010-01-01 00:36:06.300000000,1
2010-01-01 00:37:06.660000000,1
2010-01-01 00:38:07.300000000,1
2010-01-01 00:39:07.410000000,1
2010-01-01 00:40:07.800000000,1
2010-01-01 00:41:08.200000000,1
2010-01-01 00:42:08.610000000,1
2010-01-01 00:43:09.300000000,1
2010-01-01 00:44:09.460000000,1
2010-01-01 00:45:09.900000000,1
2010-01-01 00:46:10.350000000,1
2010-01-01 00:47:10.810000000,1
2010-01-01 00:48:11.280000000,1
2010-01-01 00:49:11.760000000,1
2010-01-01 00:50:12.250000000,1
2010-01-01 00:51:12.750000000,1
2010-01-01 00:52:13.260000000,1
2010-01-01 00:53:13.780000000,1
2010-01-01 00:54:14.310000000,1
2010-01-01 00:55:14.850000000,1
2010-01-01 00:56:15.400000000,1
2010-01-01 00:57:15.960000000,1
2010-01-01 00:58:16.530000000,1
2010-01-01 00:59:17.110000000,1
2010-01-01 01:00:17.700000000,1
2010-01-01 01:01:18.300000000,1
2010-01-01 01:02:18.910000000,1
2010-01-01 01:03:19.530000000,1
2010-01-01 01:04:20.160000000,1
====