mirror of
https://github.com/apache/impala.git
synced 2026-01-04 09:00:56 -05:00
This change updates the run-benchmark script to enable it to target one or more workloads. Now benchmarks can be run like: ./run-benchmark --workloads=hive-benchmark,tpch We look up the workload in the workloads directory, then read the associated query .test files and start executing them. To ensure the queries are not duplicated between benchmark and query tests, I moved all existing queries (under fe/src/test/resources/*) to the workloads directory. You do NOT need to look through all the .test files; I've just moved them. The one new file is 'hive-benchmark.test', which contains the hive benchmark queries. Also added support for generating schema for different scale factors as well as executing against these scale factors. For example, let's say we have a dataset with a scale factor called "SF3". We would first generate the schema using: ./generate_schema_statements --workload=<workload> --scale_factor="SF3" This will create tables whose names are distinct from those of the other scale factors. Run the generated .sql file to load the data. Alternatively, the data can be loaded by running a new python script: ./bin/load-data.py -w <workload1>,<workload2> -e <exploration strategy> -s [scale factor] For example: ./bin/load-data.py -w tpch -e core -s SF3 Then run against this: ./run-benchmark --workloads=<workload> --scale_factor=SF3 This changeset also includes a few other minor tweaks to some of the test scripts. Change-Id: Ife8a8d91567d75c9612be37bec96c1e7780f50d6
628 lines
9.6 KiB
Plaintext
628 lines
9.6 KiB
Plaintext
# Based on Aggregation Queries
# Sums float_col per int_col for rows with id < 5. "group by 1" refers to the
# first select item (int_col) and "order by 2" to the second (the sum), so the
# 3 groups with the smallest sums are kept.
|
|
select int_col, sum(float_col)
|
|
from hbasealltypessmall
|
|
where id < 5
|
|
group by 1
|
|
order by 2
|
|
limit 3
|
|
---- TYPES
|
|
int, double
|
|
---- RESULTS
|
|
0,0
|
|
1,1.100000023841858
|
|
2,2.200000047683716
|
|
====
|
|
# Run query without order by
# Counts rows per tinyint_col. There is no ORDER BY, so row order is not
# specified by the query; limit 10 equals the number of groups listed under
# RESULTS (1-9 plus NULL).
|
|
select tinyint_col, count(*)
|
|
from alltypesagg$TABLE
|
|
group by 1
|
|
limit 10
|
|
---- TYPES
|
|
tinyint, bigint
|
|
---- RESULTS
|
|
1,1000
|
|
2,1000
|
|
3,1000
|
|
4,1000
|
|
5,1000
|
|
6,1000
|
|
7,1000
|
|
8,1000
|
|
9,1000
|
|
NULL,1000
|
|
====
|
|
# Same query, ordered ascending by the first column
# ("order by 1" is an ordinal reference to tinyint_col; no direction is given,
# so the default ascending order applies).
|
|
select tinyint_col, count(*)
|
|
from alltypesagg$TABLE
|
|
group by 1
|
|
order by 1
|
|
limit 10
|
|
---- TYPES
|
|
tinyint, bigint
|
|
---- RESULTS
|
|
1,1000
|
|
2,1000
|
|
3,1000
|
|
4,1000
|
|
5,1000
|
|
6,1000
|
|
7,1000
|
|
8,1000
|
|
9,1000
|
|
NULL,1000
|
|
====
|
|
# Same query, ordered descending by the first column
# The limit (20) is larger than the 10 groups listed under RESULTS, so the
# limit does not cut off any row here.
|
|
select tinyint_col, count(*)
|
|
from alltypesagg$TABLE
|
|
group by 1
|
|
order by 1 desc
|
|
limit 20
|
|
---- TYPES
|
|
tinyint, bigint
|
|
---- RESULTS
|
|
1,1000
|
|
2,1000
|
|
3,1000
|
|
4,1000
|
|
5,1000
|
|
6,1000
|
|
7,1000
|
|
8,1000
|
|
9,1000
|
|
NULL,1000
|
|
====
|
|
# Order by two columns with mixed directions: date_string_col ascending
# (default direction), then int_col descending within equal dates.
select date_string_col,int_col
|
|
from alltypesagg$TABLE
|
|
order by date_string_col, int_col desc
|
|
limit 10
|
|
---- TYPES
|
|
string, int
|
|
---- RESULTS
|
|
'01/01/10',990
|
|
'01/01/10',991
|
|
'01/01/10',992
|
|
'01/01/10',993
|
|
'01/01/10',994
|
|
'01/01/10',995
|
|
'01/01/10',996
|
|
'01/01/10',997
|
|
'01/01/10',998
|
|
'01/01/10',999
|
|
====
|
|
# order by with null tuples in tuple row
# The full outer join keeps unmatched rows from both JoinTbl and DimTbl, so
# either side's columns -- including the sort keys j.test_id and d.name -- can
# be NULL (see the NULL-padded rows under RESULTS).
|
|
select j.*, d.* from JoinTbl j full outer join DimTbl d
|
|
on (j.test_id = d.id)
|
|
order by j.test_id, d.name
|
|
limit 100
|
|
---- TYPES
|
|
bigint, string, int, int, bigint, string, int
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000,1001,'Name1',94611
|
|
1002,'Name2',94611,5000,1002,'Name2',94611
|
|
1003,'Name3',94611,5000,1003,'Name3',94612
|
|
1004,'Name4',94611,5000,1004,'Name4',94612
|
|
1005,'Name5',94611,5000,1005,'Name5',94613
|
|
1006,'Name16',94612,15000,1006,'Name6',94613
|
|
1006,'Name16',94612,5000,1006,'Name6',94613
|
|
1006,'Name16',94616,15000,1006,'Name6',94613
|
|
1006,'Name16',94616,5000,1006,'Name6',94613
|
|
1006,'Name6',94616,15000,1006,'Name6',94613
|
|
1006,'Name6',94616,5000,1006,'Name6',94613
|
|
1106,'Name16',94612,15000,NULL,'NULL',NULL
|
|
1106,'Name16',94612,5000,NULL,'NULL',NULL
|
|
1106,'Name16',94616,15000,NULL,'NULL',NULL
|
|
1106,'Name16',94616,5000,NULL,'NULL',NULL
|
|
1106,'Name6',94612,15000,NULL,'NULL',NULL
|
|
1106,'Name6',94612,5000,NULL,'NULL',NULL
|
|
1106,'Name6',94616,15000,NULL,'NULL',NULL
|
|
1106,'Name6',94616,5000,NULL,'NULL',NULL
|
|
NULL,'NULL',NULL,NULL,1007,'Name7',94614
|
|
NULL,'NULL',NULL,NULL,1008,'Name8',94614
|
|
NULL,'NULL',NULL,NULL,1009,'Name9',94615
|
|
NULL,'NULL',NULL,NULL,1010,'Name10',94615
|
|
====
|
|
# order by multiple cols with nulls
# Groups day = 1 rows by (tinyint_col % 3, smallint_col % 3) and orders by both
# grouping expressions ascending via ordinals 1 and 2. The expected groups
# include NULL keys.
|
|
select tinyint_col % 3, smallint_col % 3, count(*)
|
|
from alltypesagg$TABLE
|
|
where day = 1
|
|
group by 1, 2
|
|
order by 1, 2
|
|
limit 20
|
|
---- TYPES
|
|
tinyint, smallint, bigint
|
|
---- RESULTS
|
|
0,0,120
|
|
0,1,90
|
|
0,2,90
|
|
1,0,90
|
|
1,1,120
|
|
1,2,90
|
|
2,0,90
|
|
2,1,90
|
|
2,2,120
|
|
NULL,0,30
|
|
NULL,1,30
|
|
NULL,2,30
|
|
NULL,NULL,10
|
|
====
|
|
# Same aggregation as the "order by multiple cols with nulls" case, with the
# first key ascending and the second key descending.
select tinyint_col % 3, smallint_col % 3, count(*)
|
|
from alltypesagg$TABLE
|
|
where day = 1
|
|
group by 1, 2
|
|
order by 1, 2 desc
|
|
limit 20
|
|
---- TYPES
|
|
tinyint, smallint, bigint
|
|
---- RESULTS
|
|
0,0,120
|
|
0,1,90
|
|
0,2,90
|
|
1,0,90
|
|
1,1,120
|
|
1,2,90
|
|
2,0,90
|
|
2,1,90
|
|
2,2,120
|
|
NULL,0,30
|
|
NULL,1,30
|
|
NULL,2,30
|
|
NULL,NULL,10
|
|
====
|
|
# Same aggregation as the "order by multiple cols with nulls" case, with the
# first key descending and the second key ascending.
select tinyint_col % 3, smallint_col % 3, count(*)
|
|
from alltypesagg$TABLE
|
|
where day = 1
|
|
group by 1, 2
|
|
order by 1 desc, 2
|
|
limit 20
|
|
---- TYPES
|
|
tinyint, smallint, bigint
|
|
---- RESULTS
|
|
0,0,120
|
|
0,1,90
|
|
0,2,90
|
|
1,0,90
|
|
1,1,120
|
|
1,2,90
|
|
2,0,90
|
|
2,1,90
|
|
2,2,120
|
|
NULL,0,30
|
|
NULL,1,30
|
|
NULL,2,30
|
|
NULL,NULL,10
|
|
====
|
|
# Same aggregation as the "order by multiple cols with nulls" case, with both
# keys descending.
select tinyint_col % 3, smallint_col % 3, count(*)
|
|
from alltypesagg$TABLE
|
|
where day = 1
|
|
group by 1, 2
|
|
order by 1 desc, 2 desc
|
|
limit 20
|
|
---- TYPES
|
|
tinyint, smallint, bigint
|
|
---- RESULTS
|
|
0,0,120
|
|
0,1,90
|
|
0,2,90
|
|
1,0,90
|
|
1,1,120
|
|
1,2,90
|
|
2,0,90
|
|
2,1,90
|
|
2,2,120
|
|
NULL,0,30
|
|
NULL,1,30
|
|
NULL,2,30
|
|
NULL,NULL,10
|
|
====
|
|
# Descending order on a single string column, limited to 50 rows.
select date_string_col
|
|
from alltypessmall$TABLE
|
|
order by date_string_col desc
|
|
limit 50
|
|
---- TYPES
|
|
string
|
|
---- RESULTS
|
|
'03/01/09'
|
|
'03/01/09'
|
|
'03/01/09'
|
|
'03/01/09'
|
|
'03/01/09'
|
|
'03/01/09'
|
|
'03/01/09'
|
|
'03/01/09'
|
|
'03/01/09'
|
|
'03/01/09'
|
|
'03/02/09'
|
|
'03/02/09'
|
|
'03/02/09'
|
|
'03/02/09'
|
|
'03/02/09'
|
|
'03/02/09'
|
|
'03/02/09'
|
|
'03/02/09'
|
|
'03/02/09'
|
|
'03/02/09'
|
|
'03/03/09'
|
|
'03/03/09'
|
|
'03/03/09'
|
|
'03/03/09'
|
|
'03/03/09'
|
|
'04/01/09'
|
|
'04/01/09'
|
|
'04/01/09'
|
|
'04/01/09'
|
|
'04/01/09'
|
|
'04/01/09'
|
|
'04/01/09'
|
|
'04/01/09'
|
|
'04/01/09'
|
|
'04/01/09'
|
|
'04/02/09'
|
|
'04/02/09'
|
|
'04/02/09'
|
|
'04/02/09'
|
|
'04/02/09'
|
|
'04/02/09'
|
|
'04/02/09'
|
|
'04/02/09'
|
|
'04/02/09'
|
|
'04/02/09'
|
|
'04/03/09'
|
|
'04/03/09'
|
|
'04/03/09'
|
|
'04/03/09'
|
|
'04/03/09'
|
|
====
|
|
# Based on join queries
# Joins alltypesagg (a) to alltypessmall (b) on a.tinyint_col = b.id, filters
# on columns from both sides (including one predicate that combines a and b),
# and orders ascending by a.string_col.
|
|
select a.tinyint_col, b.id, a.string_col
|
|
from alltypesagg$TABLE a join alltypessmall$TABLE b on (a.tinyint_col = b.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.tinyint_col + b.tinyint_col < 5
|
|
and a.string_col > '88'
|
|
and b.bool_col = false
|
|
order by a.string_col
|
|
limit 5
|
|
---- TYPES
|
|
tinyint, int, string
|
|
---- RESULTS
|
|
1,1,'881'
|
|
1,1,'891'
|
|
1,1,'901'
|
|
1,1,'91'
|
|
1,1,'911'
|
|
====
|
|
# Same two-table join and filters as the "Based on join queries" case, but
# ordered by a.string_col descending.
select a.tinyint_col, b.id, a.string_col
|
|
from alltypesagg$TABLE a join alltypessmall$TABLE b on (a.tinyint_col = b.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.tinyint_col + b.tinyint_col < 5
|
|
and a.string_col > '88'
|
|
and b.bool_col = false
|
|
order by a.string_col desc
|
|
limit 5
|
|
---- TYPES
|
|
tinyint, int, string
|
|
---- RESULTS
|
|
1,1,'951'
|
|
1,1,'961'
|
|
1,1,'971'
|
|
1,1,'981'
|
|
1,1,'991'
|
|
====
|
|
# Three-way join: alltypesagg (a) is joined to alltypessmall twice, as b on
# a.smallint_col and as c on a.tinyint_col. One predicate adds an int, a float
# and a string column together (a.int_col + b.float_col + c.string_col).
# Rows are ordered by c.string_col descending, then a.smallint_col ascending.
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
|
|
from alltypesagg$TABLE a
|
|
join alltypessmall$TABLE b on (a.smallint_col = b.id)
|
|
join alltypessmall$TABLE c on (a.tinyint_col = c.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.int_col > 899
|
|
and b.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and a.int_col + b.float_col + c.string_col < 1000
|
|
order by c.string_col desc, a.smallint_col
|
|
limit 10
|
|
---- TYPES
|
|
smallint, int, tinyint, int, int, float, string
|
|
---- RESULTS
|
|
15,15,5,5,915,5.5,'5'
|
|
16,16,6,6,916,6.599999904632568,'6'
|
|
34,34,4,4,934,9.899999618530273,'4'
|
|
44,44,4,4,944,9.899999618530273,'4'
|
|
5,5,5,5,905,5.5,'5'
|
|
55,55,5,5,955,5.5,'5'
|
|
56,56,6,6,956,6.599999904632568,'6'
|
|
6,6,6,6,906,6.599999904632568,'6'
|
|
65,65,5,5,965,5.5,'5'
|
|
66,66,6,6,966,6.599999904632568,'6'
|
|
====
|
|
# Order by a column that is not in the select list
# (This case is the baseline: the sort key int_col IS selected here. The
# following test case repeats the query with int_col removed from the select
# list.)
|
|
|
# Query with ordering column in select list
|
|
# Don't include date_string_col, it comes back in random order.
|
|
select int_col, tinyint_col
|
|
from alltypessmall$TABLE
|
|
order by int_col desc
|
|
limit 20
|
|
---- TYPES
|
|
int, tinyint
|
|
---- RESULTS
|
|
7,7
|
|
7,7
|
|
7,7
|
|
7,7
|
|
8,8
|
|
8,8
|
|
8,8
|
|
8,8
|
|
8,8
|
|
8,8
|
|
8,8
|
|
8,8
|
|
9,9
|
|
9,9
|
|
9,9
|
|
9,9
|
|
9,9
|
|
9,9
|
|
9,9
|
|
9,9
|
|
====
|
|
# Same query with ordering col not in select list
# int_col is used only as the sort key. The expected tinyint_col values are the
# same as the second column expected by the preceding test case.
|
|
select tinyint_col
|
|
from alltypessmall$TABLE
|
|
order by int_col desc
|
|
limit 20
|
|
---- TYPES
|
|
tinyint
|
|
---- RESULTS
|
|
7
|
|
7
|
|
7
|
|
7
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
9
|
|
9
|
|
9
|
|
9
|
|
9
|
|
9
|
|
9
|
|
9
|
|
====
|
|
# Order by many exprs
# Counts rows per (year, month) and orders by both grouping columns through
# ordinals 1 and 2.
|
|
select year, month, count(*)
|
|
from alltypes
|
|
group by 1, 2
|
|
order by 1, 2
|
|
limit 100
|
|
---- TYPES
|
|
int, int, bigint
|
|
---- RESULTS
|
|
2009,1,310
|
|
2009,10,310
|
|
2009,11,300
|
|
2009,12,310
|
|
2009,2,280
|
|
2009,3,310
|
|
2009,4,300
|
|
2009,5,310
|
|
2009,6,300
|
|
2009,7,310
|
|
2009,8,310
|
|
2009,9,300
|
|
2010,1,310
|
|
2010,10,310
|
|
2010,11,300
|
|
2010,12,310
|
|
2010,2,280
|
|
2010,3,310
|
|
2010,4,300
|
|
2010,5,310
|
|
2010,6,300
|
|
2010,7,310
|
|
2010,8,310
|
|
2010,9,300
|
|
====
|
|
# More Complex Ordering Exprs
# The sort key is an aggregate expression, avg(tinyint_col), written out in the
# ORDER BY clause; the same expression is the third select item.
|
|
select int_col % 7, count(*), avg(tinyint_col)
|
|
from alltypesagg$TABLE
|
|
group by 1
|
|
order by avg(tinyint_col)
|
|
limit 10
|
|
---- TYPES
|
|
int, bigint, double
|
|
---- RESULTS
|
|
0,1420,5.0078125
|
|
1,1430,4.992248062015504
|
|
2,1430,5.015503875968992
|
|
3,1430,5
|
|
4,1430,4.984496124031008
|
|
5,1430,5.007751937984496
|
|
6,1420,4.9921875
|
|
NULL,10,NULL
|
|
====
|
|
# Order by an aggregate, max(int_col), computed over groups of int_col % 7.
select int_col % 7, count(*), max(int_col)
|
|
from alltypesagg$TABLE
|
|
group by 1
|
|
order by max(int_col)
|
|
limit 10
|
|
---- TYPES
|
|
int, bigint, int
|
|
---- RESULTS
|
|
0,1420,994
|
|
1,1430,995
|
|
2,1430,996
|
|
3,1430,997
|
|
4,1430,998
|
|
5,1430,999
|
|
6,1420,993
|
|
NULL,10,NULL
|
|
====
|
|
# Order by an arithmetic expression over two aggregates
# (avg(tinyint_col) - avg(float_col)), descending, over groups of int_col % 5.
select int_col % 5, count(*), avg(tinyint_col) - avg(float_col)
|
|
from alltypesagg$TABLE
|
|
group by 1
|
|
order by avg(tinyint_col) - avg(float_col) desc
|
|
limit 10
|
|
---- TYPES
|
|
int, bigint, double
|
|
---- RESULTS
|
|
0,1990,-545
|
|
1,2000,-544.8499889141322
|
|
2,2000,-544.9500045645237
|
|
3,2000,-545.0499953591824
|
|
4,2000,-545.1500110459327
|
|
NULL,10,NULL
|
|
====
|
|
# Order by an expression that is not in the select list (int_col % 5), with
# int_col itself as the second sort key.
select int_col
|
|
from alltypessmall$TABLE
|
|
order by int_col % 5, int_col
|
|
limit 100
|
|
---- TYPES
|
|
int
|
|
---- RESULTS
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
0
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
1
|
|
2
|
|
2
|
|
2
|
|
2
|
|
2
|
|
2
|
|
2
|
|
2
|
|
2
|
|
2
|
|
2
|
|
2
|
|
3
|
|
3
|
|
3
|
|
3
|
|
3
|
|
3
|
|
3
|
|
3
|
|
3
|
|
3
|
|
3
|
|
3
|
|
4
|
|
4
|
|
4
|
|
4
|
|
4
|
|
4
|
|
4
|
|
4
|
|
4
|
|
4
|
|
4
|
|
4
|
|
5
|
|
5
|
|
5
|
|
5
|
|
5
|
|
5
|
|
5
|
|
5
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
6
|
|
7
|
|
7
|
|
7
|
|
7
|
|
7
|
|
7
|
|
7
|
|
7
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
8
|
|
9
|
|
9
|
|
9
|
|
9
|
|
9
|
|
9
|
|
9
|
|
9
|
|
====
|
|
# All select list items have an implicit alias. Test that the order by column ref
|
|
# "int_col" is correctly aliased to t1.int_col, and therefore it is not an
|
|
# ambiguous reference.
# (alltypessmall is self-joined as t1 and t2, so both sides expose a column
# named int_col; only t1.int_col appears in the select list.)
|
|
select t1.int_col from alltypessmall t1, alltypessmall t2 where t1.id = t2.id
|
|
order by int_col
|
|
limit 2
|
|
---- TYPES
|
|
int
|
|
---- RESULTS
|
|
0
|
|
0
|
|
====
|
|
# Order by a computed timestamp: "order by 1" is an ordinal reference to the
# first select item, date_sub(timestamp_col, id).
select date_sub(timestamp_col, id), timestamp_col, id
|
|
from alltypessmall order by 1 limit 20
|
|
---- TYPES
|
|
timestamp,timestamp,int
|
|
---- RESULTS
|
|
2008-12-10 00:24:00.960000000,2009-01-03 00:24:00.960000000,24
|
|
2008-12-11 00:23:00.930000000,2009-01-03 00:23:00.930000000,23
|
|
2008-12-12 00:22:00.910000000,2009-01-03 00:22:00.910000000,22
|
|
2008-12-13 00:21:00.900000000,2009-01-03 00:21:00.900000000,21
|
|
2008-12-14 00:19:00.810000000,2009-01-02 00:19:00.810000000,19
|
|
2008-12-14 00:20:00.900000000,2009-01-03 00:20:00.900000000,20
|
|
2008-12-15 00:18:00.730000000,2009-01-02 00:18:00.730000000,18
|
|
2008-12-16 00:17:00.660000000,2009-01-02 00:17:00.660000000,17
|
|
2008-12-16 00:24:00.960000000,2009-02-03 00:24:00.960000000,49
|
|
2008-12-17 00:16:00.600000000,2009-01-02 00:16:00.600000000,16
|
|
2008-12-17 00:23:00.930000000,2009-02-03 00:23:00.930000000,48
|
|
2008-12-18 00:15:00.550000000,2009-01-02 00:15:00.550000000,15
|
|
2008-12-18 00:22:00.910000000,2009-02-03 00:22:00.910000000,47
|
|
2008-12-19 00:14:00.510000000,2009-01-02 00:14:00.510000000,14
|
|
2008-12-19 00:21:00.900000000,2009-02-03 00:21:00.900000000,46
|
|
2008-12-19 00:24:00.960000000,2009-03-03 00:24:00.960000000,74
|
|
2008-12-20 00:13:00.480000000,2009-01-02 00:13:00.480000000,13
|
|
2008-12-20 00:19:00.810000000,2009-02-02 00:19:00.810000000,44
|
|
2008-12-20 00:20:00.900000000,2009-02-03 00:20:00.900000000,45
|
|
2008-12-20 00:23:00.930000000,2009-03-03 00:23:00.930000000,73
|
|
====
|