mirror of
https://github.com/apache/impala.git
synced 2025-12-23 21:08:39 -05:00
This change updates the run-benchmark script to enable it to target one or more workloads. Benchmarks can now be run like: ./run-benchmark --workloads=hive-benchmark,tpch We look up the workload in the workloads directory, then read the associated query .test files and start executing them. To ensure the queries are not duplicated between benchmark and query tests, I moved all existing queries (under fe/src/test/resources/*) to the workloads directory. You do NOT need to look through all the .test files; I've just moved them. The one new file is 'hive-benchmark.test', which contains the hive benchmark queries. Also added support for generating schema for different scale factors as well as executing against these scale factors. For example, let's say we have a dataset with a scale factor called "SF3". We would first generate the schema using: ./generate_schema_statements --workload=<workload> --scale_factor="SF3" This will create tables whose names are distinct from those of the other scale factors. Run the generated .sql file to load the data. Alternatively, the data can be loaded by running a new python script: ./bin/load-data.py -w <workload1>,<workload2> -e <exploration strategy> -s [scale factor] For example: ./bin/load-data.py -w tpch -e core -s SF3 Then run against this: ./run-benchmark --workloads=<workload> --scale_factor=SF3 This changeset also includes a few other minor tweaks to some of the test scripts. Change-Id: Ife8a8d91567d75c9612be37bec96c1e7780f50d6
247 lines
6.6 KiB
Plaintext
247 lines
6.6 KiB
Plaintext
# Test join on a timestamp column (hashing of timestamp values was not working properly)
|
|
select a.timestamp_col from alltypessmall a inner join alltypessmall b on
|
|
(a.timestamp_col = b.timestamp_col)
|
|
where a.year=2009 and a.month=1 and b.year=2009 and b.month=1
|
|
---- TYPES
|
|
timestamp
|
|
---- RESULTS
|
|
2009-01-01 00:00:00
|
|
2009-01-01 00:01:00
|
|
2009-01-01 00:02:00.100000000
|
|
2009-01-01 00:03:00.300000000
|
|
2009-01-01 00:04:00.600000000
|
|
2009-01-01 00:05:00.100000000
|
|
2009-01-01 00:06:00.150000000
|
|
2009-01-01 00:07:00.210000000
|
|
2009-01-01 00:08:00.280000000
|
|
2009-01-01 00:09:00.360000000
|
|
2009-01-02 00:10:00.450000000
|
|
2009-01-02 00:11:00.450000000
|
|
2009-01-02 00:12:00.460000000
|
|
2009-01-02 00:13:00.480000000
|
|
2009-01-02 00:14:00.510000000
|
|
2009-01-02 00:15:00.550000000
|
|
2009-01-02 00:16:00.600000000
|
|
2009-01-02 00:17:00.660000000
|
|
2009-01-02 00:18:00.730000000
|
|
2009-01-02 00:19:00.810000000
|
|
2009-01-03 00:20:00.900000000
|
|
2009-01-03 00:21:00.900000000
|
|
2009-01-03 00:22:00.910000000
|
|
2009-01-03 00:23:00.930000000
|
|
2009-01-03 00:24:00.960000000
|
|
====
|
|
# Joins with multiple exprs
|
|
select j.*, d.* from JoinTbl j inner join DimTbl d on
|
|
(j.test_name = d.name AND j.test_zip = d.zip)
|
|
---- TYPES
|
|
bigint, string, int, int, bigint, string, int
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000,1001,'Name1',94611
|
|
1002,'Name2',94611,5000,1002,'Name2',94611
|
|
====
|
|
# Same join as the previous test, with the two conjuncts of the join condition
# written in the opposite order; the expected rows are identical.
select j.*, d.* from JoinTbl j inner join DimTbl d on
|
|
(j.test_zip = d.zip AND j.test_name = d.name)
|
|
---- TYPES
|
|
bigint, string, int, int, bigint, string, int
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000,1001,'Name1',94611
|
|
1002,'Name2',94611,5000,1002,'Name2',94611
|
|
====
|
|
# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in
|
|
# join cols, and a non-equality join predicate
|
|
# (alltypesagg.tinyint_col contains nulls instead of 0s)
|
|
# Should return the same rows as the test below, plus the extra sum column
|
|
select a.tinyint_col, b.id, a.string_col, a.tinyint_col + b.tinyint_col
|
|
from alltypesagg$TABLE a join hbasealltypessmall b
|
|
on (a.tinyint_col = b.id and a.tinyint_col + b.tinyint_col < 5)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.string_col > '88'
|
|
and b.bool_col = false
|
|
---- TYPES
|
|
tinyint, int, string, bigint
|
|
---- RESULTS
|
|
1,1,'881',2
|
|
1,1,'891',2
|
|
1,1,'901',2
|
|
1,1,'91',2
|
|
1,1,'911',2
|
|
1,1,'921',2
|
|
1,1,'931',2
|
|
1,1,'941',2
|
|
1,1,'951',2
|
|
1,1,'961',2
|
|
1,1,'971',2
|
|
1,1,'981',2
|
|
1,1,'991',2
|
|
====
|
|
# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in join cols
|
|
# (alltypesagg.tinyint_col contains nulls instead of 0s)
|
|
# Should be same result as the test below
|
|
select a.tinyint_col, b.id, a.string_col
|
|
from alltypesagg$TABLE a join hbasealltypessmall b on (a.tinyint_col = b.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.tinyint_col + b.tinyint_col < 5
|
|
and a.string_col > '88'
|
|
and b.bool_col = false
|
|
---- TYPES
|
|
tinyint, int, string
|
|
---- RESULTS
|
|
1,1,'881'
|
|
1,1,'891'
|
|
1,1,'901'
|
|
1,1,'91'
|
|
1,1,'911'
|
|
1,1,'921'
|
|
1,1,'931'
|
|
1,1,'941'
|
|
1,1,'951'
|
|
1,1,'961'
|
|
1,1,'971'
|
|
1,1,'981'
|
|
1,1,'991'
|
|
====
|
|
|
|
# join between two tables, extra join predicate, extra scan predicates, nulls in join cols
|
|
# (alltypesagg.tinyint_col contains nulls instead of 0s)
|
|
select a.tinyint_col, b.id, a.string_col
|
|
from alltypesagg$TABLE a join alltypessmall$TABLE b on (a.tinyint_col = b.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.tinyint_col + b.tinyint_col < 5
|
|
and a.string_col > '88'
|
|
and b.bool_col = false
|
|
---- TYPES
|
|
tinyint, int, string
|
|
---- RESULTS
|
|
1,1,'881'
|
|
1,1,'891'
|
|
1,1,'901'
|
|
1,1,'91'
|
|
1,1,'911'
|
|
1,1,'921'
|
|
1,1,'931'
|
|
1,1,'941'
|
|
1,1,'951'
|
|
1,1,'961'
|
|
1,1,'971'
|
|
1,1,'981'
|
|
1,1,'991'
|
|
====
|
|
# reversing the order of the tables produces the same result
|
|
select a.tinyint_col, b.id, a.string_col
|
|
from alltypessmall$TABLE b join alltypesagg$TABLE a on (a.tinyint_col = b.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.tinyint_col + b.tinyint_col < 5
|
|
and a.string_col > '88'
|
|
and b.bool_col = false
|
|
---- TYPES
|
|
tinyint, int, string
|
|
---- RESULTS
|
|
1,1,'881'
|
|
1,1,'891'
|
|
1,1,'901'
|
|
1,1,'91'
|
|
1,1,'911'
|
|
1,1,'921'
|
|
1,1,'931'
|
|
1,1,'941'
|
|
1,1,'951'
|
|
1,1,'961'
|
|
1,1,'971'
|
|
1,1,'981'
|
|
1,1,'991'
|
|
====
|
|
# join between three tables, extra join predicates, extra scan predicates, nulls in join cols
|
|
# (alltypesagg.tinyint_col contains nulls instead of 0s)
|
|
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
|
|
from alltypesagg$TABLE a
|
|
join alltypessmall$TABLE b on (a.smallint_col = b.id)
|
|
join alltypessmall$TABLE c on (a.tinyint_col = c.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.int_col > 899
|
|
and b.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and a.int_col + b.float_col + c.string_col < 1000
|
|
---- TYPES
|
|
smallint, int, tinyint, int, int, float, string
|
|
---- RESULTS
|
|
15,15,5,5,915,5.5,'5'
|
|
16,16,6,6,916,6.599999904632568,'6'
|
|
31,31,1,1,931,6.599999904632568,'1'
|
|
32,32,2,2,932,7.699999809265137,'2'
|
|
33,33,3,3,933,8.800000190734863,'3'
|
|
34,34,4,4,934,9.899999618530273,'4'
|
|
41,41,1,1,941,6.599999904632568,'1'
|
|
42,42,2,2,942,7.699999809265137,'2'
|
|
43,43,3,3,943,8.800000190734863,'3'
|
|
44,44,4,4,944,9.899999618530273,'4'
|
|
5,5,5,5,905,5.5,'5'
|
|
55,55,5,5,955,5.5,'5'
|
|
56,56,6,6,956,6.599999904632568,'6'
|
|
6,6,6,6,906,6.599999904632568,'6'
|
|
65,65,5,5,965,5.5,'5'
|
|
66,66,6,6,966,6.599999904632568,'6'
|
|
81,81,1,1,981,6.599999904632568,'1'
|
|
82,82,2,2,982,7.699999809265137,'2'
|
|
83,83,3,3,983,8.800000190734863,'3'
|
|
84,84,4,4,984,9.899999618530273,'4'
|
|
91,91,1,1,991,6.599999904632568,'1'
|
|
====
|
|
# reversing the order of the first two tables produces the same results
|
|
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
|
|
from alltypessmall$TABLE b
|
|
join alltypesagg$TABLE a on (a.smallint_col = b.id)
|
|
join alltypessmall$TABLE c on (a.tinyint_col = c.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.int_col > 899
|
|
and b.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and a.int_col + b.float_col + c.string_col < 1000
|
|
---- TYPES
|
|
smallint, int, tinyint, int, int, float, string
|
|
---- RESULTS
|
|
15,15,5,5,915,5.5,'5'
|
|
16,16,6,6,916,6.599999904632568,'6'
|
|
31,31,1,1,931,6.599999904632568,'1'
|
|
32,32,2,2,932,7.699999809265137,'2'
|
|
33,33,3,3,933,8.800000190734863,'3'
|
|
34,34,4,4,934,9.899999618530273,'4'
|
|
41,41,1,1,941,6.599999904632568,'1'
|
|
42,42,2,2,942,7.699999809265137,'2'
|
|
43,43,3,3,943,8.800000190734863,'3'
|
|
44,44,4,4,944,9.899999618530273,'4'
|
|
5,5,5,5,905,5.5,'5'
|
|
55,55,5,5,955,5.5,'5'
|
|
56,56,6,6,956,6.599999904632568,'6'
|
|
6,6,6,6,906,6.599999904632568,'6'
|
|
65,65,5,5,965,5.5,'5'
|
|
66,66,6,6,966,6.599999904632568,'6'
|
|
81,81,1,1,981,6.599999904632568,'1'
|
|
82,82,2,2,982,7.699999809265137,'2'
|
|
83,83,3,3,983,8.800000190734863,'3'
|
|
84,84,4,4,984,9.899999618530273,'4'
|
|
91,91,1,1,991,6.599999904632568,'1'
|
|
====
|
|
# joins on empty tables
|
|
select * from emptytable t1 join emptytable t2 on (t1.field=t2.field)
|
|
---- TYPES
|
|
int, string, int, string
|
|
---- RESULTS
|
|
====
|
|
# emptytable on the left side of the join; expects no rows
# NOTE(review): greptiny is presumably non-empty — confirm against the test data
select * from emptytable t1 join greptiny t2 on (t1.field=t2.field)
|
|
---- TYPES
|
|
int, string, string
|
|
---- RESULTS
|
|
====
|
|
# emptytable on the right side of the join; expects no rows
# NOTE(review): greptiny is presumably non-empty — confirm against the test data
select * from greptiny t1 join emptytable t2 on (t1.field=t2.field)
|
|
---- TYPES
|
|
string, int, string
|
|
---- RESULTS
|
|
====
|