mirror of
https://github.com/apache/impala.git
synced 2026-01-01 18:00:30 -05:00
This change updates the run-benchmark script to enable it to target one or more workloads. Now benchmarks can be run like: ./run-benchmark --workloads=hive-benchmark,tpch We look up the workload in the workloads directory, then read the associated query .test files and start executing them. To ensure the queries are not duplicated between benchmark and query tests, I moved all existing queries (under fe/src/test/resources/*) to the workloads directory. You do NOT need to look through all the .test files, I've just moved them. The one new file is the 'hive-benchmark.test' which contains the hive benchmark queries. Also added support for generating schema for different scale factors as well as executing against these scale factors. For example, let's say we have a dataset with a scale factor called "SF3". We would first generate the schema using: ./generate_schema_statements --workload=<workload> --scale_factor="SF3" This will create tables whose names are distinct from those of the other scale factors. Run the generated .sql file to load the data. Alternatively, the data can be loaded by running a new python script: ./bin/load-data.py -w <workload1>,<workload2> -e <exploration strategy> -s [scale factor] For example: ./bin/load-data.py -w tpch -e core -s SF3 Then run against this: ./run-benchmark --workloads=<workload> --scale_factor=SF3 This changeset also includes a few other minor tweaks to some of the test scripts. Change-Id: Ife8a8d91567d75c9612be37bec96c1e7780f50d6
438 lines
10 KiB
Plaintext
438 lines
10 KiB
Plaintext
# Doubly nested inline views that only rename a column: id is aliased to y in the
# innermost select, y is aliased to x one level up, and the outer select * returns x.
select *
|
|
from (
|
|
select y x
|
|
from (
|
|
select id y from hbasealltypessmall
|
|
) a
|
|
) b
|
|
---- TYPES
|
|
int
|
|
---- RESULTS
|
|
0
|
|
1
|
|
10
|
|
11
|
|
12
|
|
13
|
|
14
|
|
15
|
|
16
|
|
17
|
|
18
|
|
19
|
|
2
|
|
20
|
|
21
|
|
22
|
|
23
|
|
24
|
|
25
|
|
26
|
|
27
|
|
28
|
|
29
|
|
3
|
|
30
|
|
31
|
|
32
|
|
33
|
|
34
|
|
35
|
|
36
|
|
37
|
|
38
|
|
39
|
|
4
|
|
40
|
|
41
|
|
42
|
|
43
|
|
44
|
|
45
|
|
46
|
|
47
|
|
48
|
|
49
|
|
5
|
|
50
|
|
51
|
|
52
|
|
53
|
|
54
|
|
55
|
|
56
|
|
57
|
|
58
|
|
59
|
|
6
|
|
60
|
|
61
|
|
62
|
|
63
|
|
64
|
|
65
|
|
66
|
|
67
|
|
68
|
|
69
|
|
7
|
|
70
|
|
71
|
|
72
|
|
73
|
|
74
|
|
75
|
|
76
|
|
77
|
|
78
|
|
79
|
|
8
|
|
80
|
|
81
|
|
82
|
|
83
|
|
84
|
|
85
|
|
86
|
|
87
|
|
88
|
|
89
|
|
9
|
|
90
|
|
91
|
|
92
|
|
93
|
|
94
|
|
95
|
|
96
|
|
97
|
|
98
|
|
99
|
|
====
|
|
# Subquery with the predicate inside: string_col = '4' is applied within inline view a,
# and the outer select * has no filter of its own.
|
|
select *
|
|
from (
|
|
select * from hbasealltypessmall where string_col = '4'
|
|
) a
|
|
---- TYPES
|
|
int, boolean, double, float, bigint, int, smallint, tinyint, string, string, timestamp
|
|
---- RESULTS
|
|
14,true,40.4,4.400000095367432,40,4,4,4,'01/02/09','4',2009-01-02 00:14:00.510000000
|
|
24,true,40.4,4.400000095367432,40,4,4,4,'01/03/09','4',2009-01-03 00:24:00.960000000
|
|
29,false,40.4,4.400000095367432,40,4,4,4,'02/01/09','4',2009-02-01 00:04:00.600000000
|
|
39,false,40.4,4.400000095367432,40,4,4,4,'02/02/09','4',2009-02-02 00:14:00.510000000
|
|
4,true,40.4,4.400000095367432,40,4,4,4,'01/01/09','4',2009-01-01 00:04:00.600000000
|
|
49,false,40.4,4.400000095367432,40,4,4,4,'02/03/09','4',2009-02-03 00:24:00.960000000
|
|
54,true,40.4,4.400000095367432,40,4,4,4,'03/01/09','4',2009-03-01 00:04:00.600000000
|
|
64,true,40.4,4.400000095367432,40,4,4,4,'03/02/09','4',2009-03-02 00:14:00.510000000
|
|
74,true,40.4,4.400000095367432,40,4,4,4,'03/03/09','4',2009-03-03 00:24:00.960000000
|
|
79,false,40.4,4.400000095367432,40,4,4,4,'04/01/09','4',2009-04-01 00:04:00.600000000
|
|
89,false,40.4,4.400000095367432,40,4,4,4,'04/02/09','4',2009-04-02 00:14:00.510000000
|
|
99,false,40.4,4.400000095367432,40,4,4,4,'04/03/09','4',2009-04-03 00:24:00.960000000
|
|
====
|
|
# Subquery with predicate push down: the filter string_col = '4' is written on the
# outermost query, above two levels of select * inline views (x, then y).
|
|
select *
|
|
from (
|
|
select *
|
|
from (
|
|
select * from hbasealltypessmall
|
|
) x
|
|
) y
|
|
where string_col = '4'
|
|
---- TYPES
|
|
int, boolean, double, float, bigint, int, smallint, tinyint, string, string, timestamp
|
|
---- RESULTS
|
|
14,true,40.4,4.400000095367432,40,4,4,4,'01/02/09','4',2009-01-02 00:14:00.510000000
|
|
24,true,40.4,4.400000095367432,40,4,4,4,'01/03/09','4',2009-01-03 00:24:00.960000000
|
|
29,false,40.4,4.400000095367432,40,4,4,4,'02/01/09','4',2009-02-01 00:04:00.600000000
|
|
39,false,40.4,4.400000095367432,40,4,4,4,'02/02/09','4',2009-02-02 00:14:00.510000000
|
|
4,true,40.4,4.400000095367432,40,4,4,4,'01/01/09','4',2009-01-01 00:04:00.600000000
|
|
49,false,40.4,4.400000095367432,40,4,4,4,'02/03/09','4',2009-02-03 00:24:00.960000000
|
|
54,true,40.4,4.400000095367432,40,4,4,4,'03/01/09','4',2009-03-01 00:04:00.600000000
|
|
64,true,40.4,4.400000095367432,40,4,4,4,'03/02/09','4',2009-03-02 00:14:00.510000000
|
|
74,true,40.4,4.400000095367432,40,4,4,4,'03/03/09','4',2009-03-03 00:24:00.960000000
|
|
79,false,40.4,4.400000095367432,40,4,4,4,'04/01/09','4',2009-04-01 00:04:00.600000000
|
|
89,false,40.4,4.400000095367432,40,4,4,4,'04/02/09','4',2009-04-02 00:14:00.510000000
|
|
99,false,40.4,4.400000095367432,40,4,4,4,'04/03/09','4',2009-04-03 00:24:00.960000000
|
|
====
|
|
# Join between three tables, with extra join predicates, extra scan predicates, and
# nulls in the join columns.
|
|
# The first join (alltypesagg rows with month=1, joined to alltypessmall b on
# smallint_col = id) is wrapped in inline view x on the left-hand side of the join to c.
|
|
# (alltypesagg.tinyint_col contains nulls instead of 0s)
|
|
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
|
|
from (
|
|
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
|
|
a.int_col int_col, a.month month,
|
|
b.float_col float_col, b.id id
|
|
from (
|
|
select *
|
|
from alltypesagg$TABLE a
|
|
where month=1
|
|
) a
|
|
join alltypessmall$TABLE b
|
|
on (a.smallint_col = b.id)
|
|
) x
|
|
join alltypessmall$TABLE c on (x.tinyint_col = c.id)
|
|
where x.day=1
|
|
and x.int_col > 899
|
|
and x.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and x.int_col + x.float_col + c.string_col < 1000
|
|
---- TYPES
|
|
smallint, int, tinyint, int, int, float, string
|
|
---- RESULTS
|
|
15,15,5,5,915,5.5,'5'
|
|
16,16,6,6,916,6.599999904632568,'6'
|
|
31,31,1,1,931,6.599999904632568,'1'
|
|
32,32,2,2,932,7.699999809265137,'2'
|
|
33,33,3,3,933,8.800000190734863,'3'
|
|
34,34,4,4,934,9.899999618530273,'4'
|
|
41,41,1,1,941,6.599999904632568,'1'
|
|
42,42,2,2,942,7.699999809265137,'2'
|
|
43,43,3,3,943,8.800000190734863,'3'
|
|
44,44,4,4,944,9.899999618530273,'4'
|
|
5,5,5,5,905,5.5,'5'
|
|
55,55,5,5,955,5.5,'5'
|
|
56,56,6,6,956,6.599999904632568,'6'
|
|
6,6,6,6,906,6.599999904632568,'6'
|
|
65,65,5,5,965,5.5,'5'
|
|
66,66,6,6,966,6.599999904632568,'6'
|
|
81,81,1,1,981,6.599999904632568,'1'
|
|
82,82,2,2,982,7.699999809265137,'2'
|
|
83,83,3,3,983,8.800000190734863,'3'
|
|
84,84,4,4,984,9.899999618530273,'4'
|
|
91,91,1,1,991,6.599999904632568,'1'
|
|
====
|
|
# Same three-table join as the previous test, but with each inline view on the
# right-hand side of its join: alltypessmall c joins view x, and inside x
# alltypessmall b joins the month=1 view over alltypesagg.
|
|
select x.smallint_col, x.id, x.tinyint_col, c.id, x.int_col, x.float_col, c.string_col
|
|
from alltypessmall$TABLE c
|
|
join
|
|
(
|
|
select a.smallint_col smallint_col, a.tinyint_col tinyint_col, a.day day,
|
|
a.int_col int_col, a.month month,
|
|
b.float_col float_col, b.id id
|
|
from alltypessmall$TABLE b
|
|
join
|
|
(
|
|
select *
|
|
from alltypesagg$TABLE a
|
|
where month=1
|
|
) a
|
|
on (a.smallint_col = b.id)
|
|
) x
|
|
on (x.tinyint_col = c.id)
|
|
where x.day=1
|
|
and x.int_col > 899
|
|
and x.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and x.int_col + x.float_col + c.string_col < 1000
|
|
---- TYPES
|
|
smallint, int, tinyint, int, int, float, string
|
|
---- RESULTS
|
|
15,15,5,5,915,5.5,'5'
|
|
16,16,6,6,916,6.599999904632568,'6'
|
|
31,31,1,1,931,6.599999904632568,'1'
|
|
32,32,2,2,932,7.699999809265137,'2'
|
|
33,33,3,3,933,8.800000190734863,'3'
|
|
34,34,4,4,934,9.899999618530273,'4'
|
|
41,41,1,1,941,6.599999904632568,'1'
|
|
42,42,2,2,942,7.699999809265137,'2'
|
|
43,43,3,3,943,8.800000190734863,'3'
|
|
44,44,4,4,944,9.899999618530273,'4'
|
|
5,5,5,5,905,5.5,'5'
|
|
55,55,5,5,955,5.5,'5'
|
|
56,56,6,6,956,6.599999904632568,'6'
|
|
6,6,6,6,906,6.599999904632568,'6'
|
|
65,65,5,5,965,5.5,'5'
|
|
66,66,6,6,966,6.599999904632568,'6'
|
|
81,81,1,1,981,6.599999904632568,'1'
|
|
82,82,2,2,982,7.699999809265137,'2'
|
|
83,83,3,3,983,8.800000190734863,'3'
|
|
84,84,4,4,984,9.899999618530273,'4'
|
|
91,91,1,1,991,6.599999904632568,'1'
|
|
====
|
|
# Aggregate without group by: count, min, max, sum and avg over tinyint_col are computed
# on top of a select * inline view (a), and that aggregate is itself wrapped in a
# second inline view (b) that the outer query reads with select *.
|
|
select *
|
|
from (
|
|
select count(*), count(tinyint_col), min(tinyint_col), max(tinyint_col),
|
|
sum(tinyint_col), avg(tinyint_col)
|
|
from (
|
|
select * from alltypesagg$TABLE
|
|
) a
|
|
) b
|
|
---- TYPES
|
|
bigint, bigint, tinyint, tinyint, bigint, double
|
|
---- RESULTS
|
|
10000,9000,1,9,45000,5
|
|
====
|
|
# Aggregate with group-by and having inside an inline view: view b groups by its first
# select item (int_col % 7, aliased c1) via the ordinal "group by 1" and filters groups
# with having; the outer query then filters again on the view's aliases c1 and c2.
|
|
select *
|
|
from (
|
|
select int_col % 7 c1, count(*) c2, avg(int_col) c3
|
|
from (
|
|
select * from alltypesagg$TABLE
|
|
) a
|
|
group by 1
|
|
having avg(int_col) > 500 or count(*) = 10
|
|
) b
|
|
where c1 is not null
|
|
and c2 > 10
|
|
---- TYPES
|
|
int, bigint, double
|
|
---- RESULTS
|
|
0,1420,500.5
|
|
4,1430,501
|
|
5,1430,502
|
|
====
|
|
# NOTE(review): the lead-in to this comment appears to be missing, so what this test
# stands in for is not visible here.
|
|
# This test exercises multiple levels of aggregation instead: view x groups by
# (int_col, tinyint_col) with order by and limit 5, x2 passes those columns through,
# view t regroups by (c1, c3) taking max(c2) with limit 10, and the outer query filters
# c1 > 0, orders by (c3, c1 desc) and keeps 3 rows.
|
|
select c1, c3, m2
|
|
from (
|
|
select c1, c3, max(c2) m2
|
|
from (
|
|
select c1, c2, c3
|
|
from (
|
|
select int_col c1, tinyint_col c2, max(id) c3
|
|
from hbasealltypessmall
|
|
group by 1, 2
|
|
order by 1,2
|
|
limit 5
|
|
) x
|
|
) x2
|
|
group by c1, c3
|
|
limit 10
|
|
) t
|
|
where c1 > 0
|
|
order by 2, 1 desc
|
|
limit 3
|
|
---- TYPES
|
|
int, int, tinyint
|
|
---- RESULTS
|
|
1,96,1
|
|
2,97,2
|
|
3,98,3
|
|
====
|
|
# Do not materialize the agg expr slot: view x computes min(float_col) as c3, but the
# outer query selects only the grouping columns c1 and c2.
|
|
select c1, c2
|
|
from (
|
|
select int_col c1, tinyint_col c2, min(float_col) c3
|
|
from hbasealltypessmall
|
|
group by 1, 2
|
|
) x
|
|
---- TYPES
|
|
int, tinyint
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,2
|
|
3,3
|
|
4,4
|
|
5,5
|
|
6,6
|
|
7,7
|
|
8,8
|
|
9,9
|
|
====
|
|
# select distinct * over an inline view that groups by (bool_col, tinyint_col) and keeps
# only bool_col = true in its having clause; the outer query then filters tinyint_col < 6.
select distinct *
|
|
from (
|
|
select bool_col, tinyint_col, count(*)
|
|
from alltypesagg$TABLE
|
|
group by bool_col, tinyint_col
|
|
having bool_col = true
|
|
) x
|
|
where tinyint_col < 6
|
|
---- TYPES
|
|
boolean, tinyint, bigint
|
|
---- RESULTS
|
|
true,2,1000
|
|
true,4,1000
|
|
====
|
|
# Distinct with an explicit select list: the distinct over (bool_col, tinyint_col) sits
# in the middle view x, on top of view y which filters tinyint_col < 7; the outer query
# keeps only bool_col = true.
|
|
select *
|
|
from (
|
|
select distinct bool_col, tinyint_col
|
|
from (
|
|
select * from alltypesagg$TABLE where tinyint_col < 7
|
|
) y
|
|
) x
|
|
where bool_col = true
|
|
---- TYPES
|
|
boolean, tinyint
|
|
---- RESULTS
|
|
true,2
|
|
true,4
|
|
true,6
|
|
====
|
|
# Semi-join on a string column: inside view x, DimTbl d is left semi joined to JoinTbl j
# on j.test_name = d.name and only d's columns are selected; the outer query filters
# x.name > 'Name1'.
|
|
select *
|
|
from (
|
|
select d.*
|
|
from DimTbl d left semi join JoinTbl j on (j.test_name = d.name)
|
|
) x
|
|
where x.name > 'Name1'
|
|
---- TYPES
|
|
bigint, string, int
|
|
---- RESULTS
|
|
1002,'Name2',94611
|
|
1003,'Name3',94612
|
|
1004,'Name4',94612
|
|
1005,'Name5',94613
|
|
1006,'Name6',94613
|
|
====
|
|
# Left outer join between two select * inline views (j over JoinTbl, d over DimTbl) on
# j.test_name = d.name, with a where predicate only on the left side (j.test_id <= 1006).
# The expected results include j rows without a match, with NULLs in the d columns.
select j.*, d.*
|
|
from (
|
|
select *
|
|
from JoinTbl a
|
|
) j
|
|
left outer join
|
|
(
|
|
select *
|
|
from DimTbl b
|
|
) d
|
|
on (j.test_name = d.name)
|
|
where j.test_id <= 1006
|
|
---- TYPES
|
|
bigint, string, int, int, bigint, string, int
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000,1001,'Name1',94611
|
|
1002,'Name2',94611,5000,1002,'Name2',94611
|
|
1003,'Name3',94611,5000,1003,'Name3',94612
|
|
1004,'Name4',94611,5000,1004,'Name4',94612
|
|
1005,'Name5',94611,5000,1005,'Name5',94613
|
|
1006,'Name16',94612,15000,NULL,'NULL',NULL
|
|
1006,'Name16',94612,5000,NULL,'NULL',NULL
|
|
1006,'Name16',94616,15000,NULL,'NULL',NULL
|
|
1006,'Name16',94616,5000,NULL,'NULL',NULL
|
|
1006,'Name6',94616,15000,1006,'Name6',94613
|
|
1006,'Name6',94616,5000,1006,'Name6',94613
|
|
====
|
|
# TODO: If we apply a predicate on d, the result will be incorrect. This is a general
|
|
# predicate evaluation issue.
|
|
# As written, this query is identical to the previous left outer join test: its where
# clause references only j, so no predicate on d is exercised here.
|
|
select j.*, d.*
|
|
from (
|
|
select *
|
|
from JoinTbl a
|
|
) j
|
|
left outer join
|
|
(
|
|
select *
|
|
from DimTbl b
|
|
) d
|
|
on (j.test_name = d.name)
|
|
where j.test_id <= 1006
|
|
---- TYPES
|
|
bigint, string, int, int, bigint, string, int
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000,1001,'Name1',94611
|
|
1002,'Name2',94611,5000,1002,'Name2',94611
|
|
1003,'Name3',94611,5000,1003,'Name3',94612
|
|
1004,'Name4',94611,5000,1004,'Name4',94612
|
|
1005,'Name5',94611,5000,1005,'Name5',94613
|
|
1006,'Name16',94612,15000,NULL,'NULL',NULL
|
|
1006,'Name16',94612,5000,NULL,'NULL',NULL
|
|
1006,'Name16',94616,15000,NULL,'NULL',NULL
|
|
1006,'Name16',94616,5000,NULL,'NULL',NULL
|
|
1006,'Name6',94616,15000,1006,'Name6',94613
|
|
1006,'Name6',94616,5000,1006,'Name6',94613
|
|
====
|