Files
impala/testdata/workloads/functional-query/queries/QueryTest/top-n.test
Lenni Kuff 04edc8f534 Update benchmark tests to run against generic workload, data loading with scale factor, +more
This change updates the run-benchmark script to enable it to target one or more
workloads. Now benchmarks can be run like:

./run-benchmark --workloads=hive-benchmark,tpch

We look up the workload in the workloads directory, then read the associated
query .test files and start executing them.

To ensure the queries are not duplicated between benchmark and query tests, I
moved all existing queries (under fe/src/test/resources/*) to the workloads
directory. You do NOT need to look through all the .test files, I've just moved
them. The one new file is the 'hive-benchmark.test' which contains the hive
benchmark queries.

Also added support for generating schema for different scale factors as well as
executing against these scale factors. For example, let's say we have a dataset
with a scale factor called "SF3". We would first generate the schema using:

./generate_schema_statements --workload=<workload> --scale_factor="SF3"
This will create tables with names that are distinct from those of the other scale factors.

Run the generated .sql file to load the data. Alternatively, the data can be loaded
by running a new python script:
./bin/load-data.py -w <workload1>,<workload2> -e <exploration strategy> -s [scale factor]
For example: ./bin/load-data.py -w tpch -e core -s SF3

Then run against this:
./run-benchmark --workloads=<workload> --scale_factor=SF3

This changeset also includes a few other minor tweaks to some of the test
scripts.

Change-Id: Ife8a8d91567d75c9612be37bec96c1e7780f50d6
2014-01-08 10:44:22 -08:00

628 lines
9.6 KiB
Plaintext

# Based on Aggregation Queries
# Top-n over an aggregation: groups by ordinal 1 (int_col), orders ascending by
# ordinal 2 (sum(float_col)) and keeps the first 3 groups.
select int_col, sum(float_col)
from hbasealltypessmall
where id < 5
group by 1
order by 2
limit 3
---- TYPES
int, double
---- RESULTS
0,0
1,1.100000023841858
2,2.200000047683716
====
# Run query without order by
# Baseline for the two ordered variants that follow: same grouping on
# tinyint_col (ordinal 1), but no ORDER BY clause.
select tinyint_col, count(*)
from alltypesagg$TABLE
group by 1
limit 10
---- TYPES
tinyint, bigint
---- RESULTS
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,1000
====
# Same query as the preceding test, ordered by the first column (tinyint_col)
# ascending.
select tinyint_col, count(*)
from alltypesagg$TABLE
group by 1
order by 1
limit 10
---- TYPES
tinyint, bigint
---- RESULTS
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,1000
====
# Same query, ordered by the first column (tinyint_col) descending.
# The limit (20) is larger than the 10 groups listed under RESULTS, so the limit
# does not cut off any rows here.
select tinyint_col, count(*)
from alltypesagg$TABLE
group by 1
order by 1 desc
limit 20
---- TYPES
tinyint, bigint
---- RESULTS
1,1000
2,1000
3,1000
4,1000
5,1000
6,1000
7,1000
8,1000
9,1000
NULL,1000
====
# Order by two columns with mixed directions: date_string_col ascending (no
# explicit direction given), then int_col descending.
select date_string_col,int_col
from alltypesagg$TABLE
order by date_string_col, int_col desc
limit 10
---- TYPES
string, int
---- RESULTS
'01/01/10',990
'01/01/10',991
'01/01/10',992
'01/01/10',993
'01/01/10',994
'01/01/10',995
'01/01/10',996
'01/01/10',997
'01/01/10',998
'01/01/10',999
====
# order by with null tuples in tuple row
# The full outer join yields rows where either the JoinTbl side (j.*) or the
# DimTbl side (d.*) is entirely NULL; the sort keys j.test_id and d.name come
# from opposite sides of the join.
select j.*, d.* from JoinTbl j full outer join DimTbl d
on (j.test_id = d.id)
order by j.test_id, d.name
limit 100
---- TYPES
bigint, string, int, int, bigint, string, int
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
1003,'Name3',94611,5000,1003,'Name3',94612
1004,'Name4',94611,5000,1004,'Name4',94612
1005,'Name5',94611,5000,1005,'Name5',94613
1006,'Name16',94612,15000,1006,'Name6',94613
1006,'Name16',94612,5000,1006,'Name6',94613
1006,'Name16',94616,15000,1006,'Name6',94613
1006,'Name16',94616,5000,1006,'Name6',94613
1006,'Name6',94616,15000,1006,'Name6',94613
1006,'Name6',94616,5000,1006,'Name6',94613
1106,'Name16',94612,15000,NULL,'NULL',NULL
1106,'Name16',94612,5000,NULL,'NULL',NULL
1106,'Name16',94616,15000,NULL,'NULL',NULL
1106,'Name16',94616,5000,NULL,'NULL',NULL
1106,'Name6',94612,15000,NULL,'NULL',NULL
1106,'Name6',94612,5000,NULL,'NULL',NULL
1106,'Name6',94616,15000,NULL,'NULL',NULL
1106,'Name6',94616,5000,NULL,'NULL',NULL
NULL,'NULL',NULL,NULL,1007,'Name7',94614
NULL,'NULL',NULL,NULL,1008,'Name8',94614
NULL,'NULL',NULL,NULL,1009,'Name9',94615
NULL,'NULL',NULL,NULL,1010,'Name10',94615
====
# order by multiple cols with nulls
# Groups by two modulo expressions (ordinals 1 and 2) and sorts on both,
# ascending. The next three tests repeat this query with the remaining
# asc/desc combinations.
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg$TABLE
where day = 1
group by 1, 2
order by 1, 2
limit 20
---- TYPES
tinyint, smallint, bigint
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
====
# Same grouping as the preceding test; ordinal 1 ascending, ordinal 2 descending.
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg$TABLE
where day = 1
group by 1, 2
order by 1, 2 desc
limit 20
---- TYPES
tinyint, smallint, bigint
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
====
# Same grouping as the preceding tests; ordinal 1 descending, ordinal 2 ascending.
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg$TABLE
where day = 1
group by 1, 2
order by 1 desc, 2
limit 20
---- TYPES
tinyint, smallint, bigint
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
====
# Same grouping as the preceding tests; both ordinals descending.
select tinyint_col % 3, smallint_col % 3, count(*)
from alltypesagg$TABLE
where day = 1
group by 1, 2
order by 1 desc, 2 desc
limit 20
---- TYPES
tinyint, smallint, bigint
---- RESULTS
0,0,120
0,1,90
0,2,90
1,0,90
1,1,120
1,2,90
2,0,90
2,1,90
2,2,120
NULL,0,30
NULL,1,30
NULL,2,30
NULL,NULL,10
====
# Order by a single string column, descending. date_string_col has type string,
# so values are compared as strings rather than as dates.
select date_string_col
from alltypessmall$TABLE
order by date_string_col desc
limit 50
---- TYPES
string
---- RESULTS
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/01/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/02/09'
'03/03/09'
'03/03/09'
'03/03/09'
'03/03/09'
'03/03/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/01/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/02/09'
'04/03/09'
'04/03/09'
'04/03/09'
'04/03/09'
'04/03/09'
====
# Based on join queries
# Top 5 rows of a two-table join, sorted ascending by a.string_col. The sort key
# is a string, so '91' orders between '901' and '911'.
select a.tinyint_col, b.id, a.string_col
from alltypesagg$TABLE a join alltypessmall$TABLE b on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
order by a.string_col
limit 5
---- TYPES
tinyint, int, string
---- RESULTS
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'91'
1,1,'911'
====
# Same join and predicates as the preceding test, sorted by a.string_col
# descending.
select a.tinyint_col, b.id, a.string_col
from alltypesagg$TABLE a join alltypessmall$TABLE b on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
order by a.string_col desc
limit 5
---- TYPES
tinyint, int, string
---- RESULTS
1,1,'951'
1,1,'961'
1,1,'971'
1,1,'981'
1,1,'991'
====
# Three-way join: alltypessmall is joined twice (as b and c) to alltypesagg.
# Sorts by c.string_col descending, then a.smallint_col ascending.
# The last predicate adds an int, a float and a string column
# (a.int_col + b.float_col + c.string_col).
# NOTE(review): this presumably relies on implicit conversion of the string
# operand -- confirm the cast semantics.
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
from alltypesagg$TABLE a
join alltypessmall$TABLE b on (a.smallint_col = b.id)
join alltypessmall$TABLE c on (a.tinyint_col = c.id)
where a.month=1
and a.day=1
and a.int_col > 899
and b.float_col > 4.5
and c.string_col < '7'
and a.int_col + b.float_col + c.string_col < 1000
order by c.string_col desc, a.smallint_col
limit 10
---- TYPES
smallint, int, tinyint, int, int, float, string
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
34,34,4,4,934,9.899999618530273,'4'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
====
# Order by a column that is not in the select list.
# This first query is the reference case: the ordering column (int_col) IS in
# the select list. The next test drops it from the select list.
# Don't include date_string_col, it comes back in random order.
select int_col, tinyint_col
from alltypessmall$TABLE
order by int_col desc
limit 20
---- TYPES
int, tinyint
---- RESULTS
7,7
7,7
7,7
7,7
8,8
8,8
8,8
8,8
8,8
8,8
8,8
8,8
9,9
9,9
9,9
9,9
9,9
9,9
9,9
9,9
====
# Same query with ordering col not in select list
# int_col is used only as the sort key; the output contains just tinyint_col.
select tinyint_col
from alltypessmall$TABLE
order by int_col desc
limit 20
---- TYPES
tinyint
---- RESULTS
7
7
7
7
8
8
8
8
8
8
8
8
9
9
9
9
9
9
9
9
====
# Order by many exprs
# Groups and sorts by (year, month), both referenced by ordinal and both
# ascending.
select year, month, count(*)
from alltypes
group by 1, 2
order by 1, 2
limit 100
---- TYPES
int, int, bigint
---- RESULTS
2009,1,310
2009,10,310
2009,11,300
2009,12,310
2009,2,280
2009,3,310
2009,4,300
2009,5,310
2009,6,300
2009,7,310
2009,8,310
2009,9,300
2010,1,310
2010,10,310
2010,11,300
2010,12,310
2010,2,280
2010,3,310
2010,4,300
2010,5,310
2010,6,300
2010,7,310
2010,8,310
2010,9,300
====
# More Complex Ordering Exprs
# Orders by an aggregate expression, avg(tinyint_col), written out in the
# ORDER BY clause instead of being referenced by ordinal. The same expression
# also appears in the select list.
select int_col % 7, count(*), avg(tinyint_col)
from alltypesagg$TABLE
group by 1
order by avg(tinyint_col)
limit 10
---- TYPES
int, bigint, double
---- RESULTS
0,1420,5.0078125
1,1430,4.992248062015504
2,1430,5.015503875968992
3,1430,5
4,1430,4.984496124031008
5,1430,5.007751937984496
6,1420,4.9921875
NULL,10,NULL
====
# Order by an aggregate, max(int_col), that also appears in the select list.
select int_col % 7, count(*), max(int_col)
from alltypesagg$TABLE
group by 1
order by max(int_col)
limit 10
---- TYPES
int, bigint, int
---- RESULTS
0,1420,994
1,1430,995
2,1430,996
3,1430,997
4,1430,998
5,1430,999
6,1420,993
NULL,10,NULL
====
# Order by an arithmetic expression over two aggregates
# (avg(tinyint_col) - avg(float_col)), descending.
select int_col % 5, count(*), avg(tinyint_col) - avg(float_col)
from alltypesagg$TABLE
group by 1
order by avg(tinyint_col) - avg(float_col) desc
limit 10
---- TYPES
int, bigint, double
---- RESULTS
0,1990,-545
1,2000,-544.8499889141322
2,2000,-544.9500045645237
3,2000,-545.0499953591824
4,2000,-545.1500110459327
NULL,10,NULL
====
# Order by an expression that is not in the select list (int_col % 5), with
# int_col itself as the secondary sort key.
select int_col
from alltypessmall$TABLE
order by int_col % 5, int_col
limit 100
---- TYPES
int
---- RESULTS
0
0
0
0
0
0
0
0
0
0
0
0
1
1
1
1
1
1
1
1
1
1
1
1
2
2
2
2
2
2
2
2
2
2
2
2
3
3
3
3
3
3
3
3
3
3
3
3
4
4
4
4
4
4
4
4
4
4
4
4
5
5
5
5
5
5
5
5
6
6
6
6
6
6
6
6
7
7
7
7
7
7
7
7
8
8
8
8
8
8
8
8
9
9
9
9
9
9
9
9
====
# All select list items have an implicit alias. Test that the order by column ref
# "int_col" is correctly resolved to the select list item t1.int_col, and is
# therefore not an ambiguous reference.
# This is a self-join, so both t1 and t2 expose a column named int_col.
select t1.int_col from alltypessmall t1, alltypessmall t2 where t1.id = t2.id
order by int_col
limit 2
---- TYPES
int
---- RESULTS
0
0
====
# Order by ordinal 1, which refers to a function call result,
# date_sub(timestamp_col, id), of type timestamp.
select date_sub(timestamp_col, id), timestamp_col, id
from alltypessmall order by 1 limit 20
---- TYPES
timestamp,timestamp,int
---- RESULTS
2008-12-10 00:24:00.960000000,2009-01-03 00:24:00.960000000,24
2008-12-11 00:23:00.930000000,2009-01-03 00:23:00.930000000,23
2008-12-12 00:22:00.910000000,2009-01-03 00:22:00.910000000,22
2008-12-13 00:21:00.900000000,2009-01-03 00:21:00.900000000,21
2008-12-14 00:19:00.810000000,2009-01-02 00:19:00.810000000,19
2008-12-14 00:20:00.900000000,2009-01-03 00:20:00.900000000,20
2008-12-15 00:18:00.730000000,2009-01-02 00:18:00.730000000,18
2008-12-16 00:17:00.660000000,2009-01-02 00:17:00.660000000,17
2008-12-16 00:24:00.960000000,2009-02-03 00:24:00.960000000,49
2008-12-17 00:16:00.600000000,2009-01-02 00:16:00.600000000,16
2008-12-17 00:23:00.930000000,2009-02-03 00:23:00.930000000,48
2008-12-18 00:15:00.550000000,2009-01-02 00:15:00.550000000,15
2008-12-18 00:22:00.910000000,2009-02-03 00:22:00.910000000,47
2008-12-19 00:14:00.510000000,2009-01-02 00:14:00.510000000,14
2008-12-19 00:21:00.900000000,2009-02-03 00:21:00.900000000,46
2008-12-19 00:24:00.960000000,2009-03-03 00:24:00.960000000,74
2008-12-20 00:13:00.480000000,2009-01-02 00:13:00.480000000,13
2008-12-20 00:19:00.810000000,2009-02-02 00:19:00.810000000,44
2008-12-20 00:20:00.900000000,2009-02-03 00:20:00.900000000,45
2008-12-20 00:23:00.930000000,2009-03-03 00:23:00.930000000,73
====