Files
impala/testdata/workloads/functional-query/queries/QueryTest/joins.test
Lenni Kuff 04edc8f534 Update benchmark tests to run against generic workload, data loading with scale factor, +more
This change updates the run-benchmark script to enable it to target one or more
workloads. Now benchmarks can be run like:

./run-benchmark --workloads=hive-benchmark,tpch

We lookup the workload in the workloads directory, then read the associated
query .test files and start executing them.

To ensure the queries are not duplicated between benchmark and query tests, I
moved all existing queries (under fe/src/test/resources/* to the workloads
directory. You do NOT need to look through all the .test files, I've just moved
them. The one new file is the 'hive-benchmark.test' which contains the hive
benchmark queries.

Also added support for generating schema for different scale factors as well as
executing against these scale factors. For example, let's say we have a dataset
with a scale factor called "SF1". We would first generate the schema using:

./generate_schema_statements --workload=<workload> --scale_factor="SF3"
This will create tables with a unique names from the other scale factors.

Run the generated .sql file to load the data. Alternatively, the data can loaded
by running a new python script:
./bin/load-data.py -w <workload1>,<workload2> -e <exploration strategy> -s [scale factor]
For example: load-data.sh -w tpch -e core -s SF3

Then run against this:
./run-benchmark --workloads=<workload> --scale_factor=SF3

This changeset also includes a few other minor tweaks to some of the test
scripts.

Change-Id: Ife8a8d91567d75c9612be37bec96c1e7780f50d6
2014-01-08 10:44:22 -08:00

247 lines
6.6 KiB
Plaintext

# Test join on timestamp, hashing was not working properly
select a.timestamp_col from alltypessmall a inner join alltypessmall b on
(a.timestamp_col = b.timestamp_col)
where a.year=2009 and a.month=1 and b.year=2009 and b.month=1
---- TYPES
timestamp
---- RESULTS
2009-01-01 00:00:00
2009-01-01 00:01:00
2009-01-01 00:02:00.100000000
2009-01-01 00:03:00.300000000
2009-01-01 00:04:00.600000000
2009-01-01 00:05:00.100000000
2009-01-01 00:06:00.150000000
2009-01-01 00:07:00.210000000
2009-01-01 00:08:00.280000000
2009-01-01 00:09:00.360000000
2009-01-02 00:10:00.450000000
2009-01-02 00:11:00.450000000
2009-01-02 00:12:00.460000000
2009-01-02 00:13:00.480000000
2009-01-02 00:14:00.510000000
2009-01-02 00:15:00.550000000
2009-01-02 00:16:00.600000000
2009-01-02 00:17:00.660000000
2009-01-02 00:18:00.730000000
2009-01-02 00:19:00.810000000
2009-01-03 00:20:00.900000000
2009-01-03 00:21:00.900000000
2009-01-03 00:22:00.910000000
2009-01-03 00:23:00.930000000
2009-01-03 00:24:00.960000000
====
# Joins with multiple exprs
select j.*, d.* from JoinTbl j inner join DimTbl d on
(j.test_name = d.name AND j.test_zip = d.zip)
---- TYPES
bigint, string, int, int, bigint, string, int
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
====
select j.*, d.* from JoinTbl j inner join DimTbl d on
(j.test_zip = d.zip AND j.test_name = d.name)
---- TYPES
bigint, string, int, int, bigint, string, int
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
====
# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in
# joins cols and non-equality join predicate
# (alltypesagg.tinyint_col contains nulls instead of 0s)
# Should be same result as the test below
select a.tinyint_col, b.id, a.string_col, a.tinyint_col + b.tinyint_col
from alltypesagg$TABLE a join hbasealltypessmall b
on (a.tinyint_col = b.id and a.tinyint_col + b.tinyint_col < 5)
where a.month=1
and a.day=1
and a.string_col > '88'
and b.bool_col = false
---- TYPES
tinyint, int, string, bigint
---- RESULTS
1,1,'881',2
1,1,'891',2
1,1,'901',2
1,1,'91',2
1,1,'911',2
1,1,'921',2
1,1,'931',2
1,1,'941',2
1,1,'951',2
1,1,'961',2
1,1,'971',2
1,1,'981',2
1,1,'991',2
====
# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in joins cols
# (alltypesagg.tinyint_col contains nulls instead of 0s)
# Should be same result as the test below
select a.tinyint_col, b.id, a.string_col
from alltypesagg$TABLE a join hbasealltypessmall b on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
---- TYPES
tinyint, int, string
---- RESULTS
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'91'
1,1,'911'
1,1,'921'
1,1,'931'
1,1,'941'
1,1,'951'
1,1,'961'
1,1,'971'
1,1,'981'
1,1,'991'
====
# join between two tables, extra join predicate, extra scan predicates, nulls in joins cols
# (alltypesagg.tinyint_col contains nulls instead of 0s)
select a.tinyint_col, b.id, a.string_col
from alltypesagg$TABLE a join alltypessmall$TABLE b on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
---- TYPES
tinyint, int, string
---- RESULTS
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'91'
1,1,'911'
1,1,'921'
1,1,'931'
1,1,'941'
1,1,'951'
1,1,'961'
1,1,'971'
1,1,'981'
1,1,'991'
====
# reversing the order of the tables produces the same result
select a.tinyint_col, b.id, a.string_col
from alltypessmall$TABLE b join alltypesagg$TABLE a on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
---- TYPES
tinyint, int, string
---- RESULTS
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'91'
1,1,'911'
1,1,'921'
1,1,'931'
1,1,'941'
1,1,'951'
1,1,'961'
1,1,'971'
1,1,'981'
1,1,'991'
====
# join between three tables, extra join predicates, extra scan predicates, nulls in joins cols
# (alltypesagg.tinyint_col contains nulls instead of 0s)
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
from alltypesagg$TABLE a
join alltypessmall$TABLE b on (a.smallint_col = b.id)
join alltypessmall$TABLE c on (a.tinyint_col = c.id)
where a.month=1
and a.day=1
and a.int_col > 899
and b.float_col > 4.5
and c.string_col < '7'
and a.int_col + b.float_col + c.string_col < 1000
---- TYPES
smallint, int, tinyint, int, int, float, string
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
31,31,1,1,931,6.599999904632568,'1'
32,32,2,2,932,7.699999809265137,'2'
33,33,3,3,933,8.800000190734863,'3'
34,34,4,4,934,9.899999618530273,'4'
41,41,1,1,941,6.599999904632568,'1'
42,42,2,2,942,7.699999809265137,'2'
43,43,3,3,943,8.800000190734863,'3'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
81,81,1,1,981,6.599999904632568,'1'
82,82,2,2,982,7.699999809265137,'2'
83,83,3,3,983,8.800000190734863,'3'
84,84,4,4,984,9.899999618530273,'4'
91,91,1,1,991,6.599999904632568,'1'
====
# reversing the order produces the same results
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
from alltypessmall$TABLE b
join alltypesagg$TABLE a on (a.smallint_col = b.id)
join alltypessmall$TABLE c on (a.tinyint_col = c.id)
where a.month=1
and a.day=1
and a.int_col > 899
and b.float_col > 4.5
and c.string_col < '7'
and a.int_col + b.float_col + c.string_col < 1000
---- TYPES
smallint, int, tinyint, int, int, float, string
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
31,31,1,1,931,6.599999904632568,'1'
32,32,2,2,932,7.699999809265137,'2'
33,33,3,3,933,8.800000190734863,'3'
34,34,4,4,934,9.899999618530273,'4'
41,41,1,1,941,6.599999904632568,'1'
42,42,2,2,942,7.699999809265137,'2'
43,43,3,3,943,8.800000190734863,'3'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
81,81,1,1,981,6.599999904632568,'1'
82,82,2,2,982,7.699999809265137,'2'
83,83,3,3,983,8.800000190734863,'3'
84,84,4,4,984,9.899999618530273,'4'
91,91,1,1,991,6.599999904632568,'1'
====
# joins on empty tables
select * from emptytable t1 join emptytable t2 on (t1.field=t2.field)
---- TYPES
int, string, int, string
---- RESULTS
====
select * from emptytable t1 join greptiny t2 on (t1.field=t2.field)
---- TYPES
int, string, string
---- RESULTS
====
select * from greptiny t1 join emptytable t2 on (t1.field=t2.field)
---- TYPES
string, int, string
---- RESULTS
====