Files
impala/testdata/workloads/functional-query/queries/QueryTest/joins.test
Ippokratis Pandis 3ee273ae50 Adding support for {anti,left semi,left outer} joins to the partitioned hash join implementation.
Adding the "anti join" keyword in the frontend and the corresponding backend paths for the
partitioned hash join implementation.  Adding some basic testing for this new join (the
other types have already tests).

Also, fixing a bug in the tuple stream when it was handling strings.

Change-Id: Ied8cff96b2bca284a5f66f7d11df5c5b5ec789cc
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/3805
Reviewed-by: Ippokratis Pandis <ipandis@cloudera.com>
Tested-by: jenkins
2014-08-17 12:24:17 -07:00

647 lines
16 KiB
Plaintext

====
---- QUERY
# Test join on timestamp, hashing was not working properly
select a.timestamp_col from alltypessmall a inner join alltypessmall b on
(a.timestamp_col = b.timestamp_col)
where a.year=2009 and a.month=1 and b.year=2009 and b.month=1
---- RESULTS
2009-01-01 00:00:00
2009-01-01 00:01:00
2009-01-01 00:02:00.100000000
2009-01-01 00:03:00.300000000
2009-01-01 00:04:00.600000000
2009-01-01 00:05:00.100000000
2009-01-01 00:06:00.150000000
2009-01-01 00:07:00.210000000
2009-01-01 00:08:00.280000000
2009-01-01 00:09:00.360000000
2009-01-02 00:10:00.450000000
2009-01-02 00:11:00.450000000
2009-01-02 00:12:00.460000000
2009-01-02 00:13:00.480000000
2009-01-02 00:14:00.510000000
2009-01-02 00:15:00.550000000
2009-01-02 00:16:00.600000000
2009-01-02 00:17:00.660000000
2009-01-02 00:18:00.730000000
2009-01-02 00:19:00.810000000
2009-01-03 00:20:00.900000000
2009-01-03 00:21:00.900000000
2009-01-03 00:22:00.910000000
2009-01-03 00:23:00.930000000
2009-01-03 00:24:00.960000000
---- TYPES
timestamp
====
---- QUERY
# Joins with multiple exprs
select j.*, d.* from JoinTbl j inner join DimTbl d on
(j.test_name = d.name AND j.test_zip = d.zip)
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
---- TYPES
bigint, string, int, int, bigint, string, int
====
---- QUERY
select j.*, d.* from JoinTbl j inner join DimTbl d on
(j.test_zip = d.zip AND j.test_name = d.name)
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
---- TYPES
bigint, string, int, int, bigint, string, int
====
---- QUERY
# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in
# joins cols and non-equality join predicate
# (alltypesagg.tinyint_col contains nulls instead of 0s)
# Should be same result as the test below
select a.tinyint_col, b.id, a.string_col, a.tinyint_col + b.tinyint_col
from alltypesagg a join functional_hbase.alltypessmall b
on (a.tinyint_col = b.id and a.tinyint_col + b.tinyint_col < 5)
where a.month=1
and a.day=1
and a.string_col > '88'
and b.bool_col = false
---- RESULTS
1,1,'881',2
1,1,'891',2
1,1,'901',2
1,1,'91',2
1,1,'911',2
1,1,'921',2
1,1,'931',2
1,1,'941',2
1,1,'951',2
1,1,'961',2
1,1,'971',2
1,1,'981',2
1,1,'991',2
---- TYPES
tinyint, int, string, smallint
====
---- QUERY
# Testing anti join
select j.* from JoinTbl j anti join DimTbl d on j.test_id = d.id
---- RESULTS
1106,'Name6',94612,5000
1106,'Name16',94612,5000
1106,'Name6',94616,5000
1106,'Name16',94616,5000
1106,'Name6',94612,15000
1106,'Name16',94612,15000
1106,'Name6',94616,15000
1106,'Name16',94616,15000
---- TYPES
bigint, string, int, int
====
---- QUERY
# Testing anti join on string column
select j.* from JoinTbl j anti join DimTbl d on j.test_name = d.name
---- RESULTS
1006,'Name16',94612,5000
1106,'Name16',94612,5000
1006,'Name16',94616,5000
1106,'Name16',94616,5000
1006,'Name16',94612,15000
1106,'Name16',94612,15000
1006,'Name16',94616,15000
1106,'Name16',94616,15000
---- TYPES
bigint, string, int, int
====
---- QUERY
# Testing multi-way joins that include an anti join
select count(*) from JoinTbl j anti join DimTbl d on j.test_id = d.id
left outer join JoinTbl k on j.test_id = k.test_id
---- RESULTS
64
---- TYPES
bigint
====
---- QUERY
# Testing multi-way joins that include only anti joins
select count(*) from JoinTbl j anti join DimTbl d on j.test_id = d.id
anti join JoinTbl k on j.test_id = k.test_id
---- RESULTS
0
---- TYPES
bigint
====
---- QUERY
# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in joins cols
# (alltypesagg.tinyint_col contains nulls instead of 0s)
# Should be same result as the test below
select a.tinyint_col, b.id, a.string_col
from alltypesagg a join functional_hbase.alltypessmall b on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
---- RESULTS
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'91'
1,1,'911'
1,1,'921'
1,1,'931'
1,1,'941'
1,1,'951'
1,1,'961'
1,1,'971'
1,1,'981'
1,1,'991'
---- TYPES
tinyint, int, string
====
---- QUERY
# join between two tables, extra join predicate, extra scan predicates, nulls in joins cols
# (alltypesagg.tinyint_col contains nulls instead of 0s)
select a.tinyint_col, b.id, a.string_col
from alltypesagg a join alltypessmall b on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
---- RESULTS
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'91'
1,1,'911'
1,1,'921'
1,1,'931'
1,1,'941'
1,1,'951'
1,1,'961'
1,1,'971'
1,1,'981'
1,1,'991'
---- TYPES
tinyint, int, string
====
---- QUERY
# reversing the order of the tables produces the same result
select a.tinyint_col, b.id, a.string_col
from alltypessmall b join alltypesagg a on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
---- RESULTS
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'91'
1,1,'911'
1,1,'921'
1,1,'931'
1,1,'941'
1,1,'951'
1,1,'961'
1,1,'971'
1,1,'981'
1,1,'991'
---- TYPES
tinyint, int, string
====
---- QUERY
# join between three tables, extra join predicates, extra scan predicates, nulls in joins cols
# (alltypesagg.tinyint_col contains nulls instead of 0s)
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
from alltypesagg a
join alltypessmall b on (a.smallint_col = b.id)
join alltypessmall c on (a.tinyint_col = c.id)
where a.month=1
and a.day=1
and a.int_col > 899
and b.float_col > 4.5
and c.string_col < '7'
and a.int_col + b.float_col + cast(c.string_col as float) < 1000
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
31,31,1,1,931,6.599999904632568,'1'
32,32,2,2,932,7.699999809265137,'2'
33,33,3,3,933,8.800000190734863,'3'
34,34,4,4,934,9.899999618530273,'4'
41,41,1,1,941,6.599999904632568,'1'
42,42,2,2,942,7.699999809265137,'2'
43,43,3,3,943,8.800000190734863,'3'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
81,81,1,1,981,6.599999904632568,'1'
82,82,2,2,982,7.699999809265137,'2'
83,83,3,3,983,8.800000190734863,'3'
84,84,4,4,984,9.899999618530273,'4'
91,91,1,1,991,6.599999904632568,'1'
---- TYPES
smallint, int, tinyint, int, int, float, string
====
---- QUERY
# reversing the order produces the same results
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
from alltypessmall b
join alltypesagg a on (a.smallint_col = b.id)
join alltypessmall c on (a.tinyint_col = c.id)
where a.month=1
and a.day=1
and a.int_col > 899
and b.float_col > 4.5
and c.string_col < '7'
and a.int_col + b.float_col + cast(c.string_col as float) < 1000
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
31,31,1,1,931,6.599999904632568,'1'
32,32,2,2,932,7.699999809265137,'2'
33,33,3,3,933,8.800000190734863,'3'
34,34,4,4,934,9.899999618530273,'4'
41,41,1,1,941,6.599999904632568,'1'
42,42,2,2,942,7.699999809265137,'2'
43,43,3,3,943,8.800000190734863,'3'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
81,81,1,1,981,6.599999904632568,'1'
82,82,2,2,982,7.699999809265137,'2'
83,83,3,3,983,8.800000190734863,'3'
84,84,4,4,984,9.899999618530273,'4'
91,91,1,1,991,6.599999904632568,'1'
---- TYPES
smallint, int, tinyint, int, int, float, string
====
---- QUERY
# joins on empty tables
select * from emptytable t1 join emptytable t2 on (t1.field=t2.field)
---- RESULTS
---- TYPES
string, int, string, int
====
---- QUERY
select * from emptytable t1 join greptiny t2 on (t1.field=t2.field)
---- RESULTS
---- TYPES
string, int, string
====
---- QUERY
select * from greptiny t1 join emptytable t2 on (t1.field=t2.field)
---- RESULTS
---- TYPES
string, string, int
====
---- QUERY
# cross join
select t1.id, t2.id from alltypestiny t1 cross join alltypestiny t2
where (t1.id < 3 and t2.id < 3)
order by t1.id, t2.id limit 100
---- RESULTS
0,0
0,1
0,2
1,0
1,1
1,2
2,0
2,1
2,2
---- TYPES
int, int
====
---- QUERY
# cross join with an empty table
select t1.id, e.field from alltypestiny t1 cross join emptytable e
---- RESULTS
---- TYPES
int, string
====
---- QUERY
# check a larger cross join produces the expected number of rows
select count(*) from functional.AllTypesSmall t1 cross join functional.AllTypesSmall t2
---- RESULTS
10000
---- TYPES
bigint
====
---- QUERY
# cross join with nulls and constant table
select id, tinyint_col, t1.c from functional.alltypesagg
cross join (values(NULL c, 1, 2)) as t1
order by tinyint_col nulls first, id, t1.c
limit 6
---- RESULTS
0,NULL,NULL
0,NULL,NULL
10,NULL,NULL
10,NULL,NULL
20,NULL,NULL
20,NULL,NULL
---- TYPES
int, tinyint, null
====
---- QUERY
# check cross joins within a subquery
select t1.id as t1_id, t2_id, t3_id from alltypestiny t1
cross join (select t2.id as t2_id, t3.id as t3_id from alltypestiny t2
cross join alltypestiny t3) t4
where t1.id < 2 and t2_id < 2 and t3_id < 2
order by t1.id, t2_id, t3_id
limit 10
---- RESULTS
0,0,0
0,0,1
0,1,0
0,1,1
1,0,0
1,0,1
1,1,0
1,1,1
---- TYPES
int, int, int
====
---- QUERY
# cross join between two tables, extra where predicates, extra scan predicates, nulls in
# joins cols (alltypesagg.tinyint_col contains nulls instead of 0s)
select a.tinyint_col, b.id, a.string_col
from alltypesagg a cross join alltypessmall b
where a.tinyint_col = b.id
and a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
---- RESULTS
1,1,'91'
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'911'
1,1,'921'
1,1,'931'
1,1,'941'
1,1,'951'
1,1,'961'
1,1,'971'
1,1,'981'
1,1,'991'
---- TYPES
tinyint, int, string
====
---- QUERY
# join with three tables and then a cross join, extra where predicates, extra scan
# predicates, nulls in joins cols (alltypesagg.tinyint_col contains nulls instead of
# 0s)
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col, d.id
from alltypesagg a
join alltypessmall b on (a.smallint_col = b.id)
join alltypessmall c on (a.tinyint_col = c.id)
cross join alltypestiny d
where a.month=1
and a.day=1
and a.int_col > 899
and b.float_col > 4.5
and c.string_col < '4'
and a.int_col + b.float_col + cast(c.string_col as float) < 1000
and d.id < 2
order by a.id, b.id, c.id, d.id
limit 100
---- RESULTS
31,31,1,1,931,6.599999904632568,'1',0
31,31,1,1,931,6.599999904632568,'1',1
32,32,2,2,932,7.699999809265137,'2',0
32,32,2,2,932,7.699999809265137,'2',1
33,33,3,3,933,8.800000190734863,'3',0
33,33,3,3,933,8.800000190734863,'3',1
41,41,1,1,941,6.599999904632568,'1',0
41,41,1,1,941,6.599999904632568,'1',1
42,42,2,2,942,7.699999809265137,'2',0
42,42,2,2,942,7.699999809265137,'2',1
43,43,3,3,943,8.800000190734863,'3',0
43,43,3,3,943,8.800000190734863,'3',1
81,81,1,1,981,6.599999904632568,'1',0
81,81,1,1,981,6.599999904632568,'1',1
82,82,2,2,982,7.699999809265137,'2',0
82,82,2,2,982,7.699999809265137,'2',1
83,83,3,3,983,8.800000190734863,'3',0
83,83,3,3,983,8.800000190734863,'3',1
91,91,1,1,991,6.599999904632568,'1',0
91,91,1,1,991,6.599999904632568,'1',1
---- TYPES
SMALLINT, INT, TINYINT, INT, INT, FLOAT, STRING, INT
====
---- QUERY
# cross join between hdfs and hbase, where predicate, extra scan predicates, nulls
# in joins cols and non-equality join predicate
# (alltypesagg.tinyint_col contains nulls instead of 0s)
select a.tinyint_col, b.id, a.string_col, a.tinyint_col + b.tinyint_col
from alltypesagg a cross join functional_hbase.alltypessmall b
where a.tinyint_col = b.id
and a.tinyint_col + b.tinyint_col < 5
and a.month=1
and a.day=1
and a.string_col > '88'
and b.bool_col = false
---- RESULTS
1,1,'91',2
1,1,'881',2
1,1,'891',2
1,1,'901',2
1,1,'911',2
1,1,'921',2
1,1,'931',2
1,1,'941',2
1,1,'951',2
1,1,'961',2
1,1,'971',2
1,1,'981',2
1,1,'991',2
---- TYPES
tinyint, int, string, smallint
====
---- QUERY
# FULL OUTER JOIN between two inline views followed by a GROUP BY (IMPALA-964)
select 1 FROM (VALUES(1 x, 1 y)) a RIGHT OUTER JOIN (VALUES(1 x, 1 y)) b
ON (a.x = b.y) GROUP BY a.x
---- RESULTS
1
---- TYPES
TINYINT
====
---- QUERY
# Test joins with union inputs. One input is a union.
select a.id, b.id, a.string_col, b.string_col
from
(select id, string_col from functional.alltypessmall
where year = 2009 and month = 1
union all
select id, string_col from functional.alltypessmall
where year = 2009 and month = 2
union all
select 0, '1234') a
inner join
functional.alltypestiny b
on a.id = b.id
where b.id < 5
---- RESULTS
0,0,'0','0'
0,0,'1234','0'
1,1,'1','1'
2,2,'2','0'
3,3,'3','1'
4,4,'4','0'
---- TYPES
INT, INT, STRING, STRING
====
---- QUERY
# Test joins with union inputs. One input is a union.
select a.id, b.id, a.string_col, b.string_col
from
functional.alltypestiny b
left outer join
(select id, string_col from functional.alltypessmall
where year = 2009 and month = 1
union all
select id, string_col from functional.alltypessmall
where year = 2009 and month = 2
union all
select 0, '1234') a
on a.id = b.id
where b.id < 5
---- RESULTS
0,0,'0','0'
0,0,'1234','0'
1,1,'1','1'
2,2,'2','0'
3,3,'3','1'
4,4,'4','0'
---- TYPES
INT, INT, STRING, STRING
====
---- QUERY
# Test joins with union inputs. Both inputs are a union.
select a.id, b.id, a.string_col, b.string_col
from
(select id, string_col from functional.alltypessmall
where year = 2009 and month = 1
union all
select id, string_col from functional.alltypessmall
where year = 2009 and month = 2
union all
select 0, '1234') a
full outer join
(select id, string_col from functional.alltypessmall
where year = 2009 and month = 1
union all
select id, string_col from functional.alltypessmall
where year = 2009 and month = 2
union all
select 0, '5678') b
on a.id = b.id
where b.id < 5
---- RESULTS
0,0,'0','0'
0,0,'0','5678'
0,0,'1234','0'
0,0,'1234','5678'
1,1,'1','1'
2,2,'2','2'
3,3,'3','3'
4,4,'4','4'
---- TYPES
INT, INT, STRING, STRING
====
---- QUERY
# Regression test for IMPALA-1123. Tests that hash exchanges feeding the same
# hash-partitioned fragment use type-identical partition exprs.
select straight_join count(*) from
(select tinyint_col from functional.alltypessmall
union distinct
select tinyint_col from functional.alltypessmall) a
inner join [shuffle]
(select smallint_col from functional.alltypessmall
union distinct
select smallint_col from functional.alltypessmall) b
on a.tinyint_col = b.smallint_col
---- RESULTS
10
---- TYPES
BIGINT
====
---- QUERY
# Regression test for IMPALA-1123. The plan below has one big hash-partitioned
# fragment with two joins and a merge agg since their partition exprs are all
# compatible. The single fragment is fed by multiple hash-partitioning senders,
# and we need to ensure that the partition exprs used by the senders are cast
# to identical types because the same value will have different hashes for
# different types. Minimal explain plan:
#
# 15:AGGREGATE [MERGE FINALIZE]
# 14:EXCHANGE [UNPARTITIONED]
# 08:AGGREGATE
# 07:HASH JOIN [INNER JOIN, PARTITIONED]
# |--13:AGGREGATE [MERGE FINALIZE]
# | 12:EXCHANGE [HASH(tinyint_col)]
# | 05:AGGREGATE
# | 04:SCAN HDFS [functional.alltypessmall]
# 06:HASH JOIN [INNER JOIN, PARTITIONED]
# |--03:HASH JOIN [INNER JOIN, PARTITIONED]
# | |--10:EXCHANGE [HASH(t2.smallint_col)]
# | | 02:SCAN HDFS [functional.alltypessmall t2]
# | 09:EXCHANGE [HASH(t1.tinyint_col)]
# | 01:SCAN HDFS [functional.alltypessmall t1]
# 11:EXCHANGE [HASH(a.int_col)]
# 00:SCAN HDFS [functional.alltypessmall a]
#
select straight_join count(*) from
functional.alltypessmall a
inner join [shuffle]
(select straight_join t2.* from
functional.alltypessmall t1
inner join [shuffle]
functional.alltypessmall t2
on t1.tinyint_col = t2.smallint_col) b
on a.int_col = b.smallint_col
inner join [shuffle]
(select distinct tinyint_col
from functional.alltypessmall) c
on a.int_col = c.tinyint_col
---- RESULTS
11200
---- TYPES
BIGINT
====
---- QUERY
# Same query as above with broadcast joins. Results should be identical.
select straight_join count(*) from
functional.alltypessmall a
inner join [broadcast]
(select straight_join t2.* from
functional.alltypessmall t1
inner join [broadcast]
functional.alltypessmall t2
on t1.tinyint_col = t2.smallint_col) b
on a.int_col = b.smallint_col
inner join [broadcast]
(select distinct tinyint_col
from functional.alltypessmall) c
on a.int_col = c.tinyint_col
---- RESULTS
11200
---- TYPES
BIGINT
====