Files
impala/testdata/workloads/functional-query/queries/QueryTest/joins.test
Dimitris Tsirogiannis 2d7a8b7c70 IMPALA-964: Full outer join on values() followed by group by hits a
preconditions check

This commit fixes IMPALA-964 where full outer join between two inline
views followed by a group by (e.g. select 1 FROM (VALUES(1 x, 1 y)) a
FULL OUTER JOIN (VALUES(1 x, 1 y)) b ON (a.x = b.y) GROUP BY a.x;)
hits a preconditions check. This check evaluates if the numNodes
(number of nodes for the purpose of resource estimation) variable
is greater or equal to zero and is triggered when we try to compute
the resource estimates (number of distinct values) of a plan fragment.

The following changes are included in this commit:
1. Modified the getNumDistinctValues function in PlanFragment class to
consider the special case where the numNodes of a plan fragment is -1.
2. Added a test case in QueryTest/joins.test.

Change-Id: I2962ed5079e174d0e76ad990ab84e1fb1a4607ef
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2466
Reviewed-by: Marcel Kornacker <marcel@cloudera.com>
Tested-by: jenkins
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2514
Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
2014-05-11 19:30:38 -07:00

436 lines
11 KiB
Plaintext

====
---- QUERY
# Test join on timestamp, hashing was not working properly
select a.timestamp_col from alltypessmall a inner join alltypessmall b on
(a.timestamp_col = b.timestamp_col)
where a.year=2009 and a.month=1 and b.year=2009 and b.month=1
---- TYPES
timestamp
---- RESULTS
2009-01-01 00:00:00
2009-01-01 00:01:00
2009-01-01 00:02:00.100000000
2009-01-01 00:03:00.300000000
2009-01-01 00:04:00.600000000
2009-01-01 00:05:00.100000000
2009-01-01 00:06:00.150000000
2009-01-01 00:07:00.210000000
2009-01-01 00:08:00.280000000
2009-01-01 00:09:00.360000000
2009-01-02 00:10:00.450000000
2009-01-02 00:11:00.450000000
2009-01-02 00:12:00.460000000
2009-01-02 00:13:00.480000000
2009-01-02 00:14:00.510000000
2009-01-02 00:15:00.550000000
2009-01-02 00:16:00.600000000
2009-01-02 00:17:00.660000000
2009-01-02 00:18:00.730000000
2009-01-02 00:19:00.810000000
2009-01-03 00:20:00.900000000
2009-01-03 00:21:00.900000000
2009-01-03 00:22:00.910000000
2009-01-03 00:23:00.930000000
2009-01-03 00:24:00.960000000
====
---- QUERY
# Joins with multiple exprs
select j.*, d.* from JoinTbl j inner join DimTbl d on
(j.test_name = d.name AND j.test_zip = d.zip)
---- TYPES
bigint, string, int, int, bigint, string, int
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
====
---- QUERY
select j.*, d.* from JoinTbl j inner join DimTbl d on
(j.test_zip = d.zip AND j.test_name = d.name)
---- TYPES
bigint, string, int, int, bigint, string, int
---- RESULTS
1001,'Name1',94611,5000,1001,'Name1',94611
1002,'Name2',94611,5000,1002,'Name2',94611
====
---- QUERY
# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in
# joins cols and non-equality join predicate
# (alltypesagg.tinyint_col contains nulls instead of 0s)
# Should be same result as the test below
select a.tinyint_col, b.id, a.string_col, a.tinyint_col + b.tinyint_col
from alltypesagg a join functional_hbase.alltypessmall b
on (a.tinyint_col = b.id and a.tinyint_col + b.tinyint_col < 5)
where a.month=1
and a.day=1
and a.string_col > '88'
and b.bool_col = false
---- TYPES
tinyint, int, string, smallint
---- RESULTS
1,1,'881',2
1,1,'891',2
1,1,'901',2
1,1,'91',2
1,1,'911',2
1,1,'921',2
1,1,'931',2
1,1,'941',2
1,1,'951',2
1,1,'961',2
1,1,'971',2
1,1,'981',2
1,1,'991',2
====
---- QUERY
# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in joins cols
# (alltypesagg.tinyint_col contains nulls instead of 0s)
# Should be same result as the test below
select a.tinyint_col, b.id, a.string_col
from alltypesagg a join functional_hbase.alltypessmall b on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
---- TYPES
tinyint, int, string
---- RESULTS
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'91'
1,1,'911'
1,1,'921'
1,1,'931'
1,1,'941'
1,1,'951'
1,1,'961'
1,1,'971'
1,1,'981'
1,1,'991'
====
---- QUERY
# join between two tables, extra join predicate, extra scan predicates, nulls in joins cols
# (alltypesagg.tinyint_col contains nulls instead of 0s)
select a.tinyint_col, b.id, a.string_col
from alltypesagg a join alltypessmall b on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
---- TYPES
tinyint, int, string
---- RESULTS
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'91'
1,1,'911'
1,1,'921'
1,1,'931'
1,1,'941'
1,1,'951'
1,1,'961'
1,1,'971'
1,1,'981'
1,1,'991'
====
---- QUERY
# reversing the order of the tables produces the same result
select a.tinyint_col, b.id, a.string_col
from alltypessmall b join alltypesagg a on (a.tinyint_col = b.id)
where a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
---- TYPES
tinyint, int, string
---- RESULTS
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'91'
1,1,'911'
1,1,'921'
1,1,'931'
1,1,'941'
1,1,'951'
1,1,'961'
1,1,'971'
1,1,'981'
1,1,'991'
====
---- QUERY
# join between three tables, extra join predicates, extra scan predicates, nulls in joins cols
# (alltypesagg.tinyint_col contains nulls instead of 0s)
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
from alltypesagg a
join alltypessmall b on (a.smallint_col = b.id)
join alltypessmall c on (a.tinyint_col = c.id)
where a.month=1
and a.day=1
and a.int_col > 899
and b.float_col > 4.5
and c.string_col < '7'
and a.int_col + b.float_col + cast(c.string_col as float) < 1000
---- TYPES
smallint, int, tinyint, int, int, float, string
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
31,31,1,1,931,6.599999904632568,'1'
32,32,2,2,932,7.699999809265137,'2'
33,33,3,3,933,8.800000190734863,'3'
34,34,4,4,934,9.899999618530273,'4'
41,41,1,1,941,6.599999904632568,'1'
42,42,2,2,942,7.699999809265137,'2'
43,43,3,3,943,8.800000190734863,'3'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
81,81,1,1,981,6.599999904632568,'1'
82,82,2,2,982,7.699999809265137,'2'
83,83,3,3,983,8.800000190734863,'3'
84,84,4,4,984,9.899999618530273,'4'
91,91,1,1,991,6.599999904632568,'1'
====
---- QUERY
# reversing the order produces the same results
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
from alltypessmall b
join alltypesagg a on (a.smallint_col = b.id)
join alltypessmall c on (a.tinyint_col = c.id)
where a.month=1
and a.day=1
and a.int_col > 899
and b.float_col > 4.5
and c.string_col < '7'
and a.int_col + b.float_col + cast(c.string_col as float) < 1000
---- TYPES
smallint, int, tinyint, int, int, float, string
---- RESULTS
15,15,5,5,915,5.5,'5'
16,16,6,6,916,6.599999904632568,'6'
31,31,1,1,931,6.599999904632568,'1'
32,32,2,2,932,7.699999809265137,'2'
33,33,3,3,933,8.800000190734863,'3'
34,34,4,4,934,9.899999618530273,'4'
41,41,1,1,941,6.599999904632568,'1'
42,42,2,2,942,7.699999809265137,'2'
43,43,3,3,943,8.800000190734863,'3'
44,44,4,4,944,9.899999618530273,'4'
5,5,5,5,905,5.5,'5'
55,55,5,5,955,5.5,'5'
56,56,6,6,956,6.599999904632568,'6'
6,6,6,6,906,6.599999904632568,'6'
65,65,5,5,965,5.5,'5'
66,66,6,6,966,6.599999904632568,'6'
81,81,1,1,981,6.599999904632568,'1'
82,82,2,2,982,7.699999809265137,'2'
83,83,3,3,983,8.800000190734863,'3'
84,84,4,4,984,9.899999618530273,'4'
91,91,1,1,991,6.599999904632568,'1'
====
---- QUERY
# joins on empty tables
select * from emptytable t1 join emptytable t2 on (t1.field=t2.field)
---- TYPES
string, int, string, int
---- RESULTS
====
---- QUERY
select * from emptytable t1 join greptiny t2 on (t1.field=t2.field)
---- TYPES
string, int, string
---- RESULTS
====
---- QUERY
select * from greptiny t1 join emptytable t2 on (t1.field=t2.field)
---- TYPES
string, string, int
---- RESULTS
====
---- QUERY
# cross join
select t1.id, t2.id from alltypestiny t1 cross join alltypestiny t2
where (t1.id < 3 and t2.id < 3)
order by t1.id, t2.id limit 100
---- TYPES
int, int
---- RESULTS
0,0
0,1
0,2
1,0
1,1
1,2
2,0
2,1
2,2
====
---- QUERY
# cross join with an empty table
select t1.id, e.field from alltypestiny t1 cross join emptytable e
---- TYPES
int, string
---- RESULTS
====
---- QUERY
# check a larger cross join produces the expected number of rows
select count(*) from functional.AllTypesSmall t1 cross join functional.AllTypesSmall t2
---- TYPES
bigint
---- RESULTS
10000
====
---- QUERY
# cross join with nulls and constant table
select id, tinyint_col, t1.c from functional.alltypesagg
cross join (values(NULL c, 1, 2)) as t1
order by tinyint_col nulls first, id, t1.c
limit 6
---- RESULTS
0,NULL,NULL
10,NULL,NULL
20,NULL,NULL
30,NULL,NULL
40,NULL,NULL
50,NULL,NULL
---- TYPES
int, tinyint, null
====
---- QUERY
# check cross joins within a subquery
select t1.id as t1_id, t2_id, t3_id from alltypestiny t1
cross join (select t2.id as t2_id, t3.id as t3_id from alltypestiny t2
cross join alltypestiny t3) t4
where t1.id < 2 and t2_id < 2 and t3_id < 2
order by t1.id, t2_id, t3_id
limit 10
---- RESULTS
0,0,0
0,0,1
0,1,0
0,1,1
1,0,0
1,0,1
1,1,0
1,1,1
---- TYPES
int, int, int
====
---- QUERY
# cross join between two tables, extra where predicates, extra scan predicates, nulls in
# joins cols (alltypesagg.tinyint_col contains nulls instead of 0s)
select a.tinyint_col, b.id, a.string_col
from alltypesagg a cross join alltypessmall b
where a.tinyint_col = b.id
and a.month=1
and a.day=1
and a.tinyint_col + b.tinyint_col < 5
and a.string_col > '88'
and b.bool_col = false
---- RESULTS
1,1,'91'
1,1,'881'
1,1,'891'
1,1,'901'
1,1,'911'
1,1,'921'
1,1,'931'
1,1,'941'
1,1,'951'
1,1,'961'
1,1,'971'
1,1,'981'
1,1,'991'
---- TYPES
tinyint, int, string
====
---- QUERY
# join with three tables and then a cross join, extra where predicates, extra scan
# predicates, nulls in joins cols (alltypesagg.tinyint_col contains nulls instead of
# 0s)
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col, d.id
from alltypesagg a
join alltypessmall b on (a.smallint_col = b.id)
join alltypessmall c on (a.tinyint_col = c.id)
cross join alltypestiny d
where a.month=1
and a.day=1
and a.int_col > 899
and b.float_col > 4.5
and c.string_col < '4'
and a.int_col + b.float_col + cast(c.string_col as float) < 1000
and d.id < 2
order by a.id, b.id, c.id, d.id
limit 100
---- RESULTS
31,31,1,1,931,6.599999904632568,'1',0
31,31,1,1,931,6.599999904632568,'1',1
32,32,2,2,932,7.699999809265137,'2',0
32,32,2,2,932,7.699999809265137,'2',1
33,33,3,3,933,8.800000190734863,'3',0
33,33,3,3,933,8.800000190734863,'3',1
41,41,1,1,941,6.599999904632568,'1',0
41,41,1,1,941,6.599999904632568,'1',1
42,42,2,2,942,7.699999809265137,'2',0
42,42,2,2,942,7.699999809265137,'2',1
43,43,3,3,943,8.800000190734863,'3',0
43,43,3,3,943,8.800000190734863,'3',1
81,81,1,1,981,6.599999904632568,'1',0
81,81,1,1,981,6.599999904632568,'1',1
82,82,2,2,982,7.699999809265137,'2',0
82,82,2,2,982,7.699999809265137,'2',1
83,83,3,3,983,8.800000190734863,'3',0
83,83,3,3,983,8.800000190734863,'3',1
91,91,1,1,991,6.599999904632568,'1',0
91,91,1,1,991,6.599999904632568,'1',1
---- TYPES
SMALLINT, INT, TINYINT, INT, INT, FLOAT, STRING, INT
====
---- QUERY
# cross join between hdfs and hbase, where predicate, extra scan predicates, nulls
# in joins cols and non-equality join predicate
# (alltypesagg.tinyint_col contains nulls instead of 0s)
select a.tinyint_col, b.id, a.string_col, a.tinyint_col + b.tinyint_col
from alltypesagg a cross join functional_hbase.alltypessmall b
where a.tinyint_col = b.id
and a.tinyint_col + b.tinyint_col < 5
and a.month=1
and a.day=1
and a.string_col > '88'
and b.bool_col = false
---- RESULTS
1,1,'91',2
1,1,'881',2
1,1,'891',2
1,1,'901',2
1,1,'911',2
1,1,'921',2
1,1,'931',2
1,1,'941',2
1,1,'951',2
1,1,'961',2
1,1,'971',2
1,1,'981',2
1,1,'991',2
---- TYPES
tinyint, int, string, smallint
====
---- QUERY
# FULL OUTER JOIN between two inline views followed by a GROUP BY (IMPALA-964)
select 1 FROM (VALUES(1 x, 1 y)) a RIGHT OUTER JOIN (VALUES(1 x, 1 y)) b
ON (a.x = b.y) GROUP BY a.x
---- RESULTS
1
---- TYPES
TINYINT
====