mirror of
https://github.com/apache/impala.git
synced 2026-01-06 15:01:43 -05:00
preconditions check This commit fixes IMPALA-964 where full outer join between two inline views followed by a group by (e.g. select 1 FROM (VALUES(1 x, 1 y)) a FULL OUTER JOIN (VALUES(1 x, 1 y)) b ON (a.x = b.y) GROUP BY a.x;) hits a preconditions check. This check evaluates if the numNodes (number of nodes for the purpose of resource estimation) variable is greater or equal to zero and is triggered when we try to compute the resource estimates (number of distinct values) of a plan fragment. The following changes are included in this commit: 1. Modified the getNumDistinctValues function in PlanFragment class to consider the special case where the numNodes of a plan fragment is -1. 2. Added a test case in QueryTest/joins.test. Change-Id: I2962ed5079e174d0e76ad990ab84e1fb1a4607ef Reviewed-on: http://gerrit.ent.cloudera.com:8080/2466 Reviewed-by: Marcel Kornacker <marcel@cloudera.com> Tested-by: jenkins Reviewed-on: http://gerrit.ent.cloudera.com:8080/2514 Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
436 lines
11 KiB
Plaintext
436 lines
11 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Test join on timestamp, hashing was not working properly
|
|
select a.timestamp_col from alltypessmall a inner join alltypessmall b on
|
|
(a.timestamp_col = b.timestamp_col)
|
|
where a.year=2009 and a.month=1 and b.year=2009 and b.month=1
|
|
---- TYPES
|
|
timestamp
|
|
---- RESULTS
|
|
2009-01-01 00:00:00
|
|
2009-01-01 00:01:00
|
|
2009-01-01 00:02:00.100000000
|
|
2009-01-01 00:03:00.300000000
|
|
2009-01-01 00:04:00.600000000
|
|
2009-01-01 00:05:00.100000000
|
|
2009-01-01 00:06:00.150000000
|
|
2009-01-01 00:07:00.210000000
|
|
2009-01-01 00:08:00.280000000
|
|
2009-01-01 00:09:00.360000000
|
|
2009-01-02 00:10:00.450000000
|
|
2009-01-02 00:11:00.450000000
|
|
2009-01-02 00:12:00.460000000
|
|
2009-01-02 00:13:00.480000000
|
|
2009-01-02 00:14:00.510000000
|
|
2009-01-02 00:15:00.550000000
|
|
2009-01-02 00:16:00.600000000
|
|
2009-01-02 00:17:00.660000000
|
|
2009-01-02 00:18:00.730000000
|
|
2009-01-02 00:19:00.810000000
|
|
2009-01-03 00:20:00.900000000
|
|
2009-01-03 00:21:00.900000000
|
|
2009-01-03 00:22:00.910000000
|
|
2009-01-03 00:23:00.930000000
|
|
2009-01-03 00:24:00.960000000
|
|
====
|
|
---- QUERY
|
|
# Joins with multiple exprs
|
|
select j.*, d.* from JoinTbl j inner join DimTbl d on
|
|
(j.test_name = d.name AND j.test_zip = d.zip)
|
|
---- TYPES
|
|
bigint, string, int, int, bigint, string, int
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000,1001,'Name1',94611
|
|
1002,'Name2',94611,5000,1002,'Name2',94611
|
|
====
|
|
---- QUERY
|
|
select j.*, d.* from JoinTbl j inner join DimTbl d on
|
|
(j.test_zip = d.zip AND j.test_name = d.name)
|
|
---- TYPES
|
|
bigint, string, int, int, bigint, string, int
|
|
---- RESULTS
|
|
1001,'Name1',94611,5000,1001,'Name1',94611
|
|
1002,'Name2',94611,5000,1002,'Name2',94611
|
|
====
|
|
---- QUERY
|
|
# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in
|
|
# joins cols and non-equality join predicate
|
|
# (alltypesagg.tinyint_col contains nulls instead of 0s)
|
|
# Should be same result as the test below
|
|
select a.tinyint_col, b.id, a.string_col, a.tinyint_col + b.tinyint_col
|
|
from alltypesagg a join functional_hbase.alltypessmall b
|
|
on (a.tinyint_col = b.id and a.tinyint_col + b.tinyint_col < 5)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.string_col > '88'
|
|
and b.bool_col = false
|
|
---- TYPES
|
|
tinyint, int, string, smallint
|
|
---- RESULTS
|
|
1,1,'881',2
|
|
1,1,'891',2
|
|
1,1,'901',2
|
|
1,1,'91',2
|
|
1,1,'911',2
|
|
1,1,'921',2
|
|
1,1,'931',2
|
|
1,1,'941',2
|
|
1,1,'951',2
|
|
1,1,'961',2
|
|
1,1,'971',2
|
|
1,1,'981',2
|
|
1,1,'991',2
|
|
====
|
|
---- QUERY
|
|
# join between hdfs and hbase, extra join predicate, extra scan predicates, nulls in joins cols
|
|
# (alltypesagg.tinyint_col contains nulls instead of 0s)
|
|
# Should be same result as the test below
|
|
select a.tinyint_col, b.id, a.string_col
|
|
from alltypesagg a join functional_hbase.alltypessmall b on (a.tinyint_col = b.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.tinyint_col + b.tinyint_col < 5
|
|
and a.string_col > '88'
|
|
and b.bool_col = false
|
|
---- TYPES
|
|
tinyint, int, string
|
|
---- RESULTS
|
|
1,1,'881'
|
|
1,1,'891'
|
|
1,1,'901'
|
|
1,1,'91'
|
|
1,1,'911'
|
|
1,1,'921'
|
|
1,1,'931'
|
|
1,1,'941'
|
|
1,1,'951'
|
|
1,1,'961'
|
|
1,1,'971'
|
|
1,1,'981'
|
|
1,1,'991'
|
|
====
|
|
---- QUERY
|
|
# join between two tables, extra join predicate, extra scan predicates, nulls in joins cols
|
|
# (alltypesagg.tinyint_col contains nulls instead of 0s)
|
|
select a.tinyint_col, b.id, a.string_col
|
|
from alltypesagg a join alltypessmall b on (a.tinyint_col = b.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.tinyint_col + b.tinyint_col < 5
|
|
and a.string_col > '88'
|
|
and b.bool_col = false
|
|
---- TYPES
|
|
tinyint, int, string
|
|
---- RESULTS
|
|
1,1,'881'
|
|
1,1,'891'
|
|
1,1,'901'
|
|
1,1,'91'
|
|
1,1,'911'
|
|
1,1,'921'
|
|
1,1,'931'
|
|
1,1,'941'
|
|
1,1,'951'
|
|
1,1,'961'
|
|
1,1,'971'
|
|
1,1,'981'
|
|
1,1,'991'
|
|
====
|
|
---- QUERY
|
|
# reversing the order of the tables produces the same result
|
|
select a.tinyint_col, b.id, a.string_col
|
|
from alltypessmall b join alltypesagg a on (a.tinyint_col = b.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.tinyint_col + b.tinyint_col < 5
|
|
and a.string_col > '88'
|
|
and b.bool_col = false
|
|
---- TYPES
|
|
tinyint, int, string
|
|
---- RESULTS
|
|
1,1,'881'
|
|
1,1,'891'
|
|
1,1,'901'
|
|
1,1,'91'
|
|
1,1,'911'
|
|
1,1,'921'
|
|
1,1,'931'
|
|
1,1,'941'
|
|
1,1,'951'
|
|
1,1,'961'
|
|
1,1,'971'
|
|
1,1,'981'
|
|
1,1,'991'
|
|
====
|
|
---- QUERY
|
|
# join between three tables, extra join predicates, extra scan predicates, nulls in joins cols
|
|
# (alltypesagg.tinyint_col contains nulls instead of 0s)
|
|
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
|
|
from alltypesagg a
|
|
join alltypessmall b on (a.smallint_col = b.id)
|
|
join alltypessmall c on (a.tinyint_col = c.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.int_col > 899
|
|
and b.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and a.int_col + b.float_col + cast(c.string_col as float) < 1000
|
|
---- TYPES
|
|
smallint, int, tinyint, int, int, float, string
|
|
---- RESULTS
|
|
15,15,5,5,915,5.5,'5'
|
|
16,16,6,6,916,6.599999904632568,'6'
|
|
31,31,1,1,931,6.599999904632568,'1'
|
|
32,32,2,2,932,7.699999809265137,'2'
|
|
33,33,3,3,933,8.800000190734863,'3'
|
|
34,34,4,4,934,9.899999618530273,'4'
|
|
41,41,1,1,941,6.599999904632568,'1'
|
|
42,42,2,2,942,7.699999809265137,'2'
|
|
43,43,3,3,943,8.800000190734863,'3'
|
|
44,44,4,4,944,9.899999618530273,'4'
|
|
5,5,5,5,905,5.5,'5'
|
|
55,55,5,5,955,5.5,'5'
|
|
56,56,6,6,956,6.599999904632568,'6'
|
|
6,6,6,6,906,6.599999904632568,'6'
|
|
65,65,5,5,965,5.5,'5'
|
|
66,66,6,6,966,6.599999904632568,'6'
|
|
81,81,1,1,981,6.599999904632568,'1'
|
|
82,82,2,2,982,7.699999809265137,'2'
|
|
83,83,3,3,983,8.800000190734863,'3'
|
|
84,84,4,4,984,9.899999618530273,'4'
|
|
91,91,1,1,991,6.599999904632568,'1'
|
|
====
|
|
---- QUERY
|
|
# reversing the order produces the same results
|
|
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col
|
|
from alltypessmall b
|
|
join alltypesagg a on (a.smallint_col = b.id)
|
|
join alltypessmall c on (a.tinyint_col = c.id)
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.int_col > 899
|
|
and b.float_col > 4.5
|
|
and c.string_col < '7'
|
|
and a.int_col + b.float_col + cast(c.string_col as float) < 1000
|
|
---- TYPES
|
|
smallint, int, tinyint, int, int, float, string
|
|
---- RESULTS
|
|
15,15,5,5,915,5.5,'5'
|
|
16,16,6,6,916,6.599999904632568,'6'
|
|
31,31,1,1,931,6.599999904632568,'1'
|
|
32,32,2,2,932,7.699999809265137,'2'
|
|
33,33,3,3,933,8.800000190734863,'3'
|
|
34,34,4,4,934,9.899999618530273,'4'
|
|
41,41,1,1,941,6.599999904632568,'1'
|
|
42,42,2,2,942,7.699999809265137,'2'
|
|
43,43,3,3,943,8.800000190734863,'3'
|
|
44,44,4,4,944,9.899999618530273,'4'
|
|
5,5,5,5,905,5.5,'5'
|
|
55,55,5,5,955,5.5,'5'
|
|
56,56,6,6,956,6.599999904632568,'6'
|
|
6,6,6,6,906,6.599999904632568,'6'
|
|
65,65,5,5,965,5.5,'5'
|
|
66,66,6,6,966,6.599999904632568,'6'
|
|
81,81,1,1,981,6.599999904632568,'1'
|
|
82,82,2,2,982,7.699999809265137,'2'
|
|
83,83,3,3,983,8.800000190734863,'3'
|
|
84,84,4,4,984,9.899999618530273,'4'
|
|
91,91,1,1,991,6.599999904632568,'1'
|
|
====
|
|
---- QUERY
|
|
# joins on empty tables
|
|
select * from emptytable t1 join emptytable t2 on (t1.field=t2.field)
|
|
---- TYPES
|
|
string, int, string, int
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select * from emptytable t1 join greptiny t2 on (t1.field=t2.field)
|
|
---- TYPES
|
|
string, int, string
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
select * from greptiny t1 join emptytable t2 on (t1.field=t2.field)
|
|
---- TYPES
|
|
string, string, int
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# cross join
|
|
select t1.id, t2.id from alltypestiny t1 cross join alltypestiny t2
|
|
where (t1.id < 3 and t2.id < 3)
|
|
order by t1.id, t2.id limit 100
|
|
---- TYPES
|
|
int, int
|
|
---- RESULTS
|
|
0,0
|
|
0,1
|
|
0,2
|
|
1,0
|
|
1,1
|
|
1,2
|
|
2,0
|
|
2,1
|
|
2,2
|
|
====
|
|
---- QUERY
|
|
# cross join with an empty table
|
|
select t1.id, e.field from alltypestiny t1 cross join emptytable e
|
|
---- TYPES
|
|
int, string
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# check a larger cross join produces the expected number of rows
|
|
select count(*) from functional.AllTypesSmall t1 cross join functional.AllTypesSmall t2
|
|
---- TYPES
|
|
bigint
|
|
---- RESULTS
|
|
10000
|
|
====
|
|
---- QUERY
|
|
# cross join with nulls and constant table
|
|
select id, tinyint_col, t1.c from functional.alltypesagg
|
|
cross join (values(NULL c, 1, 2)) as t1
|
|
order by tinyint_col nulls first, id, t1.c
|
|
limit 6
|
|
---- RESULTS
|
|
0,NULL,NULL
|
|
10,NULL,NULL
|
|
20,NULL,NULL
|
|
30,NULL,NULL
|
|
40,NULL,NULL
|
|
50,NULL,NULL
|
|
---- TYPES
|
|
int, tinyint, null
|
|
====
|
|
---- QUERY
|
|
# check cross joins within a subquery
|
|
select t1.id as t1_id, t2_id, t3_id from alltypestiny t1
|
|
cross join (select t2.id as t2_id, t3.id as t3_id from alltypestiny t2
|
|
cross join alltypestiny t3) t4
|
|
where t1.id < 2 and t2_id < 2 and t3_id < 2
|
|
order by t1.id, t2_id, t3_id
|
|
limit 10
|
|
---- RESULTS
|
|
0,0,0
|
|
0,0,1
|
|
0,1,0
|
|
0,1,1
|
|
1,0,0
|
|
1,0,1
|
|
1,1,0
|
|
1,1,1
|
|
---- TYPES
|
|
int, int, int
|
|
====
|
|
---- QUERY
|
|
# cross join between two tables, extra where predicates, extra scan predicates, nulls in
|
|
# joins cols (alltypesagg.tinyint_col contains nulls instead of 0s)
|
|
select a.tinyint_col, b.id, a.string_col
|
|
from alltypesagg a cross join alltypessmall b
|
|
where a.tinyint_col = b.id
|
|
and a.month=1
|
|
and a.day=1
|
|
and a.tinyint_col + b.tinyint_col < 5
|
|
and a.string_col > '88'
|
|
and b.bool_col = false
|
|
---- RESULTS
|
|
1,1,'91'
|
|
1,1,'881'
|
|
1,1,'891'
|
|
1,1,'901'
|
|
1,1,'911'
|
|
1,1,'921'
|
|
1,1,'931'
|
|
1,1,'941'
|
|
1,1,'951'
|
|
1,1,'961'
|
|
1,1,'971'
|
|
1,1,'981'
|
|
1,1,'991'
|
|
---- TYPES
|
|
tinyint, int, string
|
|
====
|
|
---- QUERY
|
|
# join with three tables and then a cross join, extra where predicates, extra scan
|
|
# predicates, nulls in joins cols (alltypesagg.tinyint_col contains nulls instead of
|
|
# 0s)
|
|
select a.smallint_col, b.id, a.tinyint_col, c.id, a.int_col, b.float_col, c.string_col, d.id
|
|
from alltypesagg a
|
|
join alltypessmall b on (a.smallint_col = b.id)
|
|
join alltypessmall c on (a.tinyint_col = c.id)
|
|
cross join alltypestiny d
|
|
where a.month=1
|
|
and a.day=1
|
|
and a.int_col > 899
|
|
and b.float_col > 4.5
|
|
and c.string_col < '4'
|
|
and a.int_col + b.float_col + cast(c.string_col as float) < 1000
|
|
and d.id < 2
|
|
order by a.id, b.id, c.id, d.id
|
|
limit 100
|
|
---- RESULTS
|
|
31,31,1,1,931,6.599999904632568,'1',0
|
|
31,31,1,1,931,6.599999904632568,'1',1
|
|
32,32,2,2,932,7.699999809265137,'2',0
|
|
32,32,2,2,932,7.699999809265137,'2',1
|
|
33,33,3,3,933,8.800000190734863,'3',0
|
|
33,33,3,3,933,8.800000190734863,'3',1
|
|
41,41,1,1,941,6.599999904632568,'1',0
|
|
41,41,1,1,941,6.599999904632568,'1',1
|
|
42,42,2,2,942,7.699999809265137,'2',0
|
|
42,42,2,2,942,7.699999809265137,'2',1
|
|
43,43,3,3,943,8.800000190734863,'3',0
|
|
43,43,3,3,943,8.800000190734863,'3',1
|
|
81,81,1,1,981,6.599999904632568,'1',0
|
|
81,81,1,1,981,6.599999904632568,'1',1
|
|
82,82,2,2,982,7.699999809265137,'2',0
|
|
82,82,2,2,982,7.699999809265137,'2',1
|
|
83,83,3,3,983,8.800000190734863,'3',0
|
|
83,83,3,3,983,8.800000190734863,'3',1
|
|
91,91,1,1,991,6.599999904632568,'1',0
|
|
91,91,1,1,991,6.599999904632568,'1',1
|
|
---- TYPES
|
|
SMALLINT, INT, TINYINT, INT, INT, FLOAT, STRING, INT
|
|
====
|
|
---- QUERY
|
|
# cross join between hdfs and hbase, where predicate, extra scan predicates, nulls
|
|
# in joins cols and non-equality join predicate
|
|
# (alltypesagg.tinyint_col contains nulls instead of 0s)
|
|
select a.tinyint_col, b.id, a.string_col, a.tinyint_col + b.tinyint_col
|
|
from alltypesagg a cross join functional_hbase.alltypessmall b
|
|
where a.tinyint_col = b.id
|
|
and a.tinyint_col + b.tinyint_col < 5
|
|
and a.month=1
|
|
and a.day=1
|
|
and a.string_col > '88'
|
|
and b.bool_col = false
|
|
---- RESULTS
|
|
1,1,'91',2
|
|
1,1,'881',2
|
|
1,1,'891',2
|
|
1,1,'901',2
|
|
1,1,'911',2
|
|
1,1,'921',2
|
|
1,1,'931',2
|
|
1,1,'941',2
|
|
1,1,'951',2
|
|
1,1,'961',2
|
|
1,1,'971',2
|
|
1,1,'981',2
|
|
1,1,'991',2
|
|
---- TYPES
|
|
tinyint, int, string, smallint
|
|
====
|
|
---- QUERY
|
|
# FULL OUTER JOIN between two inline views followed by a GROUP BY (IMPALA-964)
|
|
select 1 FROM (VALUES(1 x, 1 y)) a RIGHT OUTER JOIN (VALUES(1 x, 1 y)) b
|
|
ON (a.x = b.y) GROUP BY a.x
|
|
---- RESULTS
|
|
1
|
|
---- TYPES
|
|
TINYINT
|
|
====
|