mirror of
https://github.com/apache/impala.git
synced 2026-01-03 15:00:52 -05:00
Implement nested-loop join in Impala with support for multiple join modes, including inner, outer, semi and anti joins. Null-aware left anti-join is not currently supported. Summary of changes: Introduced the NestedLoopJoinNode class in the FE that represents the nested loop join. Common functionality between NestedLoopJoinNode and HashJoinNode (e.g. cardinality estimation) was moved to the JoinNode class. In the BE, introduced the NestedLoopJoinNode class that implements the nested-loop join execution strategy. Change-Id: I238ec7dc0080f661847e5e1b84e30d61c3b0bb5c Reviewed-on: http://gerrit.cloudera.org:8080/652 Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com> Tested-by: Internal Jenkins
122 lines
2.5 KiB
Plaintext
122 lines
2.5 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Right outer join with non-equi join predicates
|
|
select straight_join a.id, b.id, a.int_col, b.int_col
|
|
from alltypestiny a right outer join (values(1 as id, 1 as int_col)) b
|
|
on a.id != b.id or a.int_col < b.int_col
|
|
---- RESULTS
|
|
6,1,0,1
|
|
7,1,1,1
|
|
0,1,0,1
|
|
2,1,0,1
|
|
3,1,1,1
|
|
4,1,0,1
|
|
5,1,1,1
|
|
---- TYPES
|
|
INT, TINYINT, INT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Right semi join with non-equi join predicates
|
|
select straight_join *
|
|
from alltypesagg a right semi join (values((1 as id, 1 as int_col), (2,2))) b
|
|
on a.id != b.id
|
|
---- RESULTS
|
|
1,1
|
|
2,2
|
|
---- TYPES
|
|
TINYINT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Right anti join with non-equi join predicates
|
|
select straight_join *
|
|
from alltypestiny a right anti join (values((1 as id, 1 as int_col),(2,2),(3,3))) b
|
|
on a.id < b.id
|
|
---- RESULTS
|
|
---- TYPES
|
|
TINYINT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Full outer join with non-equi join predicates
|
|
select straight_join a.id, b.id
|
|
from alltypestiny a full outer join (values((1 as id), (100))) b
|
|
on a.id != b.id
|
|
---- RESULTS
|
|
6,1
|
|
6,100
|
|
7,1
|
|
7,100
|
|
4,1
|
|
4,100
|
|
5,1
|
|
5,100
|
|
2,1
|
|
2,100
|
|
3,1
|
|
3,100
|
|
0,1
|
|
0,100
|
|
1,100
|
|
---- TYPES
|
|
INT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Multi-way join query with right semi, right outer and full outer joins with
|
|
# non-equi join predicates
|
|
select straight_join *
|
|
from alltypestiny a right semi join (values(1 as tinyint_col, 1 as id)) c
|
|
on a.tinyint_col < c.tinyint_col
|
|
right outer join (values(1 as bigint_col)) d
|
|
on c.tinyint_col >= d.bigint_col
|
|
full outer join (values((1 as id, 1 as int_col),(2,2),(3,3))) e
|
|
on c.tinyint_col != e.id
|
|
---- RESULTS
|
|
1,1,1,2,2
|
|
1,1,1,3,3
|
|
NULL,NULL,NULL,1,1
|
|
---- TYPES
|
|
TINYINT, TINYINT, TINYINT, TINYINT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Right outer join where the probe input is significantly smaller than the build input.
|
|
select straight_join a.id, b.id
|
|
from (values(1 as tinyint_col, 1 as id)) a right outer join alltypessmall b
|
|
on a.tinyint_col != b.tinyint_col
|
|
order by a.id, b.id
|
|
limit 2
|
|
---- RESULTS
|
|
1,0
|
|
1,2
|
|
---- TYPES
|
|
TINYINT,INT
|
|
====
|
|
---- QUERY
|
|
# Right outer join where none of the build rows matches a probe row.
|
|
select straight_join count(*)
|
|
from (values(10000 as id)) a right outer join alltypesagg b
|
|
on a.id < b.id
|
|
where a.id is null
|
|
---- RESULTS
|
|
11000
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Full outer join where the probe input is smaller than the build input and
|
|
# it can fit in a single row batch.
|
|
select straight_join a.id, b.id
|
|
from (values(1 as id)) a full outer join alltypestiny b
|
|
on a.id < b.id
|
|
order by a.id, b.id
|
|
---- RESULTS
|
|
1,2
|
|
1,3
|
|
1,4
|
|
1,5
|
|
1,6
|
|
1,7
|
|
NULL,0
|
|
NULL,1
|
|
---- TYPES
|
|
TINYINT, INT
|
|
====
|