mirror of
https://github.com/apache/impala.git
synced 2026-01-02 03:00:32 -05:00
The NLJ node did not follow the expected protocol when need_to_return is set on a row batch, which means that memory referenced by a rowbatch can be freed or reused the next time GetNext() is called on the child. This patch changes the NLJ node to follow the protocol by deep copying all build side row batches when the need_to_return_ flag is set on the row batches. This prevents the row batches from referencing memory that may be freed or reused. Reenable test that was disabled because of IMPALA-2332 since this was the root cause. Change-Id: Idcbb8df12c292b9e2b243e1cef5bdfc1366898d1 Reviewed-on: http://gerrit.cloudera.org:8080/810 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Internal Jenkins
152 lines
3.3 KiB
Plaintext
152 lines
3.3 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Right outer join with non-equi join predicates
|
|
select straight_join a.id, b.id, a.int_col, b.int_col
|
|
from alltypestiny a right outer join (values(1 as id, 1 as int_col)) b
|
|
on a.id != b.id or a.int_col < b.int_col
|
|
---- RESULTS
|
|
6,1,0,1
|
|
7,1,1,1
|
|
0,1,0,1
|
|
2,1,0,1
|
|
3,1,1,1
|
|
4,1,0,1
|
|
5,1,1,1
|
|
---- TYPES
|
|
INT, TINYINT, INT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Right semi join with non-equi join predicates
|
|
select straight_join *
|
|
from alltypesagg a right semi join (values((1 as id, 1 as int_col), (2,2))) b
|
|
on a.id != b.id
|
|
---- RESULTS
|
|
1,1
|
|
2,2
|
|
---- TYPES
|
|
TINYINT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Right anti join with non-equi join predicates
|
|
select straight_join *
|
|
from alltypestiny a right anti join (values((1 as id, 1 as int_col),(2,2),(3,3))) b
|
|
on a.id < b.id
|
|
---- RESULTS
|
|
---- TYPES
|
|
TINYINT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Full outer join with non-equi join predicates
|
|
select straight_join a.id, b.id
|
|
from alltypestiny a full outer join (values((1 as id), (100))) b
|
|
on a.id != b.id
|
|
---- RESULTS
|
|
6,1
|
|
6,100
|
|
7,1
|
|
7,100
|
|
4,1
|
|
4,100
|
|
5,1
|
|
5,100
|
|
2,1
|
|
2,100
|
|
3,1
|
|
3,100
|
|
0,1
|
|
0,100
|
|
1,100
|
|
---- TYPES
|
|
INT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Multi-way join query with right semi, right outer and full outer joins with
|
|
# non-equi join predicates
|
|
select straight_join *
|
|
from alltypestiny a right semi join (values(1 as tinyint_col, 1 as id)) c
|
|
on a.tinyint_col < c.tinyint_col
|
|
right outer join (values(1 as bigint_col)) d
|
|
on c.tinyint_col >= d.bigint_col
|
|
full outer join (values((1 as id, 1 as int_col),(2,2),(3,3))) e
|
|
on c.tinyint_col != e.id
|
|
---- RESULTS
|
|
1,1,1,2,2
|
|
1,1,1,3,3
|
|
NULL,NULL,NULL,1,1
|
|
---- TYPES
|
|
TINYINT, TINYINT, TINYINT, TINYINT, TINYINT
|
|
====
|
|
---- QUERY
|
|
# Right semi join where the probe input is significantly smaller than the build input.
|
|
select straight_join a.id, b.id
|
|
from (values(1 as tinyint_col, 1 as id)) a right outer join alltypessmall b
|
|
on a.tinyint_col != b.tinyint_col
|
|
order by a.id, b.id
|
|
limit 2
|
|
---- RESULTS
|
|
1,0
|
|
1,2
|
|
---- TYPES
|
|
TINYINT,INT
|
|
====
|
|
---- QUERY
|
|
# Right outer join where none of the build rows matches a probe row.
|
|
select straight_join count(*)
|
|
from (values(10000 as id)) a right outer join alltypesagg b
|
|
on a.id < b.id
|
|
where a.id is null
|
|
---- RESULTS
|
|
11000
|
|
---- TYPES
|
|
BIGINT
|
|
====
|
|
---- QUERY
|
|
# Full outer join where the probe input is smaller than the build input and
|
|
# it can fit in a single row batch.
|
|
select straight_join a.id, b.id
|
|
from (values(1 as id)) a full outer join alltypestiny b
|
|
on a.id < b.id
|
|
order by a.id, b.id
|
|
---- RESULTS
|
|
1,2
|
|
1,3
|
|
1,4
|
|
1,5
|
|
1,6
|
|
1,7
|
|
NULL,0
|
|
NULL,1
|
|
---- TYPES
|
|
TINYINT, INT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2319: the NLJ node returned more rows than limit in certain cases. This resulted
|
|
# in extra rows being returned to the client if the NLJ was the topmost node in the plan,
|
|
# which is the case when this query is executed on a single node. If this same query is
|
|
# executed on multiple nodes, the exchange enforces the limit and masks the bug.
|
|
select straight_join a, cnt from (values(1 a)) v1
|
|
cross join
|
|
(select id, count(*) cnt
|
|
from alltypes
|
|
group by id) v2 limit 5;
|
|
---- RESULTS
|
|
1,1
|
|
1,1
|
|
1,1
|
|
1,1
|
|
1,1
|
|
---- TYPES
|
|
TINYINT,BIGINT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2207: Aggregation feeding into build side of nested loop join.
|
|
# We want to test that this query doesn't crash or hit a DCHECK.
|
|
select straight_join * from (values(1 a)) v1
|
|
cross join
|
|
(select id, count(*) cnt
|
|
from alltypes
|
|
group by id) v2;
|
|
---- TYPES
|
|
TINYINT,INT,BIGINT
|
|
====
|