From d5e0e2eebc1aaa00b27ee464bfcba42f26827712 Mon Sep 17 00:00:00 2001 From: Alex Behm Date: Thu, 1 Oct 2015 14:10:35 -0700 Subject: [PATCH] IMPALA-2456: For hash joins inside a subplan, open child(0) before doing the build. The bug: A query with a subplan containing a hash join with unnest nodes on both the build and probe sides would not project the collection-typed slots referenced in unnest nodes of the probe side. The reason is that we used to first complete the hash join build before opening the probe side. Since the build does a deep copy, those collection-typed slots to be unnested in the probe side would not be projected. Example query that exhibited the bug: subplan hash join nested-loop join singular row src unnest t.c1 unnest t.c2 scan t The tuple of 't' has two collection-typed slots, one for 't.c1', and another for 't.c2'. If the hash join completes the build without opening the probe side, then the 't.c2' slot would not be projected and would instead be deep-copied into the build-side hash table. That collection would then be returned in GetNext() of the hash join. The fix: For hash joins inside a subplan, open child(0) before doing the build. Change-Id: I569107b5ecafdbb75f3562707947ecc73951140c Reviewed-on: http://gerrit.cloudera.org:8080/1128 Reviewed-by: Alex Behm Tested-by: Internal Jenkins --- be/src/exec/blocking-join-node.cc | 9 ++++++++ .../QueryTest/nested-types-runtime.test | 22 +++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/be/src/exec/blocking-join-node.cc b/be/src/exec/blocking-join-node.cc index 0f7676164..69fca7023 100644 --- a/be/src/exec/blocking-join-node.cc +++ b/be/src/exec/blocking-join-node.cc @@ -184,6 +184,15 @@ Status BlockingJoinNode::Open(RuntimeState* state) { // are fully constructed. 
RETURN_IF_ERROR(build_side_status.Get()); RETURN_IF_ERROR(open_status); + } else if (IsInSubplan()) { + // When inside a subplan, open the first child before doing the build such that + // UnnestNodes on the probe side are opened and project their unnested collection + // slots. Otherwise, the build might unnecessarily deep-copy those collection slots, + // and this node would return them in GetNext(). + // TODO: Remove this special-case behavior for subplans once we have proper + // projection. See UnnestNode for details on the current projection implementation. + RETURN_IF_ERROR(child(0)->Open(state)); + RETURN_IF_ERROR(ConstructBuildSide(state)); } else { RETURN_IF_ERROR(ConstructBuildSide(state)); RETURN_IF_ERROR(child(0)->Open(state)); diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-runtime.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-runtime.test index 0ce4f6730..fc7032484 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/nested-types-runtime.test +++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-runtime.test @@ -375,3 +375,25 @@ where c_custkey < 10 ---- TYPES BIGINT,BIGINT,BIGINT,INT,BIGINT ==== +---- QUERY +# IMPALA-2456: Test subplan that contains a hash join with unnest nodes on both the build +# and probe sides of the join. Tests projection of collection-typed slots on both sides +# of the join. +select c_custkey, o_orderkey +from tpch_nested_parquet.customer c +inner join c.c_orders o1 +left anti join + (select o2.o_orderkey x + from c.c_orders o2, c.c_orders o3) v +on c.c_custkey = v.x +where c_custkey < 2 +---- RESULTS +1,454791 +1,579908 +1,3868359 +1,4273923 +1,4808192 +1,5133509 +---- TYPES +bigint,bigint +====