IMPALA-14275: Ignore produced runtime filters for tuple cache keys

PlanNode's list of runtime filters includes both the filters that the
node consumes and the filters that it produces. The code for incorporating runtime filters
into the tuple cache key doesn't make a distinction between the
two. This means that JoinNodes that produce runtime filters hash
their children more than once. This only applies to mt_dop=0,
because mt_dop>0 produces the runtime filter from a separate build
side fragment. This hasn't produced a noticeable issue, but it is
still wrong. This change skips produced runtime filters when computing
the tuple cache key.

Testing:
 - Added a test case in TupleCacheTest

Change-Id: I5d132a5cf7de1ce19b55545171799d8f38bb8c3d
Reviewed-on: http://gerrit.cloudera.org:8080/23227
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Michael Smith <michael.smith@cloudera.com>
This commit is contained in:
Joe McDonnell
2025-07-30 11:05:21 -07:00
committed by Michael Smith
parent 810ade2819
commit 22898abbc4
2 changed files with 22 additions and 3 deletions

View File

@@ -1368,10 +1368,14 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
return;
}
// Include the build-side of a RuntimeFilter; look past the 1st ExchangeNode.
// If the build-side is hashable, merge the hash. Otherwise mark this node as
// ineligible because the RuntimeFilter is too complex to reason about.
// For runtime filters consumed by this node, include the build-side of the
// RuntimeFilter (look past the 1st ExchangeNode). If the build-side is hashable,
// merge the hash. Otherwise mark this node as ineligible because the RuntimeFilter
// is too complex to reason about.
for (RuntimeFilter filter : runtimeFilters_) {
// We should only include runtime filters consumed by this node. If this node is
// the source of the runtime filter, skip it.
if (filter.getSrc() == this) continue;
// We want the build side of the join.
PlanNode build = filter.getSrc().getBuildNode();
Preconditions.checkState(!build.contains(this),

View File

@@ -118,6 +118,21 @@ public class TupleCacheTest extends PlannerTestBase {
String.format(basicJoinTmpl, "probe.id = build.id"),
String.format(basicJoinTmpl, "probe.id = build.id and build.id < 100"),
isDistributedPlan);
// JoinNodes produce runtime filters and don't consume them. Verify that produced
// runtime filters don't get incorporated into the hash.
List<PlanNode> cacheEligibleNodes =
getCacheEligibleNodes(String.format(basicJoinTmpl, "probe.id = build.id"));
for (PlanNode node : cacheEligibleNodes) {
if (node instanceof JoinNode) {
// The join node should not have any hash trace elements due to runtime filters
List<HashTraceElement> hashTraces = node.getTupleCacheInfo().getHashTraces();
for (HashTraceElement hashTrace : hashTraces) {
assertTrue(hashTrace.getComment().indexOf("runtime filter") == -1);
assertTrue(hashTrace.getComment().indexOf("RF0") == -1);
}
}
}
}
}