IMPALA-13437 (part 2): Implement cost-based tuple cache placement

This changes the default behavior of the tuple cache to consider
cost when placing the TupleCacheNodes. It tries to pick the best
locations within a budget. First, it eliminates unprofitable locations
via a threshold. Next, it ranks the remaining locations by their
profitability. Finally, it picks the best locations in rank order
until it reaches the budget.

The threshold is based on the ratio of the processing cost for regular
execution to the processing cost of reading from the cache.
If the ratio is below the threshold, the location is eliminated.
The threshold is specified by the tuple_cache_required_cost_reduction_factor
query option. It defaults to 3.0, which means that the cost of
reading from the cache must be at most 1/3 of the cost of computing
the value normally. A higher value is more restrictive about which
locations are cached, which pushes in the direction of lower
overhead.
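
In sketch form, the elimination check mirrors meetsRequiredCostReductionFactor()
in the new TupleCacheCostBasedPolicy (see the diff below):

  // Clamp the cache read cost to 1 to avoid division by zero.
  long regularCost = node.getTupleCacheInfo().getCumulativeProcessingCost();
  long cacheReadCost = Math.max(node.getTupleCacheInfo().getReadProcessingCost(), 1);
  boolean keep = (double) regularCost / cacheReadCost >=
      queryOptions.tuple_cache_required_cost_reduction_factor;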

The ranking is based on the cost reduction per byte, given by the formula:
 (regular processing cost - cost to read from cache) / estimated serialized size
This prefers locations with small results or a large reduction in cost.
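
For example, a location with a regular processing cost of 10,000, a cache read
cost of 2,000, and an estimated serialized size of 4,000 bytes ranks at
(10000 - 2000) / 4000 = 2.0 cost units per byte; a location with the same cost
reduction but a 16,000-byte result ranks at 0.5 and is picked later. (The
implementation adds one byte to the denominator so that a zero-size result
does not produce an infinite ranking.)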

The budget is based on the estimated serialized size per node, which limits
the total caching that a query will do. A higher value allows more caching,
which can increase the overhead on the first run of a query. A lower value is
less aggressive and can limit the overhead at the expense of less caching. The
budget is a per-node limit because it should scale with the size of the
executor group: each executor brings extra capacity. The budget is specified
by the tuple_cache_budget_bytes_per_executor query option.
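
The end-to-end selection is a simple greedy pass. The following self-contained
sketch models it with hypothetical candidates: plain numbers stand in for real
PlanNodes, all names and values are illustrative, and it uses a single size for
both ranking and budgeting (the real policy ranks by total serialized size but
budgets by per-executor size):

  import java.util.ArrayList;
  import java.util.Comparator;
  import java.util.List;

  class TupleCacheSelectionSketch {
    // Hypothetical stand-in for a plan node's cached cost information.
    record Candidate(String id, long regularCost, long cacheReadCost,
        long bytesPerExecutor) {
      double reductionPerByte() {
        // Mirror the +1 guard against zero-size results.
        return (double) (regularCost - cacheReadCost) / (bytesPerExecutor + 1);
      }
    }

    public static void main(String[] args) {
      double requiredFactor = 3.0;      // tuple_cache_required_cost_reduction_factor
      long budget = 100L * 1024 * 1024; // tuple_cache_budget_bytes_per_executor
      List<Candidate> candidates = List.of(
          new Candidate("scan", 9_000, 1_000, 80L * 1024 * 1024),
          new Candidate("agg", 40_000, 2_000, 30L * 1024 * 1024),
          new Candidate("filter", 6_000, 1_000, 10L * 1024 * 1024),
          new Candidate("join", 5_000, 4_000, 1L * 1024 * 1024));
      // Threshold: eliminate candidates whose cost reduction factor is too
      // small ("join" fails here: 5000/4000 = 1.25 < 3.0).
      List<Candidate> ranked = new ArrayList<>();
      for (Candidate c : candidates) {
        if ((double) c.regularCost() / Math.max(c.cacheReadCost(), 1)
            >= requiredFactor) {
          ranked.add(c);
        }
      }
      // Ranking: highest cost reduction per byte first.
      ranked.sort(Comparator.comparingDouble(Candidate::reductionPerByte).reversed());
      // Budget: greedily pick, skipping entries that no longer fit so that
      // smaller locations later in the list still get a chance.
      long remaining = budget;
      for (Candidate c : ranked) {
        if (c.bytesPerExecutor() > remaining) continue;
        remaining -= c.bytesPerExecutor();
        System.out.println("picked " + c.id()); // picks "agg", then "filter"
      }
    }
  }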

The old behavior of placing the tuple cache at all eligible locations is
still available via the tuple_cache_placement_policy query option. The
default is the cost_based policy described above; the old behavior is
available via the all_eligible policy. This is useful for correctness
testing (and for the existing tuple cache test cases).
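
For example, a correctness test can restore the old behavior with the
standard query option syntax (e.g. in impala-shell):

 SET TUPLE_CACHE_PLACEMENT_POLICY=ALL_ELIGIBLE;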

This changes the explain plan output:
 - The hash trace is only printed at VERBOSE level. This means that the regular
   profile will not contain the hash trace, as the regular profile uses EXTENDED.
 - VERBOSE output now also displays the cost information for each plan node.
   This can help trace why a particular location was not picked.
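
Illustratively, a cache-eligible plan node at VERBOSE level now carries lines
like the following (the labels match getExplainString()/getCostExplainString()
in the diff below; the values here are made up):

   tuple cache key: <hash>
   estimated serialized size per node: 1.20MB
   cumulative processing cost: 1500000
   cache read processing cost: 120000
   cache write processing cost: 180000

An ineligible node instead prints its "tuple cache ineligibility reasons".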

Testing:
 - This adds a TPC-DS planner test with tuple caching enabled (based on the
   existing TpcdsCpuCostPlannerTest)
 - This modifies existing tests to adapt to changes in the explain plan output

Change-Id: Ifc6e7b95621a7937d892511dc879bf7c8da07cdc
Reviewed-on: http://gerrit.cloudera.org:8080/23219
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Author: Joe McDonnell
Date:   2025-07-21 16:48:57 -07:00
parent 3181fe1800
commit ca356a8df5
125 changed files with 140399 additions and 66 deletions


@@ -153,7 +153,7 @@ public abstract class TreeNode<NodeType extends TreeNode<NodeType>> {
*/
@SuppressWarnings("unchecked")
public <C extends TreeNode<NodeType>, D extends C> void collectAll(
-      Predicate<? super C> predicate, List<D> matches) {
+      Predicate<? super C> predicate, Collection<D> matches) {
if (predicate.apply((C) this)) matches.add((D) this);
for (NodeType child: children_) child.collectAll(predicate, matches);
}


@@ -449,35 +449,9 @@ abstract public class PlanNode extends TreeNode<PlanNode> {
}
}
if (detailLevel.ordinal() >= TExplainLevel.EXTENDED.ordinal()) {
-    if (getTupleCacheInfo() != null && getTupleCacheInfo().isEligible()) {
-      // This PlanNode is eligible for tuple caching, so there may be TupleCacheNodes
-      // above this point. For debuggability, display this node's contribution to the
-      // tuple cache key by printing its hash trace.
-      //
-      // Print trace in chunks to avoid excessive wrapping and padding in impala-shell.
-      // There are other explain lines at VERBOSE level that are over 100 chars long so
-      // we limit the key chunk length similarly here.
-      expBuilder.append(detailPrefix + "tuple cache key: " +
-          getTupleCacheInfo().getHashString() + "\n");
-      expBuilder.append(detailPrefix + "tuple cache hash trace:\n");
-      final int keyFormatWidth = 100;
-      for (HashTraceElement elem : getTupleCacheInfo().getHashTraces()) {
-        final String hashTrace = elem.getHashTrace();
-        if (hashTrace.length() < keyFormatWidth) {
-          expBuilder.append(String.format("%s %s: %s\n", detailPrefix,
-              elem.getComment(), hashTrace));
-        } else {
-          expBuilder.append(String.format("%s %s:\n", detailPrefix,
-              elem.getComment()));
-          for (int idx = 0; idx < hashTrace.length(); idx += keyFormatWidth) {
-            int stopIdx = Math.min(hashTrace.length(), idx + keyFormatWidth);
-            expBuilder.append(String.format("%s [%s]\n", detailPrefix,
-                hashTrace.substring(idx, stopIdx)));
-          }
-        }
-      }
-    }
+    // Print information about tuple caching if available
+    if (getTupleCacheInfo() != null) {
+      expBuilder.append(getTupleCacheInfo().getExplainString(detailPrefix, detailLevel));
+    }
// Print the children. Do not traverse into the children of an Exchange node to


@@ -0,0 +1,31 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.planner;
import java.util.Set;
/**
* The all_eligible tuple cache placement policy simply returns all the eligible
* locations. This null policy is useful for correctness testing, because we want
* to check correctness for all locations without regard for cost.
*/
public class TupleCacheAllEligiblePolicy implements TupleCachePlacementPolicy {
public Set<PlanNode> getFinalCachingLocations(Set<PlanNode> eligibleLocations) {
return eligibleLocations;
}
}


@@ -0,0 +1,160 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.planner;
import java.util.Comparator;
import java.util.HashSet;
import java.util.PriorityQueue;
import java.util.Set;
import com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.impala.thrift.TQueryOptions;
/**
* The cost-based tuple cache placement policy uses the processing cost information
* to try to pick the best locations. It has three parts:
* 1. Threshold - Locations must meet a certain cost threshold to be considered. The
* threshold is currently based on the ratio of the regular processing
* cost versus the cost of reading from the cache. This ratio must exceed
* the tuple_cache_required_cost_reduction_factor to be considered.
* For example, if the tuple_cache_required_cost_reduction_factor is
* 3.0, then the cost of reading from the cache must be at most 1/3 the cost of
* starting from scratch.
* 2. Ranking - The locations that meet the threshold are ranked based on the
* greatest cost reduction per byte.
* 3. Budget - Locations are picked in ranking order until the budget is reached.
* Currently, the budget is based on the number of bytes per executor
* set by the tuple_cache_budget_bytes_per_executor.
*
* The algorithm is fairly flexible, so the specific threshold, ranking, and budget
* could be modified over time.
*/
public class TupleCacheCostBasedPolicy implements TupleCachePlacementPolicy {
private final static Logger LOG =
LoggerFactory.getLogger(TupleCacheCostBasedPolicy.class);
private class CostReductionPerByteComparator implements Comparator<PlanNode> {
@Override
public int compare(PlanNode n1, PlanNode n2) {
Double n1_cost_density = computeCostReductionPerByte(n1);
Double n2_cost_density = computeCostReductionPerByte(n2);
// To order things such that the highest cost density comes first, we need to flip
// the sign on the comparison.
int result = -n1_cost_density.compareTo(n2_cost_density);
if (result != 0) return result;
// Two locations can have the same cost, so this uses the plan node id to break
// ties to make it consistent.
return n1.getId().asInt() - n2.getId().asInt();
}
}
private final Comparator<PlanNode> rankingComparator_;
private final TQueryOptions queryOptions_;
public TupleCacheCostBasedPolicy(TQueryOptions queryOptions) {
rankingComparator_ = new CostReductionPerByteComparator();
queryOptions_ = queryOptions;
}
private boolean meetsRequiredCostReductionFactor(PlanNode node) {
long cumulativeProcessingCost =
node.getTupleCacheInfo().getCumulativeProcessingCost();
// To avoid division by zero and exotic floating point behavior, require the cache
// read processing cost to be 1 or above.
long cacheReadProcessingCost =
Math.max(node.getTupleCacheInfo().getReadProcessingCost(), 1);
double costReductionFactor =
(double) cumulativeProcessingCost / cacheReadProcessingCost;
double requiredCostReductionFactor =
queryOptions_.tuple_cache_required_cost_reduction_factor;
if (costReductionFactor < requiredCostReductionFactor) {
LOG.trace(String.format("%s eliminated (cost reduction factor %f < threshold %f)",
node.getDisplayLabel(), costReductionFactor, requiredCostReductionFactor));
return false;
}
return true;
}
private boolean meetsCostThresholds(PlanNode node) {
// Filter out locations without statistics
if (node.getTupleCacheInfo().getEstimatedSerializedSize() < 0) {
LOG.trace(node.getDisplayLabel() + " eliminated due to missing statistics");
return false;
}
// Filter out locations that exceed the budget. They can never be picked.
long budget = queryOptions_.tuple_cache_budget_bytes_per_executor;
long bytesPerExecutor = node.getTupleCacheInfo().getEstimatedSerializedSizePerNode();
if (bytesPerExecutor > budget) {
LOG.trace(String.format("%s eliminated (bytes per executor %d > budget %d)",
node.getDisplayLabel(), bytesPerExecutor, budget));
return false;
}
return meetsRequiredCostReductionFactor(node);
}
private Double computeCostReductionPerByte(PlanNode node) {
long cumulativeProcessingCost =
node.getTupleCacheInfo().getCumulativeProcessingCost();
long cacheReadProcessingCost = node.getTupleCacheInfo().getReadProcessingCost();
long estimatedSerializedSize = node.getTupleCacheInfo().getEstimatedSerializedSize();
long costReduction = cumulativeProcessingCost - cacheReadProcessingCost;
// The estimated serialized size can be zero when the cardinality is zero. To keep
// this from being infinite, increment the serialized size by one.
return Double.valueOf((double) costReduction / (estimatedSerializedSize + 1));
}
public Set<PlanNode> getFinalCachingLocations(Set<PlanNode> eligibleLocations) {
Preconditions.checkState(eligibleLocations.size() > 0);
PriorityQueue<PlanNode> sortedLocations =
new PriorityQueue<PlanNode>(eligibleLocations.size(), rankingComparator_);
for (PlanNode node : eligibleLocations) {
if (meetsCostThresholds(node)) {
sortedLocations.add(node);
}
}
Set<PlanNode> finalLocations = new HashSet<PlanNode>();
// We pick the best locations (by the sorting order) until we reach the budget. This
// uses the bytes per executor as the units for the budget.
long remainingBytesPerExecutorBudget =
queryOptions_.tuple_cache_budget_bytes_per_executor;
// This continues past a location that would exceed the budget. That allows
// smaller locations later in the list to have a chance to fit in the
// remaining budget. This also means that one large entry early in the list
// won't block any other locations from being considered.
while (sortedLocations.size() > 0) {
PlanNode node = sortedLocations.poll();
long curBytesPerExecutor =
node.getTupleCacheInfo().getEstimatedSerializedSizePerNode();
if (curBytesPerExecutor > remainingBytesPerExecutorBudget) {
LOG.trace(String.format(
"Skipped %s (bytes per executor: %d, remaining budget: %d)",
node.getDisplayLabel(), curBytesPerExecutor,
remainingBytesPerExecutorBudget));
continue;
}
LOG.trace(String.format("Picked %s (bytes per executor: %d, remaining budget: %d)",
node.getDisplayLabel(), curBytesPerExecutor, remainingBytesPerExecutorBudget));
finalLocations.add(node);
remainingBytesPerExecutorBudget -= curBytesPerExecutor;
}
return finalLocations;
}
}


@@ -36,6 +36,7 @@ import org.apache.impala.catalog.FeView;
import org.apache.impala.common.IdGenerator;
import org.apache.impala.common.PrintUtils;
import org.apache.impala.common.ThriftSerializationCtx;
import org.apache.impala.service.BackendConfig;
import org.apache.impala.thrift.TExplainLevel;
import org.apache.impala.thrift.TFileSplitGeneratorSpec;
import org.apache.impala.thrift.TScanRange;
@@ -176,6 +177,16 @@ public class TupleCacheInfo {
// the filtered cardinality.
private long estimatedSerializedSize_ = -1;
// Estimated size divided by the expected number of nodes. This is used by the cost
// based placement for the budget contribution.
private long estimatedSerializedSizePerNode_ = -1;
// Processing cost for writing this location to the cache
private long writeProcessingCost_ = -1;
// Processing cost for reading this location from the cache
private long readProcessingCost_ = -1;
public TupleCacheInfo(DescriptorTable descTbl) {
ineligibilityReasons_ = EnumSet.noneOf(IneligibilityReason.class);
descriptorTable_ = descTbl;
@@ -205,16 +216,12 @@ public class TupleCacheInfo {
}
public String getHashString() {
-    Preconditions.checkState(isEligible(),
-        "TupleCacheInfo only has a hash if it is cache eligible");
-    Preconditions.checkState(finalized_, "TupleCacheInfo not finalized");
+    checkFinalizedAndEligible("a hash");
return finalizedHashString_;
}
public List<HashTraceElement> getHashTraces() {
-    Preconditions.checkState(isEligible(),
-        "TupleCacheInfo only has a hash trace if it is cache eligible");
-    Preconditions.checkState(finalized_, "TupleCacheInfo not finalized");
+    checkFinalizedAndEligible("a hash trace");
return hashTraces_;
}
@@ -232,19 +239,36 @@ public class TupleCacheInfo {
}
public long getCumulativeProcessingCost() {
-    Preconditions.checkState(isEligible(),
-        "TupleCacheInfo only has cost information if it is cache eligible.");
-    Preconditions.checkState(finalized_, "TupleCacheInfo not finalized");
+    checkFinalizedAndEligible("cost information");
return cumulativeProcessingCost_;
}
public long getEstimatedSerializedSize() {
-    Preconditions.checkState(isEligible(),
-        "TupleCacheInfo only has cost information if it is cache eligible.");
-    Preconditions.checkState(finalized_, "TupleCacheInfo not finalized");
+    checkFinalizedAndEligible("cost information");
return estimatedSerializedSize_;
}
public long getEstimatedSerializedSizePerNode() {
checkFinalizedAndEligible("cost information");
return estimatedSerializedSizePerNode_;
}
public long getWriteProcessingCost() {
checkFinalizedAndEligible("cost information");
return writeProcessingCost_;
}
public long getReadProcessingCost() {
checkFinalizedAndEligible("cost information");
return readProcessingCost_;
}
private void checkFinalizedAndEligible(String contextString) {
Preconditions.checkState(isEligible(),
"TupleCacheInfo only has %s if it is cache eligible.", contextString);
Preconditions.checkState(finalized_, "TupleCacheInfo not finalized");
}
/**
* Calculate the tuple cache cost information for this plan node. This must be called
* with the matching PlanNode for this TupleCacheInfo. This pulls in any information
@@ -259,6 +283,8 @@ public class TupleCacheInfo {
"TupleCacheInfo only calculates cost information if it is cache eligible.");
Preconditions.checkState(thisPlanNode.getTupleCacheInfo() == this,
"calculateCostInformation() must be called with its enclosing PlanNode");
Preconditions.checkState(thisPlanNode.getNumNodes() > 0,
"PlanNode fragment must have nodes");
// This was already called on our children, which are known to be eligible.
// Pull in the information from our children.
@@ -280,6 +306,23 @@ public class TupleCacheInfo {
long cardinality = thisPlanNode.getFilteredCardinality();
estimatedSerializedSize_ = (long) Math.round(
ExchangeNode.getAvgSerializedRowSize(thisPlanNode) * cardinality);
estimatedSerializedSizePerNode_ =
(long) estimatedSerializedSize_ / thisPlanNode.getNumNodes();
double costCoefficientWriteBytes =
BackendConfig.INSTANCE.getTupleCacheCostCoefficientWriteBytes();
double costCoefficientWriteRows =
BackendConfig.INSTANCE.getTupleCacheCostCoefficientWriteRows();
writeProcessingCost_ =
(long) (estimatedSerializedSize_ * costCoefficientWriteBytes +
cardinality * costCoefficientWriteRows);
double costCoefficientReadBytes =
BackendConfig.INSTANCE.getTupleCacheCostCoefficientReadBytes();
double costCoefficientReadRows =
BackendConfig.INSTANCE.getTupleCacheCostCoefficientReadRows();
readProcessingCost_ =
(long) (estimatedSerializedSize_ * costCoefficientReadBytes +
cardinality * costCoefficientReadRows);
}
}
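
As a worked example of these write/read cost formulas (the coefficient values
here are illustrative, not the backend defaults): with an estimated serialized
size of 1,000,000 bytes, a cardinality of 10,000 rows, a write-bytes
coefficient of 0.5, and a write-rows coefficient of 2.0, the write processing
cost is 1,000,000 * 0.5 + 10,000 * 2.0 = 520,000; the read cost follows the
same shape with the read coefficients.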
@@ -561,6 +604,53 @@ public class TupleCacheInfo {
return builder.toString();
}
public String getExplainHashTrace(String detailPrefix) {
StringBuilder output = new StringBuilder();
final int keyFormatWidth = 100;
for (HashTraceElement elem : getHashTraces()) {
final String hashTrace = elem.getHashTrace();
if (hashTrace.length() < keyFormatWidth) {
output.append(String.format("%s %s: %s\n", detailPrefix, elem.getComment(),
hashTrace));
} else {
output.append(String.format("%s %s:\n", detailPrefix, elem.getComment()));
for (int idx = 0; idx < hashTrace.length(); idx += keyFormatWidth) {
int stopIdx = Math.min(hashTrace.length(), idx + keyFormatWidth);
output.append(String.format("%s [%s]\n", detailPrefix,
hashTrace.substring(idx, stopIdx)));
}
}
}
return output.toString();
}
public String getExplainString(String detailPrefix, TExplainLevel detailLevel) {
if (detailLevel.ordinal() >= TExplainLevel.VERBOSE.ordinal()) {
// At VERBOSE level, provide information about whether this location is
// eligible. If it is, provide the cache key and cost information.
StringBuilder output = new StringBuilder();
if (isEligible()) {
output.append(String.format("%stuple cache key: %s\n", detailPrefix,
getHashString()));
output.append(getCostExplainString(detailPrefix));
// This PlanNode is eligible for tuple caching, so there may be TupleCacheNodes
// above this point. For debuggability, display this node's contribution to the
// tuple cache key by printing its hash trace.
//
// Print trace in chunks to avoid excessive wrapping and padding in impala-shell.
// There are other explain lines at VERBOSE level that are over 100 chars long so
// we limit the key chunk length similarly here.
output.append(getExplainHashTrace(detailPrefix));
} else {
output.append(String.format("%stuple cache ineligibility reasons: %s\n",
detailPrefix, getIneligibilityReasonsString()));
}
return output.toString();
} else {
return "";
}
}
/**
* Produce explain output describing the cost information for this tuple cache location
*/
@@ -573,9 +663,22 @@ public class TupleCacheInfo {
output.append("unavailable");
}
output.append("\n");
output.append(detailPrefix + "estimated serialized size per node: ");
if (estimatedSerializedSizePerNode_ > -1) {
output.append(PrintUtils.printBytes(estimatedSerializedSizePerNode_));
} else {
output.append("unavailable");
}
output.append("\n");
output.append(detailPrefix + "cumulative processing cost: ");
output.append(getCumulativeProcessingCost());
output.append("\n");
output.append(detailPrefix + "cache read processing cost: ");
output.append(getReadProcessingCost());
output.append("\n");
output.append(detailPrefix + "cache write processing cost: ");
output.append(getWriteProcessingCost());
output.append("\n");
return output.toString();
}


@@ -0,0 +1,32 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.planner;
import java.util.Set;
/**
* Shared interface for all tuple cache placement policies.
*/
public interface TupleCachePlacementPolicy {
/**
* Given a set of eligible locations, return a set of locations that should actually
* be used. The return set must be a subset of the eligible locations passed in.
*/
public Set<PlanNode> getFinalCachingLocations(Set<PlanNode> eligibleLocations);
}


@@ -17,10 +17,13 @@
package org.apache.impala.planner;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.thrift.TQueryOptions;
import org.apache.impala.thrift.TTupleCachePlacementPolicy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -32,16 +35,34 @@ import com.google.common.base.Preconditions;
* the plan tree be in a stable form that won't later change. That means that this is
* designed to run as the last step in planning.
*
- * The current algorithm is to add a TupleCacheNode at every eligible location. This will
- * need to be refined with cost calculations later.
+ * The cache placement algorithm is controlled by the 'tuple_cache_placement_policy' query
+ * option. See descriptions of these policies at {@link TupleCacheAllEligiblePolicy} and
+ * {@link TupleCacheCostBasedPolicy}.
*/
public class TupleCachePlanner {
private final static Logger LOG = LoggerFactory.getLogger(TupleCachePlanner.class);
private final PlannerContext ctx_;
private final TQueryOptions queryOptions_;
private final TupleCachePlacementPolicy placementPolicy_;
public TupleCachePlanner(PlannerContext ctx) {
ctx_ = ctx;
queryOptions_ =
ctx_.getRootAnalyzer().getQueryCtx().client_request.getQuery_options();
TTupleCachePlacementPolicy policy = queryOptions_.getTuple_cache_placement_policy();
LOG.info("Using tuple cache placement policy: " + policy);
switch (policy) {
case ALL_ELIGIBLE:
placementPolicy_ = new TupleCacheAllEligiblePolicy();
break;
case COST_BASED:
placementPolicy_ = new TupleCacheCostBasedPolicy(queryOptions_);
break;
default:
Preconditions.checkState(false, "Unexpected placement policy: " + policy);
placementPolicy_ = null;
}
}
/**
@@ -56,8 +77,25 @@ public class TupleCachePlanner {
root.computeTupleCacheInfo(ctx_.getRootAnalyzer().getDescTbl(),
ctx_.getRootAnalyzer().getQueryCtx().query_options_result_hash);
-    // Step 2: Build up the new PlanNode tree with TupleCacheNodes added
-    PlanNode newRoot = buildCachingPlan(root);
+    // Step 2: Collect eligible locations
+    Set<PlanNode> eligibleLocations = new HashSet<PlanNode>();
+    root.collectAll(
+        (node) -> {
+          return node.getTupleCacheInfo().isEligible() && !node.omitTupleCache();
+        }, eligibleLocations);
+    // If there are no eligible locations, we're done
+    if (eligibleLocations.size() == 0) {
+      return plan;
+    }
+    // Step 3: Use the placement policy to compute the final locations
+    Set<PlanNode> finalLocations =
+        placementPolicy_.getFinalCachingLocations(eligibleLocations);
+    // Step 4: Build up the new PlanNode tree with TupleCacheNodes added in the specified
+    // locations
+    PlanNode newRoot = buildCachingPlan(root, finalLocations);
// Since buildCachingPlan is modifying things in place, verify that the top-most plan
// fragment's plan root matches with the newRoot returned.
Preconditions.checkState(plan.get(0).getPlanRoot() == newRoot);
@@ -71,35 +109,29 @@ public class TupleCachePlanner {
/**
* Add TupleCacheNodes at every eligible location via a bottom-up traversal of the tree.
*/
-  private PlanNode buildCachingPlan(PlanNode node) throws ImpalaException {
+  private PlanNode buildCachingPlan(PlanNode node, Set<PlanNode> locations)
+      throws ImpalaException {
    // Recurse through the children applying the caching policy
    for (int i = 0; i < node.getChildCount(); i++) {
-      node.setChild(i, buildCachingPlan(node.getChild(i)));
+      node.setChild(i, buildCachingPlan(node.getChild(i), locations));
    }
-    // If this node is not eligible, then we are done
-    if (!node.getTupleCacheInfo().isEligible()) {
+    // If this node is not in the list of caching locations, then we are done.
+    if (!locations.contains(node)) {
      return node;
    }
-    // If node omits tuple cache placement - such as Exchange and Union nodes, where it
-    // would not be beneficial - skip it.
-    if (node.omitTupleCache()) {
-      return node;
-    }
+    // Locations that are not eligible were already filtered out by the collection phase
+    Preconditions.checkState(
+        node.getTupleCacheInfo().isEligible() && !node.omitTupleCache(),
+        "Final location must be eligible");
-    // Should we cache above this node?
-    // Simplest policy: always cache if eligible
-    // TODO: Make this more complicated (e.g. cost calculations)
    if (LOG.isTraceEnabled()) {
      LOG.trace("Adding TupleCacheNode above node " + node.getId().toString());
    }
-    // Get current query options
-    TQueryOptions queryOptions =
-        ctx_.getRootAnalyzer().getQueryCtx().client_request.getQuery_options();
    // Allocate TupleCacheNode
    TupleCacheNode tupleCacheNode = new TupleCacheNode(ctx_.getNextNodeId(), node,
-        queryOptions.isEnable_tuple_cache_verification());
+        queryOptions_.isEnable_tuple_cache_verification());
tupleCacheNode.init(ctx_.getRootAnalyzer());
PlanFragment curFragment = node.getFragment();
if (node == curFragment.getPlanRoot()) {


@@ -597,4 +597,20 @@ public class BackendConfig {
public boolean truncateExternalTablesWithHms() {
return backendCfg_.truncate_external_tables_with_hms;
}
public double getTupleCacheCostCoefficientWriteBytes() {
return backendCfg_.tuple_cache_cost_coefficient_write_bytes;
}
public double getTupleCacheCostCoefficientWriteRows() {
return backendCfg_.tuple_cache_cost_coefficient_write_rows;
}
public double getTupleCacheCostCoefficientReadBytes() {
return backendCfg_.tuple_cache_cost_coefficient_read_bytes;
}
public double getTupleCacheCostCoefficientReadRows() {
return backendCfg_.tuple_cache_cost_coefficient_read_rows;
}
}


@@ -0,0 +1,719 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.planner;
import com.google.common.collect.Sets;
import com.google.common.io.Files;
import org.apache.impala.catalog.SideloadTableStats;
import org.apache.impala.common.ByteUnits;
import org.apache.impala.common.RuntimeEnv;
import org.apache.impala.thrift.TExecutorGroupSet;
import org.apache.impala.thrift.TQueryOptions;
import org.apache.impala.thrift.TReplicaPreference;
import org.apache.impala.thrift.TSlotCountStrategy;
import org.apache.impala.thrift.TUpdateExecutorMembershipRequest;
import org.apache.impala.util.ExecutorMembershipSnapshot;
import org.apache.impala.util.RequestPoolService;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import java.io.File;
import java.io.IOException;
import java.net.URISyntaxException;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
/**
* This tests cost-based tuple cache placement for TPC-DS using the same setup as
* TpcdsCpuCostPlannerTest, with a simulated 3TB scale and COMPUTE_PROCESSING_COST
* option enabled.
*/
public class TpcdsTupleCachePlannerTest extends PlannerTestBase {
// Pool definitions, including memory resource limits, copied to a temporary file
private static final String ALLOCATION_FILE = "fair-scheduler-3-groups.xml";
// Contains per-pool configurations for maximum number of running queries and queued
// requests.
private static final String LLAMA_CONFIG_FILE = "llama-site-3-groups.xml";
// Planner test option to run each planner test.
private static Set<PlannerTestOption> testOptions = tpcdsParquetTestOptions();
// Query option to run each planner test.
private static TQueryOptions options =
tpcdsParquetQueryOptions()
.setCompute_processing_cost(true)
.setMax_fragment_instances_per_node(12)
.setReplica_preference(TReplicaPreference.REMOTE)
.setSlot_count_strategy(TSlotCountStrategy.PLANNER_CPU_ASK)
.setMem_estimate_scale_for_spilling_operator(1.0)
.setPlanner_testcase_mode(true)
// Required so that output doesn't vary by whether scanned tables have stats &
// numRows property or not.
.setDisable_hdfs_num_rows_estimate(true)
.setEnable_tuple_cache(true);
// Database name to run this test.
private static String testDb = "tpcds_partitioned_parquet_snap";
// Map of table stats that is obtained through loadStatsJson().
private static Map<String, Map<String, SideloadTableStats>> sideloadStats;
// Granular scan limit that will be injected into individual ScanNodes of tables.
private static Map<String, Long> scanRangeLimit = new HashMap<String, Long>() {
{
// split a 5752989 bytes file to 10 ranges.
put("customer", 580 * ByteUnits.KILOBYTE);
// split a 1218792 bytes file to 10 ranges.
put("customer_address", 125 * ByteUnits.KILOBYTE);
// split a 7848768 bytes file to 10 ranges.
put("customer_demographics", 790 * ByteUnits.KILOBYTE);
// split a 1815300 bytes file to 4 ranges.
put("item", 500L * ByteUnits.KILOBYTE);
}
};
// Temporary folder to copy admission control files into.
// Do not annotate with JUnit @Rule because we want to keep the tempFolder the same
// for entire lifetime of test class.
private static TemporaryFolder tempFolder;
/**
* Returns a {@link File} for the file on the classpath.
*/
private static File getClasspathFile(String filename) throws URISyntaxException {
return new File(
TpcdsCpuCostPlannerTest.class.getClassLoader().getResource(filename).toURI());
}
private static void setupAdmissionControl() throws IOException, URISyntaxException {
// Start admission control with config file fair-scheduler-3-groups.xml
// and llama-site-3-groups.xml
tempFolder = new TemporaryFolder();
tempFolder.create();
File allocationConfFile = tempFolder.newFile(ALLOCATION_FILE);
Files.copy(getClasspathFile(ALLOCATION_FILE), allocationConfFile);
File llamaConfFile = tempFolder.newFile(LLAMA_CONFIG_FILE);
Files.copy(getClasspathFile(LLAMA_CONFIG_FILE), llamaConfFile);
// Intentionally mark isTest = false to cache poolService as a singleton.
RequestPoolService poolService =
RequestPoolService.getInstance(allocationConfFile.getAbsolutePath(),
llamaConfFile.getAbsolutePath(), /* isTest */ false);
poolService.start();
}
@BeforeClass
public static void setUp() throws Exception {
// Mimic the 10 node test mini-cluster with admission control enabled.
setupAdmissionControl();
// Add 10 node executor group set root.large. This group set is also configured with
// impala.admission-control.max-query-mem-limit.root.large = 50GB.
setUpTestCluster(10, 10, "root.large");
setUpKuduClientAndLogDir();
Paths.get(outDir_.toString(), "tpcds_tuple_cache").toFile().mkdirs();
// Sideload stats through RuntimeEnv.
RuntimeEnv.INSTANCE.setTestEnv(true);
sideloadStats = loadStatsJson("tpcds_cpu_cost/stats-3TB.json");
RuntimeEnv.INSTANCE.setSideloadStats(sideloadStats);
// Artificially split single-file tables into multiple scan ranges so that the scan
// looks like a multi-file table.
for (Map.Entry<String, Long> entry : scanRangeLimit.entrySet()) {
RuntimeEnv.INSTANCE.addTableScanRangeLimit(
testDb, entry.getKey(), entry.getValue());
}
invalidateTables();
}
@AfterClass
public static void unsetMetadataScaleAndStopPoolService() {
RuntimeEnv.INSTANCE.dropSideloadStats();
RuntimeEnv.INSTANCE.dropTableScanRangeLimit();
invalidateTables();
RequestPoolService.getInstance().stop();
tempFolder.delete();
}
/**
* Invalidate tables to reload them with new stats.
*/
private static void invalidateTables() {
for (String db : sideloadStats.keySet()) {
for (String table : sideloadStats.get(db).keySet()) {
catalog_.getSrcCatalog().invalidateTableIfExists(testDb, table);
}
}
}
@Test
public void testQ1() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q01", testDb, options, testOptions);
}
@Test
public void testQ2() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q02", testDb, options, testOptions);
}
@Test
public void testQ3() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q03", testDb, options, testOptions);
}
@Test
public void testQ4() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q04", testDb, options, testOptions);
}
@Test
public void testQ5() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q05", testDb, options, testOptions);
}
@Test
public void testQ6() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q06", testDb, options, testOptions);
}
@Test
public void testQ7() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q07", testDb, options, testOptions);
}
@Test
public void testQ8() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q08", testDb, options, testOptions);
}
@Test
public void testQ9() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q09", testDb, options, testOptions);
}
@Test
public void testQ10() {
// This is an official variant of q10 that uses a rewrite for lack of support for
// multiple subqueries in disjunctive predicates.
runPlannerTestFile("tpcds_tuple_cache/tpcds-q10a", testDb, options, testOptions);
}
@Test
public void testQ11() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q11", testDb, options, testOptions);
}
@Test
public void testQ12() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q12", testDb, options, testOptions);
}
@Test
public void testQ13() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q13", testDb, options, testOptions);
}
@Test
public void testQ14a() {
// First of the two query statements from the official q14.
runPlannerTestFile("tpcds_tuple_cache/tpcds-q14a", testDb, options, testOptions);
}
@Test
public void testQ14b() {
// Second of the two query statements from the official q14.
runPlannerTestFile("tpcds_tuple_cache/tpcds-q14b", testDb, options, testOptions);
}
@Test
public void testQ15() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q15", testDb, options, testOptions);
}
@Test
public void testQ16() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q16", testDb, options, testOptions);
}
@Test
public void testQ17() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q17", testDb, options, testOptions);
}
@Test
public void testQ18() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q18", testDb, options, testOptions);
}
@Test
public void testQ19() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q19", testDb, options, testOptions);
}
@Test
public void testQ20() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q20", testDb, options, testOptions);
}
@Test
public void testQ21() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q21", testDb, options, testOptions);
}
@Test
public void testQ22() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q22", testDb, options, testOptions);
}
@Test
public void testQ23a() {
// First of the two query statements from the official q23.
runPlannerTestFile("tpcds_tuple_cache/tpcds-q23a", testDb, options, testOptions);
}
@Test
public void testQ23b() {
// Second of the two query statements from the official q23.
runPlannerTestFile("tpcds_tuple_cache/tpcds-q23b", testDb, options, testOptions);
}
@Test
public void testQ24a() {
// First of the two query statements from the official q24.
runPlannerTestFile("tpcds_tuple_cache/tpcds-q24a", testDb, options, testOptions);
}
@Test
public void testQ24b() {
// Second of the two query statements from the official q24.
runPlannerTestFile("tpcds_tuple_cache/tpcds-q24b", testDb, options, testOptions);
}
@Test
public void testQ25() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q25", testDb, options, testOptions);
}
@Test
public void testQ26() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q26", testDb, options, testOptions);
}
@Test
public void testQ27() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q27", testDb, options, testOptions);
}
@Test
public void testQ28() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q28", testDb, options, testOptions);
}
@Test
public void testQ29() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q29", testDb, options, testOptions);
}
@Test
public void testQ30() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q30", testDb, options, testOptions);
}
@Test
public void testQ31() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q31", testDb, options, testOptions);
}
@Test
public void testQ32() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q32", testDb, options, testOptions);
}
@Test
public void testQ33() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q33", testDb, options, testOptions);
}
@Test
public void testQ34() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q34", testDb, options, testOptions);
}
@Test
public void testQ35() {
// This is an official variant of q35 that uses a rewrite for lack of support for
// multiple subqueries in disjunctive predicates.
runPlannerTestFile("tpcds_tuple_cache/tpcds-q35a", testDb, options, testOptions);
}
@Test
public void testQ36() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q36", testDb, options, testOptions);
}
@Test
public void testQ37() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q37", testDb, options, testOptions);
}
@Test
public void testQ38() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q38", testDb, options, testOptions);
}
@Test
public void testQ39a() {
// First of the two query statements from the official q39.
runPlannerTestFile("tpcds_tuple_cache/tpcds-q39a", testDb, options, testOptions);
}
@Test
public void testQ39b() {
// Second of the two query statements from the official q39.
runPlannerTestFile("tpcds_tuple_cache/tpcds-q39b", testDb, options, testOptions);
}
@Test
public void testQ40() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q40", testDb, options, testOptions);
}
@Test
public void testQ41() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q41", testDb, options, testOptions);
}
@Test
public void testQ42() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q42", testDb, options, testOptions);
}
@Test
public void testQ43() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q43", testDb, options, testOptions);
}
@Test
public void testQ44() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q44", testDb, options, testOptions);
}
@Test
public void testQ45() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q45", testDb, options, testOptions);
}
@Test
public void testQ46() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q46", testDb, options, testOptions);
}
@Test
public void testQ47() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q47", testDb, options, testOptions);
}
@Test
public void testQ48() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q48", testDb, options, testOptions);
}
@Test
public void testQ49() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q49", testDb, options, testOptions);
}
@Test
public void testQ50() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q50", testDb, options, testOptions);
}
@Test
public void testQ51() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q51", testDb, options, testOptions);
}
@Test
public void testQ52() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q52", testDb, options, testOptions);
}
@Test
public void testQ53() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q53", testDb, options, testOptions);
}
@Test
public void testQ54() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q54", testDb, options, testOptions);
}
@Test
public void testQ55() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q55", testDb, options, testOptions);
}
@Test
public void testQ56() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q56", testDb, options, testOptions);
}
@Test
public void testQ57() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q57", testDb, options, testOptions);
}
@Test
public void testQ58() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q58", testDb, options, testOptions);
}
@Test
public void testQ59() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q59", testDb, options, testOptions);
}
@Test
public void testQ60() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q60", testDb, options, testOptions);
}
@Test
public void testQ61() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q61", testDb, options, testOptions);
}
@Test
public void testQ62() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q62", testDb, options, testOptions);
}
@Test
public void testQ63() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q63", testDb, options, testOptions);
}
@Test
public void testQ64() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q64", testDb, options, testOptions);
}
@Test
public void testQ65() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q65", testDb, options, testOptions);
}
@Test
public void testQ66() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q66", testDb, options, testOptions);
}
@Test
public void testQ67() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q67", testDb, options, testOptions);
}
@Test
public void testQ68() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q68", testDb, options, testOptions);
}
@Test
public void testQ69() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q69", testDb, options, testOptions);
}
@Test
public void testQ70() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q70", testDb, options, testOptions);
}
@Test
public void testQ71() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q71", testDb, options, testOptions);
}
@Test
public void testQ72() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q72", testDb, options, testOptions);
}
@Test
public void testQ73() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q73", testDb, options, testOptions);
}
@Test
public void testQ74() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q74", testDb, options, testOptions);
}
@Test
public void testQ75() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q75", testDb, options, testOptions);
}
@Test
public void testQ76() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q76", testDb, options, testOptions);
}
@Test
public void testQ77() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q77", testDb, options, testOptions);
}
@Test
public void testQ78() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q78", testDb, options, testOptions);
}
@Test
public void testQ79() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q79", testDb, options, testOptions);
}
@Test
public void testQ80() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q80", testDb, options, testOptions);
}
@Test
public void testQ81() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q81", testDb, options, testOptions);
}
@Test
public void testQ82() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q82", testDb, options, testOptions);
}
@Test
public void testQ83() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q83", testDb, options, testOptions);
}
@Test
public void testQ84() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q84", testDb, options, testOptions);
}
@Test
public void testQ85() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q85", testDb, options, testOptions);
}
@Test
public void testQ86() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q86", testDb, options, testOptions);
}
@Test
public void testQ87() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q87", testDb, options, testOptions);
}
@Test
public void testQ88() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q88", testDb, options, testOptions);
}
@Test
public void testQ89() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q89", testDb, options, testOptions);
}
@Test
public void testQ90() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q90", testDb, options, testOptions);
}
@Test
public void testQ91() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q91", testDb, options, testOptions);
}
@Test
public void testQ92() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q92", testDb, options, testOptions);
}
@Test
public void testQ93() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q93", testDb, options, testOptions);
}
@Test
public void testQ94() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q94", testDb, options, testOptions);
}
@Test
public void testQ95() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q95", testDb, options, testOptions);
}
@Test
public void testQ96() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q96", testDb, options, testOptions);
}
@Test
public void testQ97() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q97", testDb, options, testOptions);
}
@Test
public void testQ98() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q98", testDb, options, testOptions);
}
@Test
public void testQ99() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-q99", testDb, options, testOptions);
}
@Test
public void testNonTpcdsDdl() {
// This is a copy of PlannerTest.testDdl().
// Not using tpcds_partitioned_parquet_snap db, but piggy-backed to test them
// under costing setup.
runPlannerTestFile("tpcds_tuple_cache/ddl", testDb, options, testOptions);
}
@Test
public void testTpcdsDdlParquet() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-ddl-parquet", testDb, options,
testOptions);
}
@Test
public void testTpcdsDdlIceberg() {
runPlannerTestFile("tpcds_tuple_cache/tpcds-ddl-iceberg", testDb, options,
testOptions);
}
}


@@ -224,9 +224,14 @@ public class TestUtils {
static IgnoreValueFilter SCAN_RANGE_ROW_COUNT_FILTER =
new IgnoreValueFilter("max-scan-range-rows", PrintUtils.METRIC_REGEX);
+  // Ignore the tuple cache key, as it can vary across different dataloads
+  static IgnoreValueFilter TUPLE_CACHE_KEY_FILTER =
+      new IgnoreValueFilter("cache key", " \\S+", ':');
  // Filters that are always applied
  private static final List<ResultFilter> DEFAULT_FILTERS = Arrays.<ResultFilter>asList(
-      SCAN_RANGE_ROW_COUNT_FILTER, new PathFilter("hdfs:"), new PathFilter("file: "));
+      SCAN_RANGE_ROW_COUNT_FILTER, TUPLE_CACHE_KEY_FILTER, new PathFilter("hdfs:"),
+      new PathFilter("file: "));
// Filters that ignore the values of resource requirements that appear in
// "EXTENDED" and above explain plans.