IMPALA-13712: Calcite Planner - Enable constant folding

Constant folding is enabled by this patch.

Calcite does constant folding via the RexExecutor.reduce()
method. However, we need to use Impala's constant folding algorithm
to ensure that Impala expressions are folded. This is done through
the derived class ImpalaRexExecutor and is called from the
Simplify rules.

The ImpalaRexExecutor calls an internal shuttle class which
recursively walks through the RexNode which checks if portions of
the expression can be constant folded.

Some expressions are not folded due to various reasons:

- We avoid folding 'cast(1.2 as double)' type expressions
  because folding this creates an inexact number, and this is
  problematic for partition pruning directory names on double
  columns which contain the exact number (1.2 in this case).

- Interval expressions are skipped temporarily since the Expr
  class generated is not meant to be simplified. However, an
  Expr object that contains an IntervalExpr may be simplified.

There is a special case that needed to be handled for a values
query with different sized arguments across rows. In Calcite
version 1.40 (not yet upgraded as of this commit), an extra
cast is added around smaller strings to ensure the char(x)
is the same size across all rows. However, this adds extra spaces
to the string which causes results different from the original
Impala planner. This must be caught before Calcite converts the
abstract syntax tree into a RelNode logical tree. A special
RexExecutor has been created to handle this which looks for char
casts around a char literal and removes it. This is fine because
the literal will be changed into a string in the "coercenodes"
module.

Change-Id: I98c21ef75b2f5f8e3390ff5de5fdf45d9645b326
Reviewed-on: http://gerrit.cloudera.org:8080/23723
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Steve Carlin
2025-11-07 15:56:29 -08:00
committed by Michael Smith
parent 3dac0135fb
commit 593b0bfad3
99 changed files with 5911 additions and 4959 deletions

View File

@@ -23,6 +23,7 @@ import com.google.common.collect.Lists;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.fun.SqlPosixRegexOperator;
@@ -36,8 +37,10 @@ import org.apache.impala.analysis.CompoundPredicate;
import org.apache.impala.analysis.Expr;
import org.apache.impala.analysis.FunctionCallExpr;
import org.apache.impala.analysis.IsNullPredicate;
import org.apache.impala.analysis.NumericLiteral;
import org.apache.impala.analysis.TimestampArithmeticExpr;
import org.apache.impala.calcite.operators.ImpalaInOperator;
import org.apache.impala.calcite.rules.ImpalaRexExecutor;
import org.apache.impala.calcite.type.ImpalaTypeConverter;
import org.apache.impala.catalog.Function;
import org.apache.impala.catalog.Type;
@@ -45,6 +48,7 @@ import org.apache.impala.common.ImpalaException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@@ -196,6 +200,15 @@ public class RexCallConverter {
return params.get(0);
}
// Hack logic: Partition pruning needs the exact number when the column
// is decimal. We need to keep the cast(<some decimal> as double) so the
// partition pruner can keep the right value.
if (ImpalaRexExecutor.isImplicitCastDecimalToInexact(call)) {
RexLiteral literal = (RexLiteral) call.getOperands().get(0);
BigDecimal value = (BigDecimal) RexLiteral.value(literal);
return new NumericLiteral(value, impalaRetType);
}
// Small hack: Most cast expressions have "isImplicit" set to true. If this
// is the case, then it blocks "analyze" from working through the cast. We
// need to analyze the expression before creating the cast around it.

View File

@@ -55,16 +55,29 @@ public class ExprConjunctsConverter {
public ExprConjunctsConverter(RexNode conjunct, List<Expr> inputExprs,
RexBuilder rexBuilder, Analyzer analyzer) throws ImpalaException {
this(conjunct, inputExprs, rexBuilder, analyzer, true);
}
public ExprConjunctsConverter(RexNode conjunct, List<Expr> inputExprs,
RexBuilder rexBuilder, Analyzer analyzer, boolean splitAndConjuncts)
throws ImpalaException {
ImmutableList.Builder<Expr> builder = new ImmutableList.Builder();
if (conjunct != null) {
CreateExprVisitor visitor =
new CreateExprVisitor(rexBuilder, inputExprs, analyzer);
RexNode expandedConjunct = ImpalaRexUtil.expandSearch(rexBuilder, conjunct);
List<RexNode> andOperands = getAndConjuncts(expandedConjunct);
for (RexNode andOperand : andOperands) {
Expr convertedExpr = CreateExprVisitor.getExpr(visitor, andOperand);
// if splitAndConjuncts is false, there will be only one operand containing
// all the 'and' conjuncts. If it is true, each top level 'and' will be
// a member in the list. Separating out the 'and' clauses is needed for partition
// pruning, because if the 'and' conjunct meets pruning conditions, the
// clause is used to remove directories and not needed when checking
// on each individual row.
List<RexNode> operands = splitAndConjuncts
? getAndConjuncts(expandedConjunct)
: Lists.newArrayList(expandedConjunct);
for (RexNode operand : operands) {
Expr convertedExpr = CreateExprVisitor.getExpr(visitor, operand);
builder.add(convertedExpr);
}
}

View File

@@ -21,9 +21,11 @@ import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexExecutor;
import org.apache.calcite.rex.RexNode;
import org.apache.impala.calcite.operators.ImpalaRexSimplify;
import org.apache.impala.calcite.operators.ImpalaRexUtil;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import java.util.ArrayList;
@@ -32,6 +34,7 @@ import java.util.List;
/**
* ImpalaFilterSimplifyRule calls the given ImpalaRexSimplify.simplify()
* method (derived from Calcite's RexSimplify) for the filter condition.
* It also calls ImpalaRexExecutor.reduce() which does constant folding.
*/
public class ImpalaFilterSimplifyRule extends RelOptRule {
@@ -48,8 +51,13 @@ public class ImpalaFilterSimplifyRule extends RelOptRule {
RelOptCluster cluster = filter.getCluster();
RexBuilder rexBuilder = cluster.getRexBuilder();
RexNode condition = filter.getCondition();
RexExecutor executor = simplifier_.getRexExecutor();
RexNode newCondition = simplifier_.simplify(condition);
List<RexNode> reducedExprs = new ArrayList<>();
executor.reduce(rexBuilder, ImmutableList.of(newCondition), reducedExprs);
Preconditions.checkState(reducedExprs.size() == 1);
newCondition = reducedExprs.get(0);
if (newCondition.equals(condition)) {
return;

View File

@@ -21,6 +21,7 @@ import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelOptRuleCall;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexExecutor;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexUtil;
import org.apache.impala.calcite.operators.ImpalaRexSimplify;
@@ -33,6 +34,7 @@ import java.util.List;
* ImpalaProjectSimplifyRule calls the given ImpalaRexSimplify.simplify()
* method (derived from Calcite's RexSimplify) for all the columns in the
* Project RelNode.
* It also calls ImpalaRexExecutor.reduce() which does constant folding.
*/
public class ImpalaProjectSimplifyRule extends RelOptRule {
@@ -48,6 +50,7 @@ public class ImpalaProjectSimplifyRule extends RelOptRule {
Project projectRel = call.rel(0);
RelOptCluster cluster = projectRel.getCluster();
RexBuilder rexBuilder = cluster.getRexBuilder();
RexExecutor executor = simplifier_.getRexExecutor();
List<RexNode> newProjects = new ArrayList<>();
boolean projectChanged = false;
@@ -66,13 +69,17 @@ public class ImpalaProjectSimplifyRule extends RelOptRule {
newProjects.add(newProject);
}
if (!projectChanged) {
List<RexNode> reducedExprs = new ArrayList<>();
executor.reduce(rexBuilder, newProjects, reducedExprs);
if (!projectChanged &&
RexUtil.strings(projectRel.getProjects()).equals(RexUtil.strings(reducedExprs))) {
return;
}
// only create the new project if any projects changed.
Project newProjectRel = projectRel.copy(projectRel.getTraitSet(),
projectRel.getInput(0), newProjects, projectRel.getRowType());
projectRel.getInput(0), reducedExprs, projectRel.getRowType());
call.transformTo(newProjectRel);
}
}

View File

@@ -0,0 +1,298 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.calcite.rules;
import com.google.common.base.Preconditions;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelShuttleImpl;
import org.apache.calcite.rel.logical.LogicalFilter;
import org.apache.calcite.rel.logical.LogicalProject;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexExecutor;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.rex.RexShuttle;
import org.apache.calcite.rex.RexUtil;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.calcite.sql.type.SqlTypeUtil;
import org.apache.calcite.util.TimestampString;
import org.apache.impala.analysis.Analyzer;
import org.apache.impala.analysis.Expr;
import org.apache.impala.analysis.FunctionCallExpr;
import org.apache.impala.analysis.LiteralExpr;
import org.apache.impala.calcite.rel.util.ExprConjunctsConverter;
import org.apache.impala.common.AnalysisException;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.service.FeSupport;
import org.apache.impala.thrift.TColumnValue;
import org.apache.impala.thrift.TQueryCtx;
import java.io.UnsupportedEncodingException;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import org.apache.commons.lang3.exception.ExceptionUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* ImpalaRexExecutor does the constant folding of Impala RexNode objects.
*/
public class ImpalaRexExecutor implements RexExecutor {
protected static final Logger LOG =
LoggerFactory.getLogger(ImpalaRexExecutor.class.getName());
private final Analyzer analyzer_;
private final TQueryCtx queryCtx_;
// Reducer class used for testing purposes for injection.
private final Reducer reducer_;
public ImpalaRexExecutor(Analyzer analyzer, TQueryCtx queryCtx,
Reducer reducer) {
analyzer_ = analyzer;
queryCtx_ = queryCtx;
reducer_ = reducer;
}
/**
* The main routine that gets called when doing the constant folding. The
* constExps contains the RexNodes that will be constant folded and the
* reducedValues contain the RexNodes after constant folding. The size of these
* two arrays must match at the end of this method.
*/
@Override
public void reduce(RexBuilder rexBuilder, List<RexNode> constExps,
List<RexNode> reducedValues) {
ReduceRexNodeShuttle shuttle =
new ReduceRexNodeShuttle(analyzer_, queryCtx_, rexBuilder, reducer_);
for (RexNode rexNode : constExps) {
RexNode newProject = rexNode.accept(shuttle);
if (rexNode.getType() != newProject.getType()) {
newProject = rexBuilder.makeCast(rexNode.getType(), newProject,
rexNode.getType().isNullable());
}
reducedValues.add(newProject);
}
Preconditions.checkState(reducedValues.size() == constExps.size());
}
private static boolean isReducible(RexNode rexNode) {
// may already be reduced to a literal
if (!(rexNode instanceof RexCall)) {
return false;
}
RexCall call = (RexCall) rexNode;
// cannot reduce interval operation by itself. An example of This will look like
// *(14 INT : 86400000 INTERVAL) with a return type of INTERVAL. This rexCall
// can be a parameter of some date time expression (e.g. time_add(time, interval))
// which can be folded if the time portion is constant.
if (SqlTypeUtil.isInterval(call.getType())) {
return false;
}
// operands should all be literals or an implicit cast of a literal
for (RexNode operand : call.getOperands()) {
if (!isLiteralOrCastOfLiteral(operand) && !isIntervalConst(operand)) {
return false;
}
}
// special cast to ignore: An implicit cast from a decimal to a double
// needs to be kept as/is. If there is a partition on the double column,
// the exact value is needed. Converting to a double causes an inexact
// value to be created and interferes with the partitioned directory name.
if (isImplicitCastDecimalToInexact(call)) {
return false;
}
return true;
}
public static boolean isImplicitCastDecimalToInexact(RexCall call) {
if (call.getKind() != SqlKind.CAST) {
return false;
}
SqlTypeName sqlTypeName = call.getType().getSqlTypeName();
if (sqlTypeName != SqlTypeName.DOUBLE && sqlTypeName != SqlTypeName.FLOAT) {
return false;
}
RexNode operand = call.getOperands().get(0);
if (!(operand instanceof RexLiteral)) {
return false;
}
if (operand.getType().getSqlTypeName() != SqlTypeName.DECIMAL) {
return false;
}
return true;
}
private static boolean isLiteralOrCastOfLiteral(RexNode operand) {
while ((operand instanceof RexCall) &&
((RexCall) operand).getKind() == SqlKind.CAST) {
operand = ((RexCall) operand).getOperands().get(0);
}
return (operand instanceof RexLiteral);
}
private static boolean isIntervalConst(RexNode operand) {
if (!(operand instanceof RexCall)) {
return false;
}
RexCall call = (RexCall) operand;
if (!call.getKind().equals(SqlKind.TIMES)) {
return false;
}
if (!SqlTypeUtil.isInterval(call.getType())) {
return false;
}
if (!(call.getOperands().get(0) instanceof RexLiteral) &&
!(call.getOperands().get(1) instanceof RexLiteral)) {
return false;
}
return true;
}
private static RexNode getRexNodeFromColumnValue(RexBuilder builder, RexNode constExp,
TColumnValue colVal) {
RelDataType returnType = constExp.getType();
if (colVal.isSetBool_val()) {
return builder.makeLiteral(colVal.bool_val, returnType);
} else if (colVal.isSetByte_val()) {
return builder.makeLiteral(colVal.byte_val, returnType);
} else if (colVal.isSetShort_val()) {
return builder.makeLiteral(colVal.short_val, returnType);
} else if (colVal.isSetInt_val()) {
return builder.makeLiteral(colVal.int_val, returnType);
} else if (colVal.isSetLong_val()) {
return builder.makeLiteral(colVal.long_val, returnType);
} else if (colVal.isSetDouble_val()) {
return builder.makeLiteral(colVal.double_val, returnType);
} else if (colVal.isSetString_val()) {
if (returnType.getSqlTypeName() == SqlTypeName.TIMESTAMP) {
RexNode timestamp = builder.makeTimestampLiteral(
new TimestampString(colVal.string_val),
constExp.getType().getPrecision());
RelDataType type = builder.getTypeFactory().createTypeWithNullability(
timestamp.getType(), constExp.getType().isNullable());
return builder.makeCast(type, timestamp, true);
}
// decimal is in string val, strings are in binaryVal
return builder.makeLiteral(new BigDecimal(colVal.string_val), returnType, true);
} else if (colVal.isSetBinary_val()) {
byte[] bytes = new byte[colVal.binary_val.remaining()];
colVal.binary_val.get(bytes);
// Converting strings between the BE/FE does not work properly for the
// extended ASCII characters above 127. Bail in such cases to avoid
// producing incorrect results.
for (byte b: bytes) {
if (b < 0) {
return constExp;
}
}
try {
String newString = new String(bytes, "US-ASCII");
return builder.makeLiteral(newString, returnType, true);
} catch (UnsupportedEncodingException e) {
LOG.debug("Could not interpret return value for " + colVal);
return constExp;
}
}
Preconditions.checkState(!colVal.isSetTimestamp_val(),
"Simplified into timestamp constant but this should not happen");
Preconditions.checkState(!colVal.isSetDecimal_val(),
"Simplified into decimal constant but this should not happen");
return builder.makeNullLiteral(returnType);
}
/**
* The RexNode shuttle that walks through the expression.
*/
private static class ReduceRexNodeShuttle extends RexShuttle {
private final Analyzer analyzer_;
private final TQueryCtx queryCtx_;
private final RexBuilder rexBuilder_;
private final Reducer reducer_;
private ReduceRexNodeShuttle(Analyzer analyzer, TQueryCtx queryCtx,
RexBuilder rexBuilder, Reducer reducer) {
this.analyzer_ = analyzer;
this.queryCtx_ = queryCtx;
this.rexBuilder_ = rexBuilder;
this.reducer_ = reducer;
}
@Override
public RexNode visitCall(RexCall call) {
// recursively call children first.
RexNode reducedNode = super.visitCall(call);
if (!isReducible(reducedNode)) {
return reducedNode;
}
RexCall reducedCall = (RexCall) reducedNode;
try {
// convert RexNode into Expr objects
ExprConjunctsConverter converter = new ExprConjunctsConverter(
reducedCall, null, rexBuilder_, analyzer_, false);
Preconditions.checkState(converter.getImpalaConjuncts().size() == 1);
Expr expr = converter.getImpalaConjuncts().get(0);
if (expr instanceof FunctionCallExpr) {
FunctionCallExpr funcCallExpr = (FunctionCallExpr) expr;
if (!funcCallExpr.isConstantImpl()) {
return reducedCall;
}
}
TColumnValue colValue = reducer_.reduce(expr, queryCtx_);
return getRexNodeFromColumnValue(rexBuilder_, reducedCall, colValue);
} catch (Exception e) {
LOG.debug("Exception thrown in constant folding, constant folding " +
"not done: " + e);
LOG.debug(ExceptionUtils.getStackTrace(e));
return reducedCall;
}
}
}
/**
* Interface which allows a hook for JUnit tests, avoiding the necessity of
* a call to the backend.
*/
public static interface Reducer {
public TColumnValue reduce(Expr expr, TQueryCtx queryCtx) throws ImpalaException;
}
/**
* Implementation of the Reducer class which calls the backend via FeSupport.
*/
public static class ReducerImpl implements Reducer {
@Override
public TColumnValue reduce(Expr expr, TQueryCtx queryCtx) throws ImpalaException {
return FeSupport.EvalExprWithoutRowBounded(expr, queryCtx,
LiteralExpr.MAX_STRING_LITERAL_SIZE);
}
}
}

View File

@@ -0,0 +1,104 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.calcite.rules;
import org.apache.calcite.DataContexts;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexExecutorImpl;
import org.apache.calcite.rex.RexLiteral;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.type.SqlTypeName;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* RemoveUnraggedCharCastRexExecutor is a workaround needed until a
* solution is found for CALCITE-7244. Calcite always creates a string
* RexLiteral as type CHAR<x>, whereas Impala needs the string literal as
* STRING (a "create table my_string_tbl as select 'hello'" will create a
* STRING type column). The conversion from CHAR to STRING is handled in
* the CoercedNodes part of the code which is run after the SqlNode abstract
* syntax tree has been converted into the Logical Node tree.
*
* However, there is a special case that causes problems that occurs before
* the RelNode tree creation, at SqlToRelNodeConverter time. In the case of
* the following query: "values(('big'), ('bigger'))", the SqlToRelNodeConverter
* needs to ensure that both rows are the same size. So a char cast of the bigger
* size is added around the first string.
*
* After Calcite creates this cast, the string changes when it is reduced. The
* 'big' string becomes a space padded 'big ' string. This Calcite reduction
* causes results inconsistent from what the original Impala planner produces.
*
* In order to avoid this, this executor removes the outer cast if the operand
* of a CHAR cast expression is a CHAR cast.
*
* Note that the only possible place this expression can happen is through this
* mechanism. The CAST operator is only used for implicit chars, that is, casts
* that are created internally. If the SQL actually contained a cast, the
* EXPLICIT_CAST operator would be used and the logic here would not affect
* that expression.
*/
public class RemoveUnraggedCharCastRexExecutor extends RexExecutorImpl {
protected static final Logger LOG =
LoggerFactory.getLogger(RemoveUnraggedCharCastRexExecutor.class.getName());
public RemoveUnraggedCharCastRexExecutor() {
super(DataContexts.EMPTY);
}
@Override
public void reduce(RexBuilder rexBuilder, List<RexNode> constExps,
List<RexNode> reducedValues) {
if (!rexNodeIsCharCastOfChar(constExps)) {
super.reduce(rexBuilder, constExps, reducedValues);
return;
}
reducedValues.add(((RexCall)constExps.get(0)).getOperands().get(0));
}
private boolean rexNodeIsCharCastOfChar(List<RexNode> rexNodes) {
if (rexNodes.size() > 1 || rexNodes.size() == 0) {
return false;
}
if (!(rexNodes.get(0) instanceof RexCall)) {
return false;
}
RexCall constExp = (RexCall) rexNodes.get(0);
if (!constExp.getKind().equals(SqlKind.CAST)) {
return false;
}
if (!constExp.getType().getSqlTypeName().equals(SqlTypeName.CHAR)) {
return false;
}
RexNode operand = constExp.getOperands().get(0);
if (!(operand instanceof RexLiteral)) {
return false;
}
if (!operand.getType().getSqlTypeName().equals(SqlTypeName.CHAR)) {
return false;
}
return true;
}
}

View File

@@ -145,7 +145,7 @@ public class CalciteJniFrontend extends JniFrontend {
// Optimize the query
CalciteOptimizer optimizer = new CalciteOptimizer(validator,
queryCtx.getTimeline());
mdHandler.getAnalyzer(), queryCtx.getTimeline(), queryCtx.getTQueryCtx());
ImpalaPlanRel optimizedPlan = optimizer.optimize(logicalPlan);
markEvent(mdHandler, optimizedPlan, queryCtx, "Optimized logical plan");

View File

@@ -40,6 +40,7 @@ import org.apache.calcite.sql.SqlExplainLevel;
import org.apache.calcite.sql.validate.SqlValidator;
import org.apache.calcite.sql2rel.RelFieldTrimmer;
import org.apache.calcite.tools.RelBuilder;
import org.apache.impala.analysis.Analyzer;
import org.apache.impala.calcite.coercenodes.CoerceNodes;
import org.apache.impala.calcite.operators.ImpalaRexSimplify;
import org.apache.impala.calcite.rel.node.ConvertToImpalaRelRules;
@@ -47,8 +48,10 @@ import org.apache.impala.calcite.rel.node.ImpalaPlanRel;
import org.apache.impala.calcite.rules.ImpalaCoreRules;
import org.apache.impala.calcite.rules.ImpalaFilterSimplifyRule;
import org.apache.impala.calcite.rules.ImpalaProjectSimplifyRule;
import org.apache.impala.calcite.rules.ImpalaRexExecutor;
import org.apache.impala.calcite.util.LogUtil;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.thrift.TQueryCtx;
import org.apache.impala.util.EventSequence;
import java.util.List;
@@ -71,6 +74,10 @@ public class CalciteOptimizer implements CompilerStep {
private final EventSequence timeline_;
private final Analyzer analyzer_;
private final TQueryCtx queryCtx_;
JoinProjectTransposeRule.Config JOIN_PROJECT_LEFT =
JoinProjectTransposeRule.Config.LEFT_OUTER
.withOperandSupplier(b0 ->
@@ -95,12 +102,17 @@ public class CalciteOptimizer implements CompilerStep {
this.reader_ = analysisResult.getCatalogReader();
this.validator_ = analysisResult.getSqlValidator();
this.timeline_ = timeline;
this.analyzer_ = analysisResult.getAnalyzer();
this.queryCtx_ = analyzer_.getQueryCtx();
}
public CalciteOptimizer(CalciteValidator validator, EventSequence timeline) {
public CalciteOptimizer(CalciteValidator validator, Analyzer analyzer,
EventSequence timeline, TQueryCtx queryCtx) {
this.reader_ = validator.getCatalogReader();
this.validator_ = validator.getSqlValidator();
this.timeline_ = timeline;
this.queryCtx_ = queryCtx;
this.analyzer_ = analyzer;
}
public ImpalaPlanRel optimize(RelNode logPlan) throws ImpalaException {
@@ -109,7 +121,9 @@ public class CalciteOptimizer implements CompilerStep {
RexBuilder rexBuilder = logPlan.getCluster().getRexBuilder();
ImpalaRexSimplify simplifier = new ImpalaRexSimplify(rexBuilder, RexUtil.EXECUTOR);
ImpalaRexExecutor rexExecutor = new ImpalaRexExecutor(analyzer_, queryCtx_,
new ImpalaRexExecutor.ReducerImpl());
ImpalaRexSimplify simplifier = new ImpalaRexSimplify(rexBuilder, rexExecutor);
// Run some essential rules needed to create working RelNodes before doing
// optimization

View File

@@ -53,6 +53,8 @@ import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.RelBuilder;
import org.apache.impala.calcite.operators.ImpalaConvertletTable;
import org.apache.impala.calcite.rules.ImpalaCoreRules;
import org.apache.impala.calcite.rules.ImpalaRexExecutor;
import org.apache.impala.calcite.rules.RemoveUnraggedCharCastRexExecutor;
import org.apache.impala.calcite.schema.ImpalaRelMetadataProvider;
import org.apache.impala.calcite.util.LogUtil;
@@ -87,6 +89,7 @@ public class CalciteRelNodeConverter implements CompilerStep {
this.sqlValidator_ = analysisResult.getSqlValidator();
this.planner_ = new VolcanoPlanner();
planner_.addRelTraitDef(ConventionTraitDef.INSTANCE);
planner_.setExecutor(new RemoveUnraggedCharCastRexExecutor());
cluster_ =
RelOptCluster.create(planner_, new RexBuilder(typeFactory_));
viewExpander_ = createViewExpander(
@@ -100,6 +103,7 @@ public class CalciteRelNodeConverter implements CompilerStep {
this.sqlValidator_ = validator.getSqlValidator();
this.planner_ = new VolcanoPlanner();
planner_.addRelTraitDef(ConventionTraitDef.INSTANCE);
planner_.setExecutor(new RemoveUnraggedCharCastRexExecutor());
cluster_ =
RelOptCluster.create(planner_, new RexBuilder(typeFactory_));
viewExpander_ = createViewExpander(validator.getCatalogReader()

View File

@@ -0,0 +1,412 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.impala.planner;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.common.RuntimeEnv;
import org.apache.impala.thrift.TQueryOptions;
import org.apache.impala.thrift.TSessionState;
import org.junit.AfterClass;
import org.junit.BeforeClass;
import org.junit.Test;
import org.junit.rules.TemporaryFolder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.metadata.DefaultRelMetadataProvider;
import org.apache.calcite.rel.metadata.JaninoRelMetadataProvider;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexExecutor;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlNode;
import org.apache.impala.analysis.Analyzer;
import org.apache.impala.analysis.Expr;
import org.apache.impala.calcite.operators.ImpalaOperatorTable;
import org.apache.impala.calcite.rules.ImpalaRexExecutor;
import org.apache.impala.calcite.service.CalciteJniFrontend.QueryContext;
import org.apache.impala.calcite.service.CalciteMetadataHandler;
import org.apache.impala.calcite.service.CalciteQueryParser;
import org.apache.impala.calcite.service.CalciteRelNodeConverter;
import org.apache.impala.calcite.service.CalciteValidator;
import org.apache.impala.calcite.type.ImpalaTypeSystemImpl;
import org.apache.impala.catalog.BuiltinsDb;
import org.apache.impala.thrift.TColumnValue;
import org.apache.impala.thrift.TQueryCtx;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import static org.junit.Assert.*;
public class TestReduceExprShuttle extends PlannerTestBase {
// Query option to run each planner test.
private static TQueryOptions options =
tpcdsParquetQueryOptions();
@BeforeClass
public static void setUpClass() throws Exception {
RuntimeEnv.INSTANCE.setTestEnv(true);
ImpalaOperatorTable.create(BuiltinsDb.getInstance());
RelMetadataQuery.THREAD_PROVIDERS.set(
JaninoRelMetadataProvider.of(DefaultRelMetadataProvider.INSTANCE));
}
@AfterClass
public static void cleanUpClass() {
RuntimeEnv.INSTANCE.reset();
}
@Test
public void testFoldAddTinyInt() {
try {
ReduceShuttleObjects queryObj = new ReduceShuttleObjects("SELECT 1 + 1");
TColumnValue reducedValue = new TColumnValue();
reducedValue.setShort_val((short)2);
TestReducerTmp testReducer = new TestReducerTmp("add(1, 1)", reducedValue);
List<RexNode> reducedExprs = new ArrayList<>();
RexExecutor executor = new ImpalaRexExecutor(
queryObj.analyzer_, queryObj.queryCtx_, testReducer);
executor.reduce(queryObj.rexBuilder_, queryObj.project_.getProjects(),
reducedExprs);
assertEquals(1, reducedExprs.size(), 1);
assertEquals("2:SMALLINT", reducedExprs.get(0).toString());
} catch (ImpalaException e) {
throw new RuntimeException(e);
}
}
@Test
public void testFoldAddSmallInt() {
try {
String expr = "CAST(1 AS SMALLINT) + CAST(2 AS SMALLINT)";
ReduceShuttleObjects queryObj =
new ReduceShuttleObjects("SELECT " + expr);
TColumnValue reducedCast1 = new TColumnValue();
reducedCast1.setShort_val((short)1);
TColumnValue reducedCast2 = new TColumnValue();
reducedCast2.setShort_val((short)2);
TColumnValue reducedAdd = new TColumnValue();
reducedAdd.setInt_val((int)3);
Map<String, TColumnValue> map = ImmutableMap.of
("1", reducedCast1,
"2", reducedCast2,
"add(1, 2)", reducedAdd);
TestReducerTmp testReducer = new TestReducerTmp(map);
List<RexNode> reducedExprs = new ArrayList<>();
RexExecutor executor = new ImpalaRexExecutor(
queryObj.analyzer_, queryObj.queryCtx_, testReducer);
executor.reduce(queryObj.rexBuilder_, queryObj.project_.getProjects(),
reducedExprs);
assertEquals(1, reducedExprs.size(), 1);
assertEquals("CAST(3):INTEGER", reducedExprs.get(0).toString());
} catch (ImpalaException e) {
throw new RuntimeException(e);
}
}
@Test
public void testFoldAddInt() {
try {
String expr = "CAST(1 AS INT) + CAST(2 AS INT)";
ReduceShuttleObjects queryObj =
new ReduceShuttleObjects("SELECT " + expr);
TColumnValue reducedCast1 = new TColumnValue();
reducedCast1.setInt_val(1);
TColumnValue reducedCast2 = new TColumnValue();
reducedCast2.setInt_val(2);
TColumnValue reducedAdd = new TColumnValue();
reducedAdd.setLong_val(3);
Map<String, TColumnValue> map = ImmutableMap.of
("1", reducedCast1,
"2", reducedCast2,
"add(1, 2)", reducedAdd);
TestReducerTmp testReducer = new TestReducerTmp(map);
List<RexNode> reducedExprs = new ArrayList<>();
RexExecutor executor = new ImpalaRexExecutor(
queryObj.analyzer_, queryObj.queryCtx_, testReducer);
executor.reduce(queryObj.rexBuilder_, queryObj.project_.getProjects(),
reducedExprs);
assertEquals(1, reducedExprs.size(), 1);
assertEquals("CAST(3:BIGINT):BIGINT", reducedExprs.get(0).toString());
} catch (ImpalaException e) {
throw new RuntimeException(e);
}
}
@Test
public void testFoldAddDecimal() {
try {
String expr = "1.1 + 2.2";
ReduceShuttleObjects queryObj =
new ReduceShuttleObjects("SELECT " + expr);
TColumnValue reducedCast1 = new TColumnValue();
reducedCast1.setString_val("3.3");
Map<String, TColumnValue> map = ImmutableMap.of
("add(1.1, 2.2)", reducedCast1);
TestReducerTmp testReducer = new TestReducerTmp(map);
List<RexNode> reducedExprs = new ArrayList<>();
RexExecutor executor = new ImpalaRexExecutor(
queryObj.analyzer_, queryObj.queryCtx_, testReducer);
executor.reduce(queryObj.rexBuilder_, queryObj.project_.getProjects(),
reducedExprs);
assertEquals(1, reducedExprs.size(), 1);
assertEquals("3.3:DECIMAL(3, 1)", reducedExprs.get(0).toString());
} catch (ImpalaException e) {
throw new RuntimeException(e);
}
}
@Test
public void testFoldConcatString() {
try {
String expr = "CONCAT(cast('a' as string), cast('b' as string))";
ReduceShuttleObjects queryObj =
new ReduceShuttleObjects("SELECT " + expr);
TColumnValue reducedCast1 = new TColumnValue();
reducedCast1.setBinary_val("a".getBytes());
TColumnValue reducedCast2 = new TColumnValue();
reducedCast2.setBinary_val("b".getBytes());
TColumnValue reducedAdd = new TColumnValue();
reducedAdd.setBinary_val("ab".getBytes());
Map<String, TColumnValue> map = ImmutableMap.of
("'a'", reducedCast1,
"'b'", reducedCast2,
"concat('a', 'b')", reducedAdd);
TestReducerTmp testReducer = new TestReducerTmp(map);
List<RexNode> reducedExprs = new ArrayList<>();
RexExecutor executor = new ImpalaRexExecutor(
queryObj.analyzer_, queryObj.queryCtx_, testReducer);
executor.reduce(queryObj.rexBuilder_, queryObj.project_.getProjects(),
reducedExprs);
assertEquals(1, reducedExprs.size(), 1);
assertEquals("CAST(_UTF-8'ab':VARCHAR(2147483647) CHARACTER SET \"UTF-8\"):" +
"VARCHAR(2147483647) CHARACTER SET \"UTF-8\"", reducedExprs.get(0).toString());
} catch (ImpalaException e) {
throw new RuntimeException(e);
}
}
@Test
public void testBoolean() {
try {
String expr = "istrue(false)";
ReduceShuttleObjects queryObj =
new ReduceShuttleObjects("SELECT " + expr);
TColumnValue reducedValue = new TColumnValue();
reducedValue.setBool_val(false);
TestReducerTmp testReducer = new TestReducerTmp("istrue(FALSE)", reducedValue);
List<RexNode> reducedExprs = new ArrayList<>();
RexExecutor executor = new ImpalaRexExecutor(
queryObj.analyzer_, queryObj.queryCtx_, testReducer);
executor.reduce(queryObj.rexBuilder_, queryObj.project_.getProjects(),
reducedExprs);
assertEquals(1, reducedExprs.size(), 1);
assertEquals("CAST(false):BOOLEAN", reducedExprs.get(0).toString());
} catch (ImpalaException e) {
throw new RuntimeException(e);
}
}
@Test
public void testPartialExpr() {
try {
ReduceShuttleObjects queryObj = new ReduceShuttleObjects(
"SELECT 1 + 1 + tinyint_col from functional.alltypestiny");
TColumnValue reducedValue = new TColumnValue();
reducedValue.setShort_val((short)2);
TestReducerTmp testReducer = new TestReducerTmp("add(1, 1)", reducedValue);
List<RexNode> reducedExprs = new ArrayList<>();
RexExecutor executor = new ImpalaRexExecutor(
queryObj.analyzer_, queryObj.queryCtx_, testReducer);
executor.reduce(queryObj.rexBuilder_, queryObj.project_.getProjects(),
reducedExprs);
assertEquals(1, reducedExprs.size(), 1);
assertEquals("+(2:SMALLINT, $2)", reducedExprs.get(0).toString());
} catch (ImpalaException e) {
throw new RuntimeException(e);
}
}
@Test
public void testNonDeterministic() {
try {
String expr = "rand()";
ReduceShuttleObjects queryObj =
new ReduceShuttleObjects("SELECT " + expr);
TestReducerTmp testReducer = new TestReducerTmp();
List<RexNode> reducedExprs = new ArrayList<>();
RexExecutor executor = new ImpalaRexExecutor(
queryObj.analyzer_, queryObj.queryCtx_, testReducer);
executor.reduce(queryObj.rexBuilder_, queryObj.project_.getProjects(),
reducedExprs);
assertEquals(1, reducedExprs.size(), 1);
assertEquals("RAND()", reducedExprs.get(0).toString());
} catch (ImpalaException e) {
throw new RuntimeException(e);
}
}
@Test
public void testFoldMultipleFields() {
try {
String expr = "1 + 2, 3 + 4";
ReduceShuttleObjects queryObj =
new ReduceShuttleObjects("SELECT " + expr);
TColumnValue reducedAdd1 = new TColumnValue();
reducedAdd1.setShort_val((short)3);
TColumnValue reducedAdd2 = new TColumnValue();
reducedAdd2.setShort_val((short)7);
Map<String, TColumnValue> map = ImmutableMap.of
("add(1, 2)", reducedAdd1,
"add(3, 4)", reducedAdd2);
TestReducerTmp testReducer = new TestReducerTmp(map);
List<RexNode> reducedExprs = new ArrayList<>();
RexExecutor executor = new ImpalaRexExecutor(
queryObj.analyzer_, queryObj.queryCtx_, testReducer);
executor.reduce(queryObj.rexBuilder_, queryObj.project_.getProjects(),
reducedExprs);
assertEquals(2, reducedExprs.size());
assertEquals("3:SMALLINT", reducedExprs.get(0).toString());
assertEquals("7:SMALLINT", reducedExprs.get(1).toString());
} catch (ImpalaException e) {
throw new RuntimeException(e);
}
}
@Test
public void testFoldTimestamp() {
try {
String expr = "add_months(cast('2012-07-01 00:00:00' as timestamp), " +
"cast(2 as integer))";
ReduceShuttleObjects queryObj =
new ReduceShuttleObjects("SELECT " + expr);
TColumnValue reducedTime1 = new TColumnValue();
reducedTime1.setString_val("2012-07-01 00:00:00");
TColumnValue reducedTime2 = new TColumnValue();
reducedTime2.setString_val("2012-09-01 00:00:00");
TColumnValue reducedInt = new TColumnValue();
reducedInt.setInt_val(2);
Map<String, TColumnValue> map = ImmutableMap.of
("'2012-07-01 00:00:00'", reducedTime1,
"add_months(casttotimestamp('2012-07-01 00:00:00'), 2)", reducedTime2,
"2", reducedInt);
TestReducerTmp testReducer = new TestReducerTmp(map);
List<RexNode> reducedExprs = new ArrayList<>();
RexExecutor executor = new ImpalaRexExecutor(
queryObj.analyzer_, queryObj.queryCtx_, testReducer);
executor.reduce(queryObj.rexBuilder_, queryObj.project_.getProjects(),
reducedExprs);
assertEquals(1, reducedExprs.size());
assertEquals("CAST(2012-09-01 00:00:00:TIMESTAMP(15)):TIMESTAMP(15)",
reducedExprs.get(0).toString());
} catch (ImpalaException e) {
throw new RuntimeException(e);
}
}
private static class ReduceShuttleObjects {
public final Analyzer analyzer_;
public final TQueryCtx queryCtx_;
public final RexBuilder rexBuilder_;
public final Project project_;
public ReduceShuttleObjects(String query) throws ImpalaException {
QueryContext queryCtx = new QueryContext(options, frontend_, query);
queryCtx_ = queryCtx.getTQueryCtx();
TSessionState session = new TSessionState();
session.setConnected_user("dummy");
queryCtx.getTQueryCtx().setSession(session);
CalciteQueryParser queryParser = new CalciteQueryParser(queryCtx);
SqlNode parsedSqlNode = queryParser.parse();
CalciteMetadataHandler mdHandler =
new CalciteMetadataHandler(parsedSqlNode, queryCtx);
analyzer_ = mdHandler.getAnalyzer();
CalciteValidator validator = new CalciteValidator(mdHandler, queryCtx);
SqlNode validatedNode = validator.validate(parsedSqlNode);
CalciteRelNodeConverter relNodeConverter = new CalciteRelNodeConverter(validator);
RelNode rootNode = relNodeConverter.convert(validatedNode);
Preconditions.checkState(rootNode instanceof Project);
project_ = (Project) rootNode;
RelDataTypeFactory typeFactory =
new JavaTypeFactoryImpl(new ImpalaTypeSystemImpl());
rexBuilder_ = new RexBuilder(typeFactory);
}
}
private static class TestReducerTmp implements ImpalaRexExecutor.Reducer {
private Map<String, TColumnValue> valueMap_ = new HashMap<>();
public TestReducerTmp() {
}
public TestReducerTmp(String expr, TColumnValue reducedValue) {
valueMap_.put(expr, reducedValue);
}
public TestReducerTmp(Map<String, TColumnValue> valueMap) {
valueMap_.putAll(valueMap);
}
@Override
public TColumnValue reduce(Expr expr, TQueryCtx queryCtx) throws ImpalaException {
assertTrue(valueMap_.containsKey(expr.toSql()));
return valueMap_.get(expr.toSql());
}
}
}