IMPALA-13516: Fix handling of cast functions

There were some cast functions that were failing. There were
several reasons behind this.  One reason was because Calcite
classifies all integers as an "int" even if they can be other
smaller types (e.g. tinyint). Normally this is handled by the
"CoerceNodes" portion, but it is impossible to tell the type if the
query had the phrase "select cast(1 as integer)" or "select 1"
since both would show up to CoerceNodes as "select 1:INT"

In order to handle this an "explicit_cast" operator now exists and
is used when the cast function is parsed within the commit. The
explicit_cast operator has to be different from the "cast" Calcite
operator in order to avoid being optimized out in various portions
of the compilation.

Change-Id: I1edabc942de1c4030331bc29612c41b392cd8a05
Reviewed-on: http://gerrit.cloudera.org:8080/22034
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
This commit is contained in:
Steve Carlin
2024-11-05 13:42:18 -08:00
committed by Michael Smith
parent 3d58eca937
commit b703e68cda
7 changed files with 148 additions and 1 deletions

View File

@@ -232,6 +232,10 @@ public class CoerceOperandShuttle extends RexShuttle {
return false;
}
if (rexCall.getOperator().getName().equals("EXPLICIT_CAST")) {
return false;
}
return true;
}

View File

@@ -74,6 +74,7 @@ public class FunctionResolver {
// Map of Calcite names to an Impala function name when the names are different
public static Map<String, String> CALCITE_NAME_TO_IMPALA_FUNC =
ImmutableMap.<String, String> builder()
.put("explicit_cast", "cast")
.put("||", "concat")
.build();

View File

@@ -79,6 +79,10 @@ public class RexCallConverter {
return createCastExpr(rexCall, params, analyzer);
}
if (rexCall.getOperator().getName().toLowerCase().equals("explicit_cast")) {
return createCastExpr(rexCall, params, analyzer);
}
String funcName = rexCall.getOperator().getName().toLowerCase();
// Date addition expressions have special handling.

View File

@@ -125,7 +125,7 @@ public class CommonOperatorFunctions {
}
// return true if any operand type is nullable.
private static boolean isNullable(List<RelDataType> operandTypes) {
public static boolean isNullable(List<RelDataType> operandTypes) {
return operandTypes.stream().anyMatch(rdt -> rdt.isNullable());
}
}

View File

@@ -0,0 +1,103 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to you under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.impala.calcite.operators;
import com.google.common.base.Preconditions;
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.sql.SqlFunctionCategory;
import org.apache.calcite.sql.SqlOperatorBinding;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlSyntax;
import org.apache.calcite.sql.type.OperandTypes;
import org.apache.calcite.sql.type.ReturnTypes;
import org.apache.calcite.util.Optionality;
import org.apache.impala.calcite.functions.FunctionResolver;
import org.apache.impala.calcite.type.ImpalaTypeConverter;
import org.apache.impala.calcite.type.ImpalaTypeSystemImpl;
import org.apache.impala.catalog.Function;
import java.util.List;
/**
* ImpalaCastFunction is the operator that handles explicit casts within the
* parsed query. It is created in order to avoid Calcite optimizing out the
* operator where we can potentially lose the originally designated type.
*/
public class ImpalaCastFunction extends ImpalaOperator {
public static ImpalaCastFunction INSTANCE = new ImpalaCastFunction();
public ImpalaCastFunction() {
super("EXPLICIT_CAST");
}
@Override
public RelDataType inferReturnType(SqlOperatorBinding opBinding) {
final List<RelDataType> operandTypes =
CommonOperatorFunctions.getOperandTypes(opBinding);
String castFunctionName = "castto" + getCastToName(operandTypes.get(1));
RexBuilder rexBuilder =
new RexBuilder(new JavaTypeFactoryImpl(new ImpalaTypeSystemImpl()));
RelDataTypeFactory factory = rexBuilder.getTypeFactory();
List<RelDataType> castFromList = operandTypes.subList(0, 1);
return CommonOperatorFunctions.isNullable(castFromList)
? factory.createTypeWithNullability(operandTypes.get(1), true)
: operandTypes.get(1);
}
private String getCastToName(RelDataType type) {
switch (type.getSqlTypeName()) {
case TINYINT:
return "tinyint";
case SMALLINT:
return "smallint";
case INTEGER:
return "int";
case BIGINT:
return "bigint";
case VARCHAR:
return "string";
case BOOLEAN:
return "boolean";
case FLOAT:
return "float";
case REAL:
case DOUBLE:
return "double";
case DECIMAL:
return "decimal";
case CHAR:
return "char";
case TIMESTAMP:
return "timestamp";
case DATE:
return "date";
case BINARY:
return "binary";
default:
throw new RuntimeException("Type " + type + " not supported for casting.");
}
}
}

View File

@@ -16,13 +16,20 @@
*/
package org.apache.impala.calcite.operators;
import com.google.common.collect.Lists;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlCall;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
import org.apache.calcite.sql2rel.ReflectiveConvertletTable;
import org.apache.calcite.sql2rel.SqlRexContext;
import org.apache.calcite.sql2rel.SqlRexConvertlet;
import org.apache.calcite.sql2rel.StandardConvertletTable;
import org.apache.impala.calcite.operators.ImpalaCustomOperatorTable;
import java.util.List;
/**
*
*/
@@ -32,6 +39,7 @@ public class ImpalaConvertletTable extends ReflectiveConvertletTable {
public ImpalaConvertletTable() {
addAlias(ImpalaCustomOperatorTable.PERCENT_REMAINDER, SqlStdOperatorTable.MOD);
registerOp(ImpalaCastFunction.INSTANCE, this::convertExplicitCast);
}
@Override
@@ -44,7 +52,22 @@ public class ImpalaConvertletTable extends ReflectiveConvertletTable {
return super.get(call);
}
// EXPLICIT_CAST convertlet has to be handled by our convertlet. Operation
// was registered in the constructor and it will call convertExplicitCast
if (call.getOperator().getName().equals("EXPLICIT_CAST")) {
return super.get(call);
}
return StandardConvertletTable.INSTANCE.get(call);
}
protected RexNode convertExplicitCast(
SqlRexContext cx, SqlCall call) {
final SqlNode expr = call.operand(0);
final RexBuilder rexBuilder = cx.getRexBuilder();
RelDataType returnType =
cx.getValidator().getValidatedNodeTypeIfKnown(call);
List<RexNode> operands = Lists.newArrayList(cx.convertExpression(expr));
return rexBuilder.makeCall(returnType, ImpalaCastFunction.INSTANCE, operands);
}
}

View File

@@ -96,6 +96,18 @@ public class ImpalaOperatorTable extends ReflectiveSqlOperatorTable {
}
String lowercaseOpName = opName.getSimple().toLowerCase();
// A little hack. We need our own version of "cast" when it is explicit. But
// we need to use a different name for the function ("explicit_cast") and a
// different type (SqlKind.OTHER instead of SqlKind.CAST) in order to avoid
// conflict problems within Calcite processing. In order to find our operator,
// we look for "cast" and use the "explicit_cast" name as defined in
// ImpalaCastFunction
if (lowercaseOpName.equals("cast")) {
operatorList.add(ImpalaCastFunction.INSTANCE);
return;
}
if (!USE_IMPALA_OPERATOR.contains(lowercaseOpName)) {
// Check Calcite operator table for existence.
SqlStdOperatorTable.instance().lookupOperatorOverloads(opName, category, syntax,