IMPALA-14421: Calcite planner: case statement returning wrong types for char, varchar

The 'case' function resolver in the original Impala planner has a quirk in it
which caused issues in the Calcite planner.

The function resolver for the original planner resolves all case statements with
the "boolean" version.  Later on, in the analysis of the CaseExpr, the proper
types are assessed and the necessary casting is added.

The Calcite planner follows a similar path. The resolver always returns boolean
as well and the coerce nodes module determines the proper return type for
the case statement.

Two other related issues are also fixed here:

Literal strings should be treated as type STRING instead of CHAR(X), but a null
should literal should not be changed from a CHAR(x) to a STRING.  This broke a
'case' test in the test framework where the columns were non-literals with type
char(x), and the return value was a "null" which should not have forced a cast
to string.

A cast from a varchar to a varchar should be ignored.

Testing:
Added a test to calcite.test.
Ensured the existing cast test in test_chars.py passed.
Ran through the Jenkins Calcite testing framework.

Change-Id: I82d657f4bfce432c458ee8198188dadf9f23f2ef
Reviewed-on: http://gerrit.cloudera.org:8080/23560
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Steve Carlin
2025-09-05 13:05:18 -07:00
committed by Impala Public Jenkins
parent 8eb1d87edc
commit 52334ba426
5 changed files with 116 additions and 6 deletions

View File

@@ -193,7 +193,8 @@ public class CoerceOperandShuttle extends RexShuttle {
@Override
public RexNode visitLiteral(RexLiteral literal) {
// Coerce CHAR literal types into STRING
if (literal.getType().getSqlTypeName().equals(SqlTypeName.CHAR)) {
if (!literal.isNull() &&
(literal.getType().getSqlTypeName().equals(SqlTypeName.CHAR))) {
return rexBuilder.makeLiteral(RexLiteral.stringValue(literal),
ImpalaTypeConverter.getRelDataType(Type.STRING), true, true);
}
@@ -218,7 +219,15 @@ public class CoerceOperandShuttle extends RexShuttle {
}
private RelDataType getReturnType(RexNode rexNode, Type impalaReturnType) {
private RelDataType getReturnType(RexCall rexCall, Type impalaReturnType) {
// Case is a special case. Currently, there is a quirk in the Impala function
// resolver where it always returns the BOOLEAN signature. So the return type
// is evaluated here by finding the compatible type amongst the "then" clauses.
if (rexCall.getKind() == SqlKind.CASE) {
List<RelDataType> argTypes =
Lists.transform(rexCall.getOperands(), RexNode::getType);
return ImpalaTypeConverter.getCompatibleTypeForCase(argTypes, factory);
}
RelDataType retType = ImpalaTypeConverter.getRelDataType(impalaReturnType);
@@ -228,12 +237,12 @@ public class CoerceOperandShuttle extends RexShuttle {
// have to calculate the precision and scale based on operand types. If
// necessary, this code should be added later.
Preconditions.checkState(!SqlTypeUtil.isDecimal(retType) ||
SqlTypeUtil.isDecimal(rexNode.getType()));
SqlTypeUtil.isDecimal(rexCall.getType()));
// So if the original return type is Decimal and the function resolves to
// decimal, the precision and scale are saved from the original function.
if (SqlTypeUtil.isDecimal(retType)) {
retType = rexNode.getType();
retType = rexCall.getType();
}
return retType;
@@ -374,6 +383,14 @@ public class CoerceOperandShuttle extends RexShuttle {
return fromType;
}
// If both are varchar, return STRING type which
// covers the wildcard varchar and all varchar cases.
if (toImpalaType.equals(Type.VARCHAR)) {
if (fromType.getSqlTypeName().equals(SqlTypeName.VARCHAR)) {
return ImpalaTypeConverter.getRelDataType(Type.STRING);
}
}
if (!toImpalaType.isDecimal() || SqlTypeUtil.isNull(fromType)) {
return ImpalaTypeConverter.getRelDataType(toImpalaType);
}

View File

@@ -22,6 +22,7 @@ import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Lists;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexCall;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlKind;
@@ -100,6 +101,7 @@ public class FunctionResolver {
ImmutableSet.<String> builder()
.add("grouping_id")
.add("count")
.add("case")
.build();
public static Function getSupertypeFunction(RexCall call) {
@@ -185,6 +187,15 @@ public class FunctionResolver {
: getImpalaFunction(lowercaseName, impalaArgTypes, exactMatch);
}
/**
* getSpecialProcessingFunction handles functions that do not fit within the general
* getImpalaFunction retrieval of the function. Such functions include:
*
* - grouping_id() which is grabbed from AggregateFunction
* - count() which can have multiple arguments, but only one argument is resolved
* in the function
* - case() which always returns the "boolean" flavor and then gets resolved later
*/
private static Function getSpecialProcessingFunction(String lowercaseName,
List<Type> impalaArgTypes, boolean exactMatch) {
if (lowercaseName.equals("grouping_id")) {
@@ -206,6 +217,16 @@ public class FunctionResolver {
return getImpalaFunction(lowercaseName, impalaArgTypes, exactMatch);
}
if (lowercaseName.equals("case")) {
// hack: Impala has a quirk in it that the function resolver always returns
// the boolean case signature. The CaseExpr object resolves the actual
// signature that should be used. We need to follow the lead of the regular
// planner because the varchar case does not return the right value if
// we try to resolve it here. It will need to be resolved by the caller.
impalaArgTypes = Lists.newArrayList(Type.BOOLEAN);
return getImpalaFunction(lowercaseName, impalaArgTypes, exactMatch);
}
throw new RuntimeException("Special function not found: " + lowercaseName);
}

View File

@@ -122,7 +122,7 @@ public class RexCallConverter {
switch (rexCall.getOperator().getKind()) {
case CASE:
return createCaseExpr(fn, params, impalaRetType);
return createCaseExpr(fn, params, analyzer);
case POSIX_REGEX_CASE_SENSITIVE:
case POSIX_REGEX_CASE_INSENSITIVE:
return createRegexExpr(fn, params, impalaRetType, rexCall);
@@ -192,14 +192,29 @@ public class RexCallConverter {
return new AnalyzedCaseExpr(fn, impalaRetType, decodeExpr);
}
private static Expr createCaseExpr(Function fn, List<Expr> params, Type retType) {
private static Expr createCaseExpr(Function fn, List<Expr> params,
Analyzer analyzer) throws ImpalaException {
List<CaseWhenClause> caseWhenClauses = new ArrayList<>();
Expr whenParam = null;
// params alternate between "when" and the action expr
Type retType = null;
for (Expr param : params) {
if (whenParam == null) {
whenParam = param;
} else {
// The params are not always analyzed at this phase.
if (!param.isAnalyzed()) {
param.analyze(analyzer);
}
// The return type needs to be evaluated here since the case function
// resolver always returns boolean. At this point, the coercenodes module
// sets all the parameters to be the compatible type, and this is the type
// we use for the return value.
if (retType == null) {
retType = param.getType();
} else {
Preconditions.checkState(retType.equals(param.getType()));
}
caseWhenClauses.add(new CaseWhenClause(whenParam, param));
whenParam = null;
}

View File

@@ -27,12 +27,14 @@ import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.sql.type.SqlTypeName;
import org.apache.impala.analysis.NumericLiteral;
import org.apache.impala.calcite.functions.FunctionResolver;
import org.apache.impala.catalog.ScalarType;
import org.apache.impala.catalog.Type;
import org.apache.impala.catalog.TypeCompatibility;
import org.apache.impala.thrift.TPrimitiveType;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
@@ -377,6 +379,20 @@ public class ImpalaTypeConverter {
return factory.createTypeWithNullability(rdt, false);
}
public static RelDataType getCompatibleTypeForCase(List<RelDataType> dataTypes,
RelDataTypeFactory factory) {
List<RelDataType> compatibleTypes = new ArrayList<>();
for (int i = 0; i < dataTypes.size(); ++i) {
// skip the "when" clauses which are always boolean and only evaluate the
// "then" and "else" clauses.
if (!FunctionResolver.shouldSkipOperandForCase(dataTypes.size(), i)) {
compatibleTypes.add(dataTypes.get(i));
}
}
Preconditions.checkState(compatibleTypes.size() > 0);
return getCompatibleType(compatibleTypes, factory);
}
public static RelDataType getCompatibleType(Collection<RelDataType> dataTypes,
RelDataTypeFactory factory) {
Preconditions.checkState(dataTypes.size() > 0);

View File

@@ -1138,3 +1138,44 @@ select * from (values(0));
---- RUNTIME_PROFILE
row_regex: .*SPOOL_QUERY_RESULTS=0.*
====
---- QUERY
# IMPALA-14421: Repeat of test in chars.test since Calcite
# was returning the wrong types
WITH numbered AS (
SELECT *, row_number() over (order by cs) as rn
FROM functional.chars_tiny)
SELECT *
FROM (
SELECT CASE WHEN rn % 2 = 0 THEN cs END cs,
CASE WHEN rn % 2 = 1 THEN cl END cl,
CASE WHEN rn % 3 = 0 THEN vc END vc
FROM numbered
UNION ALL
SELECT CASE WHEN rn % 2 = 1 THEN cs END cs,
CASE WHEN rn % 2 = 0 THEN cl END cl,
CASE WHEN rn % 3 = 1 THEN vc END vc
FROM numbered) v
---- TYPES
char, char, string
---- HS2_TYPES
char, char, varchar
---- RESULTS
'NULL','1bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb','NULL'
'2aaaa','NULL','NULL'
'NULL','3bbbbb ','3ccc'
'4aa ','NULL','NULL'
'NULL','5bbb ','NULL'
'6a ','NULL','6c'
'NULL','6b ','NULL'
'a ','NULL','NULL'
'NULL','NULL','NULL'
'1aaaa','NULL','1cccc'
'NULL','2bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb','NULL'
'3aaa ','NULL','NULL'
'NULL','4bbbb ','4cc'
'5a ','NULL','NULL'
'NULL','6b ','NULL'
'6a ','NULL','6c'
'NULL','b ','NULL'
'NULL','NULL','NULL'
====