IMPALA-14405: Labels for Calcite expressions not matching original planner

Calcite sets literal expressions to EXPR$<x> which did not match
expressions given by the Impala planner. For literal expressions
such as "select 1 + 1", Impala creates the column name as "1 + 1".

The field names can be found in the abstract syntax tree, so
they are not set within the CalciteRelNodeConverter before the
logical tree is created.

A small test was added to calcite.test for a basic sanity check,
but more comprehensive tests will be run in the tests/shell module
(e.g. in test_shell_commandline.py and test_shell_interactive) which
contain tests for labels.

Change-Id: Ibd3e6366a284f53807b4b2c42efafa279249c1ea
Reviewed-on: http://gerrit.cloudera.org:8080/23516
Reviewed-by: Steve Carlin <scarlin@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Steve Carlin
2025-08-29 08:58:03 -07:00
parent 420e357b95
commit c67b19daf6
3 changed files with 87 additions and 3 deletions

View File

@@ -17,7 +17,7 @@
package org.apache.impala.calcite.service;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import org.apache.calcite.avatica.util.Quoting;
import org.apache.calcite.plan.ConventionTraitDef;
@@ -37,13 +37,20 @@ import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelRoot;
import org.apache.calcite.rel.core.RelFactories;
import org.apache.calcite.rel.rules.CoreRules;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.sql.SqlBasicCall;
import org.apache.calcite.sql.SqlExplainFormat;
import org.apache.calcite.sql.SqlExplainLevel;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql.SqlSelect;
import org.apache.calcite.sql.SqlWith;
import org.apache.calcite.sql.dialect.MysqlSqlDialect;
import org.apache.calcite.sql.validate.SqlValidator;
import org.apache.calcite.sql.validate.SqlValidatorUtil;
import org.apache.calcite.sql2rel.RelDecorrelator;
import org.apache.calcite.sql2rel.SqlToRelConverter;
import org.apache.calcite.sql2rel.StandardConvertletTable;
@@ -159,6 +166,57 @@ public class CalciteRelNodeConverter implements CompilerStep {
return decorrelatedPlan;
}
/**
* Get the field names given the root level of an AST tree. Calcite creates some
* literal expressions like "1 + 1" as "$EXPR0" whereas Impala sets the field name
* label as "1 + 1", so this method changes the field name appropriately.
*/
public List<String> getFieldNames(SqlNode validatedNode) {
ImmutableList.Builder<String> fieldNamesBuilder = new ImmutableList.Builder();
for (SqlNode selectItem : getSelectList(validatedNode)) {
String fieldName = SqlValidatorUtil.alias(selectItem, 0);
if (fieldName.startsWith("EXPR$")) {
// If it's a Calcite generated field name, it will be of the form "EXPR$"
// We get the actual SQL expression using the toSqlString method. There
// is no Impala Dialect yet, so using MySql dialect to get the field
// name. The language chosen is irrelevant because we only are using it
// to grab the expression as/is to use for the label.
fieldName = selectItem.toSqlString(MysqlSqlDialect.DEFAULT).getSql();
}
fieldNamesBuilder.add(fieldName.toLowerCase());
}
return fieldNamesBuilder.build();
}
/**
* Retrieve the first select list found from the root node.
*/
public List<SqlNode> getSelectList(SqlNode validatedNode) {
// If a with clause exists, it will be on top and we need to
// get its child.
SqlNode firstSelectNode = (validatedNode instanceof SqlWith)
? ((SqlWith) validatedNode).body
: validatedNode;
// Top level could be some kind of "except/intersect" call. Need to
// traverse the tree until we either find a "values" (ROW kind) node
// or a select node.
if (firstSelectNode instanceof SqlBasicCall) {
SqlBasicCall basicCall = (SqlBasicCall) firstSelectNode;
// if it's a "values" clause, there is no select list and
// we just return the values list.
if (basicCall.getOperator().getKind().equals(SqlKind.ROW)) {
return basicCall.getOperandList();
}
// grab the first parameter for the field list. Since it could be
// a "with", we call this method recursively
return getSelectList(basicCall.operand(0));
}
Preconditions.checkState(firstSelectNode instanceof SqlSelect);
return ((SqlSelect)firstSelectNode).getSelectList();
}
public RelOptCluster getCluster() {
return cluster_;
}

View File

@@ -52,6 +52,7 @@ public class CalciteSingleNodePlanner implements SingleNodePlannerIntf {
private final PlannerContext ctx_;
private final CalciteAnalysisResult analysisResult_;
private NodeWithExprs rootNode_;
private List<String> fieldNames_;
public CalciteSingleNodePlanner(PlannerContext ctx) {
ctx_ = ctx;
@@ -63,6 +64,7 @@ public class CalciteSingleNodePlanner implements SingleNodePlannerIntf {
CalciteRelNodeConverter relNodeConverter =
new CalciteRelNodeConverter(analysisResult_);
RelNode logicalPlan = relNodeConverter.convert(analysisResult_.getValidatedNode());
fieldNames_ = relNodeConverter.getFieldNames(analysisResult_.getValidatedNode());
// Optimize the query
CalciteOptimizer optimizer =
@@ -89,7 +91,7 @@ public class CalciteSingleNodePlanner implements SingleNodePlannerIntf {
@Override
public List<String> getColLabels() {
return rootNode_.fieldNames_;
return fieldNames_;
}
@Override
@@ -97,7 +99,7 @@ public class CalciteSingleNodePlanner implements SingleNodePlannerIntf {
TResultSetMetadata metadata = new TResultSetMetadata();
int colCnt = rootNode_.outputExprs_.size();
for (int i = 0; i < colCnt; ++i) {
TColumn colDesc = new TColumn(rootNode_.fieldNames_.get(i).toLowerCase(),
TColumn colDesc = new TColumn(fieldNames_.get(i),
rootNode_.outputExprs_.get(i).getType().toThrift());
metadata.addToColumns(colDesc);
}

View File

@@ -1097,3 +1097,27 @@ select ndv(varchar_col, 0) from functional.chars_medium;
---- CATCH
Error in NDV function, second parameter needs to be between 1 and 10.
====
---- QUERY
# Labels test
select 2, 1 + 1;
---- LABELS
2,1 + 1
---- RESULTS
2,2
---- TYPES
tinyint,smallint
---- RUNTIME_PROFILE
row_regex: .*PlannerType: CalcitePlanner.*
====
---- QUERY
# Labels test
select length('hello')
---- LABELS
length('hello')
---- RESULTS
5
---- TYPES
int
---- RUNTIME_PROFILE
row_regex: .*PlannerType: CalcitePlanner.*
====