From c67b19daf6b47ac380058ea28de71f5d9a7bfe6f Mon Sep 17 00:00:00 2001 From: Steve Carlin Date: Fri, 29 Aug 2025 08:58:03 -0700 Subject: [PATCH] IMPALA-14405: Labels for Calcite expressions not matching original planner Calcite sets literal expressions to EXPR$ which did not match expressions given by the Impala planner. For literal expressions such as "select 1 + 1", Impala creates the column name as "1 + 1". The field names can be found in the abstract syntax tree, so they are not set within the CalciteRelNodeConverter before the logical tree is created. A small test was added to calcite.test for a basic sanity check, but more comprehensive tests will be run in the tests/shell module (e.g. in test_shell_commandline.py and test_shell_interactive) which contain tests for labels. Change-Id: Ibd3e6366a284f53807b4b2c42efafa279249c1ea Reviewed-on: http://gerrit.cloudera.org:8080/23516 Reviewed-by: Steve Carlin Tested-by: Impala Public Jenkins --- .../service/CalciteRelNodeConverter.java | 60 ++++++++++++++++++- .../service/CalciteSingleNodePlanner.java | 6 +- .../queries/QueryTest/calcite.test | 24 ++++++++ 3 files changed, 87 insertions(+), 3 deletions(-) diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteRelNodeConverter.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteRelNodeConverter.java index e8fde8bee..6d90281e3 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteRelNodeConverter.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteRelNodeConverter.java @@ -17,7 +17,7 @@ package org.apache.impala.calcite.service; -import org.apache.calcite.rel.type.RelDataTypeFactory; +import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; import org.apache.calcite.avatica.util.Quoting; import org.apache.calcite.plan.ConventionTraitDef; @@ -37,13 +37,20 @@ import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.RelRoot; import org.apache.calcite.rel.core.RelFactories; import org.apache.calcite.rel.rules.CoreRules; +import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.schema.SchemaPlus; import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.SqlBasicCall; import org.apache.calcite.sql.SqlExplainFormat; import org.apache.calcite.sql.SqlExplainLevel; +import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.sql.SqlWith; +import org.apache.calcite.sql.dialect.MysqlSqlDialect; import org.apache.calcite.sql.validate.SqlValidator; +import org.apache.calcite.sql.validate.SqlValidatorUtil; import org.apache.calcite.sql2rel.RelDecorrelator; import org.apache.calcite.sql2rel.SqlToRelConverter; import org.apache.calcite.sql2rel.StandardConvertletTable; @@ -159,6 +166,57 @@ public class CalciteRelNodeConverter implements CompilerStep { return decorrelatedPlan; } + /** + * Get the field names given the root level of an AST tree. Calcite creates some + * literal expressions like "1 + 1" as "$EXPR0" whereas Impala sets the field name + * label as "1 + 1", so this method changes the field name appropriately. + */ + public List getFieldNames(SqlNode validatedNode) { + ImmutableList.Builder fieldNamesBuilder = new ImmutableList.Builder(); + + for (SqlNode selectItem : getSelectList(validatedNode)) { + String fieldName = SqlValidatorUtil.alias(selectItem, 0); + if (fieldName.startsWith("EXPR$")) { + // If it's a Calcite generated field name, it will be of the form "EXPR$" + // We get the actual SQL expression using the toSqlString method. There + // is no Impala Dialect yet, so using MySql dialect to get the field + // name. The language chosen is irrelevant because we only are using it + // to grab the expression as/is to use for the label. + fieldName = selectItem.toSqlString(MysqlSqlDialect.DEFAULT).getSql(); + } + fieldNamesBuilder.add(fieldName.toLowerCase()); + } + return fieldNamesBuilder.build(); + } + + /** + * Retrieve the first select list found from the root node. + */ + public List getSelectList(SqlNode validatedNode) { + // If a with clause exists, it will be on top and we need to + // get its child. + SqlNode firstSelectNode = (validatedNode instanceof SqlWith) + ? ((SqlWith) validatedNode).body + : validatedNode; + + // Top level could be some kind of "except/intersect" call. Need to + // traverse the tree until we either find a "values" (ROW kind) node + // or a select node. + if (firstSelectNode instanceof SqlBasicCall) { + SqlBasicCall basicCall = (SqlBasicCall) firstSelectNode; + // if it's a "values" clause, there is no select list and + // we just return the values list. + if (basicCall.getOperator().getKind().equals(SqlKind.ROW)) { + return basicCall.getOperandList(); + } + // grab the first parameter for the field list. Since it could be + // a "with", we call this method recursively + return getSelectList(basicCall.operand(0)); + } + Preconditions.checkState(firstSelectNode instanceof SqlSelect); + return ((SqlSelect)firstSelectNode).getSelectList(); + } + public RelOptCluster getCluster() { return cluster_; } diff --git a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteSingleNodePlanner.java b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteSingleNodePlanner.java index 97ea69669..6dbc6aa7d 100644 --- a/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteSingleNodePlanner.java +++ b/java/calcite-planner/src/main/java/org/apache/impala/calcite/service/CalciteSingleNodePlanner.java @@ -52,6 +52,7 @@ public class CalciteSingleNodePlanner implements SingleNodePlannerIntf { private final PlannerContext ctx_; private final CalciteAnalysisResult analysisResult_; private NodeWithExprs rootNode_; + private List fieldNames_; public CalciteSingleNodePlanner(PlannerContext ctx) { ctx_ = ctx; @@ -63,6 +64,7 @@ public class CalciteSingleNodePlanner implements SingleNodePlannerIntf { CalciteRelNodeConverter relNodeConverter = new CalciteRelNodeConverter(analysisResult_); RelNode logicalPlan = relNodeConverter.convert(analysisResult_.getValidatedNode()); + fieldNames_ = relNodeConverter.getFieldNames(analysisResult_.getValidatedNode()); // Optimize the query CalciteOptimizer optimizer = @@ -89,7 +91,7 @@ public class CalciteSingleNodePlanner implements SingleNodePlannerIntf { @Override public List getColLabels() { - return rootNode_.fieldNames_; + return fieldNames_; } @Override @@ -97,7 +99,7 @@ public class CalciteSingleNodePlanner implements SingleNodePlannerIntf { TResultSetMetadata metadata = new TResultSetMetadata(); int colCnt = rootNode_.outputExprs_.size(); for (int i = 0; i < colCnt; ++i) { - TColumn colDesc = new TColumn(rootNode_.fieldNames_.get(i).toLowerCase(), + TColumn colDesc = new TColumn(fieldNames_.get(i), rootNode_.outputExprs_.get(i).getType().toThrift()); metadata.addToColumns(colDesc); } diff --git a/testdata/workloads/functional-query/queries/QueryTest/calcite.test b/testdata/workloads/functional-query/queries/QueryTest/calcite.test index d464ad74f..a514be751 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/calcite.test +++ b/testdata/workloads/functional-query/queries/QueryTest/calcite.test @@ -1097,3 +1097,27 @@ select ndv(varchar_col, 0) from functional.chars_medium; ---- CATCH Error in NDV function, second parameter needs to be between 1 and 10. ==== +---- QUERY +# Labels test +select 2, 1 + 1; +---- LABELS +2,1 + 1 +---- RESULTS +2,2 +---- TYPES +tinyint,smallint +---- RUNTIME_PROFILE +row_regex: .*PlannerType: CalcitePlanner.* +==== +---- QUERY +# Labels test +select length('hello') +---- LABELS +length('hello') +---- RESULTS +5 +---- TYPES +int +---- RUNTIME_PROFILE +row_regex: .*PlannerType: CalcitePlanner.* +====