IMPALA-13982: Support regular views for Calcite planner in Impala

Before this patch, the Calcite planner in Impala only supported inline
views like 'temp' in the following query.

  select id from (
    select * from functional.alltypes
  ) as temp;

Regular views, on the other hand, were not supported. For instance, the
Calcite planner in Impala did not support regular views like
'functional.alltypes_view' created via the following statement and
hence queries against such regular views like
"select id from functional.alltypes_view" were not supported.

  CREATE VIEW functional.alltypes_view
  AS SELECT * FROM functional.alltypes;

This patch adds the support for regular views to the Calcite planner
via adding a ViewTable for each regular view in the given query
when populating the Calcite schema. This is similar to how regular
views are supported in PlannerTest#testView() at
https://github.com/apache/calcite/blob/main/core/src/test/java/org/apache/calcite/tools/PlannerTest.java
where the regular view to be tested is added in
https://github.com/apache/calcite/blob/main/testkit/src/main/java/org/apache/calcite/test/CalciteAssert.java.
We do not have to use or extend ViewTableMacro in
Apache Calcite because the information about the data types
returned from a regular view is already available in its respective
FeTable. Therefore, there is no need to parse the SQL statement
representing the regular view and collect the metadata of tables
referenced by the regular view as done by ViewTableMacro.

The patch supports the following cases, where
'functional.alltypes_view' is a regular view defined as
"SELECT * FROM functional.alltypes".
1. select id from functional.alltypes_view.
2. select alltypes_view.id from functional.alltypes_view.
3. select functional.alltypes_view.id from functional.alltypes_view.

Joining a regular view with an HDFS table like the following is also
supported.

  select alltypestiny.id
  from functional.alltypes_view, functional.alltypestiny

Note that after this patch, queries against regular views are supported
only in the legacy catalog mode, not in the local catalog mode. In
fact, queries against HDFS tables in the local catalog mode are not
yet supported by the Calcite planner either. We will deal with this in
IMPALA-14080.

Testing:
 - Added the test cases mentioned above to calcite.test. This makes
   sure the test cases are supported when the Impala server is started
   with the flag '--use_calcite_planner=true'.
 - Manually verified that the test cases above are supported when the
   Impala server is started with the environment variable
   USE_CALCITE_PLANNER set to true and the query option
   use_calcite_planner set to 1.

Change-Id: I600aae816727ae942fb221fae84c2aac63ae1893
Reviewed-on: http://gerrit.cloudera.org:8080/22883
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
This commit is contained in:
Fang-Yu Rao
2025-05-12 16:52:10 -07:00
committed by Joe McDonnell
parent 9a10213891
commit aba3a705a4
7 changed files with 202 additions and 21 deletions

View File

@@ -18,12 +18,23 @@
package org.apache.impala.calcite.schema;
import com.google.common.collect.ImmutableMap;
import org.apache.calcite.adapter.java.JavaTypeFactory;
import org.apache.calcite.prepare.CalciteCatalogReader;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeImpl;
import org.apache.calcite.schema.Table;
import org.apache.calcite.schema.impl.ViewTable;
import org.apache.calcite.schema.impl.AbstractSchema;
import org.apache.impala.calcite.type.ImpalaTypeSystemImpl;
import org.apache.impala.catalog.FeTable;
import org.apache.impala.catalog.HdfsTable;
import org.apache.impala.catalog.View;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.common.UnsupportedFeatureException;
import com.google.common.collect.ImmutableList;
import java.lang.reflect.Type;
import java.util.HashMap;
import java.util.Map;
@@ -50,10 +61,33 @@ public class CalciteDb extends AbstractSchema {
}
public Builder addTable(String tableName, FeTable table) throws ImpalaException {
if (!tableMap_.containsKey(tableName)) {
tableMap_.put(tableName.toLowerCase(), new CalciteTable(table, reader_));
if (tableMap_.containsKey(tableName)) return this;
if (table instanceof HdfsTable) {
tableMap_.put(tableName.toLowerCase(), new CalciteTable(table, reader_));
return this;
}
return this;
if (table instanceof View) {
tableMap_.put(tableName.toLowerCase(), createViewTable(table));
return this;
}
throw new UnsupportedFeatureException(
"Table " + table.getFullName() + " has unsupported type " +
table.getClass().getSimpleName() + ". The Calcite planner only supports " +
"HdfsTable's and View's.");
}
private static ViewTable createViewTable(FeTable feTable) throws ImpalaException {
RelDataType rowType = CalciteTable.buildColumnsForRelDataType(feTable);
JavaTypeFactory typeFactory = (JavaTypeFactory) ImpalaTypeSystemImpl.TYPE_FACTORY;
Type elementType = typeFactory.getJavaClass(rowType);
return new ViewTable(elementType,
RelDataTypeImpl.proto(rowType), ((View) feTable).getQueryStmt().toSql(),
/* schemaPath */ ImmutableList.of(),
/* viewPath */ ImmutableList.of(feTable.getDb().getName().toLowerCase(),
feTable.getName().toLowerCase()));
}
public CalciteDb build() {

View File

@@ -93,7 +93,7 @@ public class CalciteTable extends RelOptAbstractTable
checkIfTableIsSupported(table);
}
private static RelDataType buildColumnsForRelDataType(FeTable table)
public static RelDataType buildColumnsForRelDataType(FeTable table)
throws ImpalaException {
RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl(new ImpalaTypeSystemImpl());

View File

@@ -100,8 +100,15 @@ public class CalciteAnalysisDriver implements AnalysisDriver {
try {
reader_ = CalciteMetadataHandler.createCalciteCatalogReader(stmtTableCache_,
queryCtx_, queryCtx_.session.database);
// When CalciteRelNodeConverter#convert() is called to convert the valid AST into a
// logical plan, ViewTable#expandView() in Apache Calcite would be invoked if a
// regular view is involved in the query. expandView() validates the SQL statement
// defining the view. During the validation, all referenced tables by the regular
// view are required. Thus, we need all the tables in 'stmtTableCache_'.
// Recall that parsedStmt_.getTablesInQuery(null) only contains TableName's in the
// given query but not the underlying tables referenced by a regular view.
CalciteMetadataHandler.populateCalciteSchema(reader_, ctx_.getCatalog(),
parsedStmt_.getTablesInQuery(null));
stmtTableCache_);
typeFactory_ = new JavaTypeFactoryImpl(new ImpalaTypeSystemImpl());
sqlValidator_ = SqlValidatorUtil.newValidator(

View File

@@ -36,15 +36,11 @@ import org.apache.calcite.sql.util.SqlBasicVisitor;
import org.apache.impala.analysis.StmtMetadataLoader;
import org.apache.impala.analysis.TableName;
import org.apache.impala.calcite.schema.CalciteDb;
import org.apache.impala.calcite.schema.CalciteTable;
import org.apache.impala.calcite.schema.ImpalaCalciteCatalogReader;
import org.apache.impala.calcite.type.ImpalaTypeSystemImpl;
import org.apache.impala.catalog.Column;
import org.apache.impala.catalog.FeCatalog;
import org.apache.impala.catalog.FeDb;
import org.apache.impala.catalog.FeTable;
import org.apache.impala.catalog.FeView;
import org.apache.impala.catalog.HdfsTable;
import org.apache.impala.common.ImpalaException;
import org.apache.impala.common.UnsupportedFeatureException;
import org.apache.impala.thrift.TQueryCtx;
@@ -101,7 +97,7 @@ public class CalciteMetadataHandler implements CompilerStep {
// schema needs to contain the columns in the table for validation, which cannot
// be done when it's an IncompleteTable
List<String> errorTables = populateCalciteSchema(reader_,
queryCtx.getFrontend().getCatalog(), tableVisitor.tableNames_);
queryCtx.getFrontend().getCatalog(), stmtTableCache_);
tableVisitor.checkForComplexTable(stmtTableCache_, errorTables, queryCtx);
}
@@ -129,11 +125,12 @@ public class CalciteMetadataHandler implements CompilerStep {
* list of tables in the query that are not found in the database.
*/
public static List<String> populateCalciteSchema(CalciteCatalogReader reader,
FeCatalog catalog, Set<TableName> tableNames) throws ImpalaException {
FeCatalog catalog, StmtMetadataLoader.StmtTableCache stmtTableCache)
throws ImpalaException {
List<String> notFoundTables = new ArrayList<>();
CalciteSchema rootSchema = reader.getRootSchema();
Map<String, CalciteDb.Builder> dbSchemas = new HashMap<>();
for (TableName tableName : tableNames) {
for (TableName tableName : stmtTableCache.tables.keySet()) {
FeDb db = catalog.getDb(tableName.getDb());
// db is not found, this will probably fail in the validation step
if (db == null) {
@@ -147,12 +144,6 @@ public class CalciteMetadataHandler implements CompilerStep {
notFoundTables.add(tableName.toString());
continue;
}
if (!(feTable instanceof HdfsTable)) {
throw new UnsupportedFeatureException(
"Table " + feTable.getFullName() + " has unsupported type " +
feTable.getClass().getSimpleName() + ". The Calcite planner only supports " +
"HDFS tables.");
}
// populate the dbschema with its table, creating the dbschema if it's the
// first instance seen in the query.

View File

@@ -19,6 +19,7 @@ package org.apache.impala.calcite.service;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import com.google.common.collect.ImmutableList;
import org.apache.calcite.avatica.util.Quoting;
import org.apache.calcite.plan.ConventionTraitDef;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptCostImpl;
@@ -35,13 +36,18 @@ import org.apache.calcite.rel.RelRoot;
import org.apache.calcite.rel.core.RelFactories;
import org.apache.calcite.rel.rules.CoreRules;
import org.apache.calcite.rex.RexBuilder;
import org.apache.calcite.sql.parser.SqlParser;
import org.apache.calcite.sql.SqlExplainFormat;
import org.apache.calcite.sql.SqlExplainLevel;
import org.apache.calcite.sql.SqlNode;
import org.apache.calcite.sql2rel.RelDecorrelator;
import org.apache.calcite.sql2rel.SqlToRelConverter;
import org.apache.impala.calcite.operators.ImpalaConvertletTable;
import org.apache.calcite.prepare.PlannerImpl;
import org.apache.calcite.schema.SchemaPlus;
import org.apache.calcite.sql2rel.StandardConvertletTable;
import org.apache.calcite.tools.FrameworkConfig;
import org.apache.calcite.tools.Frameworks;
import org.apache.calcite.tools.RelBuilder;
import org.apache.calcite.sql.validate.SqlValidator;
@@ -60,8 +66,7 @@ public class CalciteRelNodeConverter implements CompilerStep {
protected static final Logger LOG =
LoggerFactory.getLogger(CalciteRelNodeConverter.class.getName());
private static final RelOptTable.ViewExpander NOOP_EXPANDER =
(type, query, schema, path) -> null;
private final RelOptTable.ViewExpander viewExpander_;
private final RelOptCluster cluster_;
@@ -81,6 +86,8 @@ public class CalciteRelNodeConverter implements CompilerStep {
planner_.addRelTraitDef(ConventionTraitDef.INSTANCE);
cluster_ =
RelOptCluster.create(planner_, new RexBuilder(typeFactory_));
viewExpander_ = createViewExpander(
analysisResult.getSqlValidator().getCatalogReader().getRootSchema().plus());
}
public CalciteRelNodeConverter(CalciteValidator validator) {
@@ -91,11 +98,33 @@ public class CalciteRelNodeConverter implements CompilerStep {
planner_.addRelTraitDef(ConventionTraitDef.INSTANCE);
cluster_ =
RelOptCluster.create(planner_, new RexBuilder(typeFactory_));
viewExpander_ = createViewExpander(validator.getCatalogReader()
.getRootSchema().plus());
}
private static RelOptTable.ViewExpander createViewExpander(SchemaPlus schemaPlus) {
SqlParser.Config parserConfig =
SqlParser.configBuilder().setCaseSensitive(false).build()
// This makes SqlParser expect identifiers that require quoting to be
// enclosed by backticks.
.withQuoting(Quoting.BACK_TICK);
FrameworkConfig config = Frameworks.newConfigBuilder()
.defaultSchema(schemaPlus)
// This makes 'connectionConfig' in PlannerImpl case-insensitive, which in turn
// makes the CalciteCatalogReader used to validate the view in
// PlannerImpl#expandView() case-insensitive. Otherwise,
// CalciteRelNodeConverter#convert() would fail.
.parserConfig(parserConfig)
// We need to add ConventionTraitDef.INSTANCE to avoid the call to
// table.getStatistic() in LogicalTableScan#create().
.traitDefs(ConventionTraitDef.INSTANCE)
.build();
return new PlannerImpl(config);
}
public RelNode convert(SqlNode validatedNode) {
SqlToRelConverter relConverter = new SqlToRelConverter(
NOOP_EXPANDER,
viewExpander_,
sqlValidator_,
reader_,
cluster_,

View File

@@ -17,6 +17,7 @@
package org.apache.impala.calcite.type;
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rel.type.RelDataTypeFactory;
import org.apache.calcite.rel.type.RelDataTypeSystemImpl;
@@ -60,6 +61,8 @@ public class ImpalaTypeSystemImpl extends RelDataTypeSystemImpl {
private static final int DEFAULT_FLOAT_PRECISION = 7;
private static final int DEFAULT_DOUBLE_PRECISION = 15;
public static final RelDataTypeFactory TYPE_FACTORY =
new JavaTypeFactoryImpl(new ImpalaTypeSystemImpl());
@Override
public int getMaxScale(SqlTypeName typeName) {

View File

@@ -943,3 +943,120 @@ row_regex:.*partitions=4/4.*
---- RUNTIME_PROFILE
row_regex: .*PlannerType: CalcitePlanner.*
====
---- QUERY
select count(*) from functional.alltypes_view;
---- RESULTS
7300
---- TYPES
BIGINT
====
---- QUERY
select * from functional.alltypes_view order by id limit 10;
---- RESULTS
0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1
1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1
2,true,2,2,2,20,2.200000047683716,20.2,'01/01/09','2',2009-01-01 00:02:00.100000000,2009,1
3,false,3,3,3,30,3.299999952316284,30.3,'01/01/09','3',2009-01-01 00:03:00.300000000,2009,1
4,true,4,4,4,40,4.400000095367432,40.4,'01/01/09','4',2009-01-01 00:04:00.600000000,2009,1
5,false,5,5,5,50,5.5,50.5,'01/01/09','5',2009-01-01 00:05:00.100000000,2009,1
6,true,6,6,6,60,6.599999904632568,60.59999999999999,'01/01/09','6',2009-01-01 00:06:00.150000000,2009,1
7,false,7,7,7,70,7.699999809265137,70.7,'01/01/09','7',2009-01-01 00:07:00.210000000,2009,1
8,true,8,8,8,80,8.800000190734863,80.8,'01/01/09','8',2009-01-01 00:08:00.280000000,2009,1
9,false,9,9,9,90,9.899999618530273,90.89999999999999,'01/01/09','9',2009-01-01 00:09:00.360000000,2009,1
---- TYPES
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP, INT, INT
====
---- QUERY
select id from functional.alltypes_view order by id limit 1;
---- RESULTS
0
---- TYPES
INT
====
---- QUERY
select alltypes_view.id from functional.alltypes_view order by id limit 1;
---- RESULTS
0
---- TYPES
INT
====
---- QUERY
select functional.alltypes_view.id from functional.alltypes_view order by id limit 1;
---- RESULTS
0
---- TYPES
INT
====
---- QUERY
select count(*) from functional.alltypes_view, functional.alltypestiny
where functional.alltypes_view.id = functional.alltypestiny.id;
---- RESULTS
8
---- TYPES
BIGINT
====
---- QUERY
select * from functional.alltypes_view, functional.alltypestiny
where functional.alltypes_view.id = functional.alltypestiny.id
order by functional.alltypes_view.id;
---- RESULTS
0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1,0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1
1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1,1,false,1,1,1,10,1.100000023841858,10.1,'01/01/09','1',2009-01-01 00:01:00,2009,1
2,true,2,2,2,20,2.200000047683716,20.2,'01/01/09','2',2009-01-01 00:02:00.100000000,2009,1,2,true,0,0,0,0,0,0,'02/01/09','0',2009-02-01 00:00:00,2009,2
3,false,3,3,3,30,3.299999952316284,30.3,'01/01/09','3',2009-01-01 00:03:00.300000000,2009,1,3,false,1,1,1,10,1.100000023841858,10.1,'02/01/09','1',2009-02-01 00:01:00,2009,2
4,true,4,4,4,40,4.400000095367432,40.4,'01/01/09','4',2009-01-01 00:04:00.600000000,2009,1,4,true,0,0,0,0,0,0,'03/01/09','0',2009-03-01 00:00:00,2009,3
5,false,5,5,5,50,5.5,50.5,'01/01/09','5',2009-01-01 00:05:00.100000000,2009,1,5,false,1,1,1,10,1.100000023841858,10.1,'03/01/09','1',2009-03-01 00:01:00,2009,3
6,true,6,6,6,60,6.599999904632568,60.59999999999999,'01/01/09','6',2009-01-01 00:06:00.150000000,2009,1,6,true,0,0,0,0,0,0,'04/01/09','0',2009-04-01 00:00:00,2009,4
7,false,7,7,7,70,7.699999809265137,70.7,'01/01/09','7',2009-01-01 00:07:00.210000000,2009,1,7,false,1,1,1,10,1.100000023841858,10.1,'04/01/09','1',2009-04-01 00:01:00,2009,4
---- TYPES
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP, INT, INT, INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP, INT, INT
====
---- QUERY
select id from functional.alltypes_view, functional.alltypestiny
where functional.alltypes_view.id = functional.alltypestiny.id
order by functional.alltypes_view.id;
---- CATCH
SqlValidatorException: Column 'ID' is ambiguous
====
---- QUERY
select alltypestiny.id from functional.alltypes_view, functional.alltypestiny
where functional.alltypes_view.id = functional.alltypestiny.id
order by functional.alltypes_view.id;
---- RESULTS
0
1
2
3
4
5
6
7
---- TYPES
INT
====
---- QUERY
select functional.alltypestiny.id from functional.alltypes_view, functional.alltypestiny
where functional.alltypes_view.id = functional.alltypestiny.id
order by id;
---- RESULTS
0
1
2
3
4
5
6
7
---- TYPES
INT
====
---- QUERY
# This test case makes sure that the definition of a regular view could be correctly
# parsed during view expansion even if database, table, and column names are enclosed in
# backticks.
select * from functional.alltypes_hive_view where id = 0;
---- RESULTS
0,true,0,0,0,0,0,0,'01/01/09','0',2009-01-01 00:00:00,2009,1
---- TYPES
INT, BOOLEAN, TINYINT, SMALLINT, INT, BIGINT, FLOAT, DOUBLE, STRING, STRING, TIMESTAMP, INT, INT
====