From 3f68be2caad9b5ae4b34dacedda6526bac68ae11 Mon Sep 17 00:00:00 2001 From: Alex Behm Date: Fri, 24 Jan 2014 22:32:10 -0800 Subject: [PATCH] IMP-1227: Ignore columns of unsupported types in compute stats. Enclose identifiers that are Impala keywords in quotes. Change-Id: Ie7fa6da2869090428c9229c44b973ecccbb49e8e Reviewed-on: http://gerrit.ent.cloudera.com:8080/1357 Reviewed-by: Alex Behm Tested-by: jenkins Reviewed-on: http://gerrit.ent.cloudera.com:8080/1368 --- .../impala/analysis/BaseTableRef.java | 2 +- .../impala/analysis/ComputeStatsStmt.java | 19 ++++++----- .../analysis/CreateOrAlterViewStmtBase.java | 4 +-- .../impala/analysis/InlineViewRef.java | 2 +- .../impala/analysis/SelectListItem.java | 2 +- .../com/cloudera/impala/analysis/SlotRef.java | 2 +- .../cloudera/impala/analysis/TableName.java | 4 +-- .../cloudera/impala/analysis/ToSqlUtils.java | 25 +++++++++------ .../com/cloudera/impala/analysis/ViewRef.java | 2 +- .../cloudera/impala/analysis/WithClause.java | 2 +- fe/src/main/jflex/sql-scanner.flex | 4 +++ .../cloudera/impala/analysis/ToSqlTest.java | 9 +++++- .../queries/QueryTest/compute-stats.test | 32 +++++++++++++++++++ 13 files changed, 80 insertions(+), 29 deletions(-) diff --git a/fe/src/main/java/com/cloudera/impala/analysis/BaseTableRef.java b/fe/src/main/java/com/cloudera/impala/analysis/BaseTableRef.java index 0a0848d27..5d32db8a7 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/BaseTableRef.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/BaseTableRef.java @@ -111,7 +111,7 @@ public class BaseTableRef extends TableRef { // Enclose the alias in quotes if Hive cannot parse it without quotes. // This is needed for view compatibility between Impala and Hive. String aliasSql = null; - if (alias_ != null) aliasSql = ToSqlUtils.getHiveIdentSql(alias_); + if (alias_ != null) aliasSql = ToSqlUtils.getIdentSql(alias_); return name_.toSql() + ((aliasSql != null) ? " " + aliasSql : ""); } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/ComputeStatsStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/ComputeStatsStmt.java index 630798af9..91b45e42d 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/ComputeStatsStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/ComputeStatsStmt.java @@ -23,7 +23,6 @@ import com.cloudera.impala.catalog.AuthorizationException; import com.cloudera.impala.catalog.Column; import com.cloudera.impala.catalog.HBaseTable; import com.cloudera.impala.catalog.HdfsTable; -import com.cloudera.impala.catalog.PrimitiveType; import com.cloudera.impala.catalog.Table; import com.cloudera.impala.catalog.View; import com.cloudera.impala.common.AnalysisException; @@ -80,11 +79,11 @@ public class ComputeStatsStmt extends StatementBase { // Only add group by clause for HdfsTables. if (table_ instanceof HdfsTable) { for (int i = 0; i < table_.getNumClusteringCols(); ++i) { - String colName = table_.getColumns().get(i).getName(); - groupByCols.add(colName); + String colRefSql = ToSqlUtils.getIdentSql(table_.getColumns().get(i).getName()); + groupByCols.add(colRefSql); // For the select list, wrap the group by columns in a cast to string because // the Metastore stores them as strings. - tableStatsSelectList.add("cast(" + colName + " as string)"); + tableStatsSelectList.add("CAST(" + colRefSql + " AS STRING)"); } } tableStatsQueryBuilder.append(Joiner.on(", ").join(tableStatsSelectList)); @@ -105,15 +104,19 @@ public class ComputeStatsStmt extends StatementBase { int startColIdx = (table_ instanceof HBaseTable) ? 0 : table_.getNumClusteringCols(); for (int i = startColIdx; i < table_.getColumns().size(); ++i) { Column c = table_.getColumns().get(i); + // Ignore columns with an invalid/unsupported type. For example, complex types in + // an HBase-backed table will appear as invalid types. + if (!c.getType().isValid() || !c.getType().isSupported()) continue; // NDV approximation function. Add explicit alias for later identification when // updating the Metastore. - columnStatsSelectList.add("NDV(" + c.getName() + ") AS " + c.getName()); + String colRefSql = ToSqlUtils.getIdentSql(c.getName()); + columnStatsSelectList.add("NDV(" + colRefSql + ") AS " + colRefSql); // Count the number of NULL values. - columnStatsSelectList.add("COUNT(IF(" + c.getName() + " IS NULL, 1, NULL))"); + columnStatsSelectList.add("COUNT(IF(" + colRefSql + " IS NULL, 1, NULL))"); // For STRING columns also compute the max and avg string length. if (c.getType().isStringType()) { - columnStatsSelectList.add("MAX(length(" + c.getName() + "))"); - columnStatsSelectList.add("AVG(length(" + c.getName() + "))"); + columnStatsSelectList.add("MAX(length(" + colRefSql + "))"); + columnStatsSelectList.add("AVG(length(" + colRefSql + "))"); } else { // For non-STRING columns use -1 as the max/avg length to avoid having to // treat STRING columns specially in the BE CatalogOpExecutor. diff --git a/fe/src/main/java/com/cloudera/impala/analysis/CreateOrAlterViewStmtBase.java b/fe/src/main/java/com/cloudera/impala/analysis/CreateOrAlterViewStmtBase.java index 68db164a6..423805e99 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/CreateOrAlterViewStmtBase.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/CreateOrAlterViewStmtBase.java @@ -134,8 +134,8 @@ public abstract class CreateOrAlterViewStmtBase extends StatementBase { StringBuilder sb = new StringBuilder(); sb.append("SELECT "); for (int i = 0; i < finalColDefs_.size(); ++i) { - String colRef = ToSqlUtils.getHiveIdentSql(viewDefStmt_.getColLabels().get(i)); - String colAlias = ToSqlUtils.getHiveIdentSql(finalColDefs_.get(i).getColName()); + String colRef = ToSqlUtils.getIdentSql(viewDefStmt_.getColLabels().get(i)); + String colAlias = ToSqlUtils.getIdentSql(finalColDefs_.get(i).getColName()); sb.append(String.format("%s.%s AS %s", tableName_.getTbl(), colRef, colAlias)); sb.append((i+1 != finalColDefs_.size()) ? ", " : ""); } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/InlineViewRef.java b/fe/src/main/java/com/cloudera/impala/analysis/InlineViewRef.java index b9a980b41..428dcfc00 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/InlineViewRef.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/InlineViewRef.java @@ -267,7 +267,7 @@ public class InlineViewRef extends TableRef { protected String tableRefToSql() { // Enclose the alias in quotes if Hive cannot parse it without quotes. // This is needed for view compatibility between Impala and Hive. - String aliasSql = ToSqlUtils.getHiveIdentSql(alias_); + String aliasSql = ToSqlUtils.getIdentSql(alias_); return "(" + queryStmt_.toSql() + ") " + aliasSql; } } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/SelectListItem.java b/fe/src/main/java/com/cloudera/impala/analysis/SelectListItem.java index be408866a..45b4ae6f7 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/SelectListItem.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/SelectListItem.java @@ -56,7 +56,7 @@ class SelectListItem { // Enclose aliases in quotes if Hive cannot parse them without quotes. // This is needed for view compatibility between Impala and Hive. String aliasSql = null; - if (alias_ != null) aliasSql = ToSqlUtils.getHiveIdentSql(alias_); + if (alias_ != null) aliasSql = ToSqlUtils.getIdentSql(alias_); return expr_.toSql() + ((aliasSql != null) ? " " + aliasSql : ""); } else if (tblName_ != null) { return tblName_.toString() + ".*" + ((alias_ != null) ? " " + alias_ : ""); diff --git a/fe/src/main/java/com/cloudera/impala/analysis/SlotRef.java b/fe/src/main/java/com/cloudera/impala/analysis/SlotRef.java index 97a2ed05f..fe2e5cad9 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/SlotRef.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/SlotRef.java @@ -55,7 +55,7 @@ public class SlotRef extends Expr { super(); this.tblName_ = tblName; this.col_ = col; - this.label_ = ToSqlUtils.getHiveIdentSql(col); + this.label_ = ToSqlUtils.getIdentSql(col); } // C'tor for a "pre-analyzed" ref to a slot diff --git a/fe/src/main/java/com/cloudera/impala/analysis/TableName.java b/fe/src/main/java/com/cloudera/impala/analysis/TableName.java index 2be8a3eba..483fc65fd 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/TableName.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/TableName.java @@ -70,9 +70,9 @@ public class TableName { // Enclose the database and/or table name in quotes if Hive cannot parse them // without quotes. This is needed for view compatibility between Impala and Hive. if (db_ == null) { - return ToSqlUtils.getHiveIdentSql(tbl_); + return ToSqlUtils.getIdentSql(tbl_); } else { - return ToSqlUtils.getHiveIdentSql(db_) + "." + ToSqlUtils.getHiveIdentSql(tbl_); + return ToSqlUtils.getIdentSql(db_) + "." + ToSqlUtils.getIdentSql(tbl_); } } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/ToSqlUtils.java b/fe/src/main/java/com/cloudera/impala/analysis/ToSqlUtils.java index 553f0d19f..8286a7469 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/ToSqlUtils.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/ToSqlUtils.java @@ -50,29 +50,34 @@ public class ToSqlUtils { ImmutableSet.of("EXTERNAL", "comment"); /** - * Given an unquoted identifier string, returns an identifier lexable by Hive, possibly - * by enclosing the original identifier in "`" quotes. + * Given an unquoted identifier string, returns an identifier lexable by + * Impala and Hive, possibly by enclosing the original identifier in "`" quotes. * For example, Hive cannot parse its own auto-generated column - * names "_c0", "_c1" etc. unless they are quoted. + * names "_c0", "_c1" etc. unless they are quoted. Impala and Hive keywords + * must also be quoted. * * Impala's lexer recognizes a superset of the unquoted identifiers that Hive can. - * This method always returns an identifier that Impala can recognize, although for - * some identifiers the quotes may not be strictly necessary for Impala. + * At the same time, Impala's and Hive's list of keywords differ. + * This method always returns an identifier that Impala and Hive can recognize, + * although for some identifiers the quotes may not be strictly necessary for + * one or the other system. */ - public static String getHiveIdentSql(String ident) { + public static String getIdentSql(String ident) { + boolean hiveNeedsQuotes = true; HiveLexer hiveLexer = new HiveLexer(new ANTLRStringStream(ident)); try { Token t = hiveLexer.nextToken(); // Check that the lexer recognizes an identifier and then EOF. boolean identFound = t.getType() == HiveLexer.Identifier; t = hiveLexer.nextToken(); - // No enclosing quotes are necessary. - if (identFound && t.getType() == HiveLexer.EOF) return ident; + // No enclosing quotes are necessary for Hive. + hiveNeedsQuotes = !(identFound && t.getType() == HiveLexer.EOF); } catch (Exception e) { // Ignore exception and just quote the identifier to be safe. } - // Hive's lexer does not recognize impalaIdent, so enclose it in quotes. - return "`" + ident + "`"; + boolean isImpalaKeyword = SqlScanner.isKeyword(ident.toUpperCase()); + if (hiveNeedsQuotes || isImpalaKeyword) return "`" + ident + "`"; + return ident; } /** diff --git a/fe/src/main/java/com/cloudera/impala/analysis/ViewRef.java b/fe/src/main/java/com/cloudera/impala/analysis/ViewRef.java index aa38bf593..14449384d 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/ViewRef.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/ViewRef.java @@ -161,7 +161,7 @@ public class ViewRef extends InlineViewRef { if (origTblRef_ != null) return origTblRef_.tableRefToSql(); // Enclose the alias in quotes if Hive cannot parse it without quotes. // This is needed for view compatibility between Impala and Hive. - String aliasSql = ToSqlUtils.getHiveIdentSql(alias_); + String aliasSql = ToSqlUtils.getIdentSql(alias_); return "(" + queryStmt_.toSql() + ") " + aliasSql; } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/WithClause.java b/fe/src/main/java/com/cloudera/impala/analysis/WithClause.java index 459e50b88..dd7161969 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/WithClause.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/WithClause.java @@ -228,7 +228,7 @@ public class WithClause implements ParseNode { for (InlineViewRef view: views_) { // Enclose the view alias in quotes if Hive cannot parse it without quotes. // This is needed for view compatibility between Impala and Hive. - String aliasSql = ToSqlUtils.getHiveIdentSql(view.getAlias()); + String aliasSql = ToSqlUtils.getIdentSql(view.getAlias()); viewStrings.add(aliasSql + " AS (" + view.getViewStmt().toSql() + ")"); } return "WITH " + Joiner.on(",").join(viewStrings); diff --git a/fe/src/main/jflex/sql-scanner.flex b/fe/src/main/jflex/sql-scanner.flex index ff1ee75d3..e8fe984cf 100644 --- a/fe/src/main/jflex/sql-scanner.flex +++ b/fe/src/main/jflex/sql-scanner.flex @@ -243,6 +243,10 @@ import com.cloudera.impala.analysis.SqlParserSymbols; String token = tokenIdMap.get(tokenId); return keywordMap.containsKey(token.toLowerCase()); } + + public static boolean isKeyword(String ident) { + return keywordMap.containsKey(ident.toLowerCase()); + } private Symbol newToken(int id, Object value) { return new Symbol(id, yyline+1, yycolumn+1, value); diff --git a/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java b/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java index f62f50d05..604650e4f 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java @@ -71,7 +71,8 @@ public class ToSqlTest extends AnalyzerTest { } /** - * Tests quoting of identifiers for view compatibility with Hive. + * Tests quoting of identifiers for view compatibility with Hive, + * and for proper quoting of Impala keywords in view-definition stmts. */ @Test public void TestIdentifierQuoting() { @@ -85,6 +86,12 @@ public class ToSqlTest extends AnalyzerTest { // Quoted identifiers that require quoting in both Impala and Hive. testToSql("select 1 as `???`, 2.0 as '^^^'", "SELECT 1 `???`, 2.0 `^^^`"); + // Test quoting of identifiers that are Impala keywords. + testToSql("select `end`.`alter`, `end`.`table` from " + + "(select 1 as `alter`, 2 as `table`) `end`", + "SELECT `end`.`alter`, `end`.`table` FROM " + + "(SELECT 1 `alter`, 2 `table`) `end`"); + // Test quoting of inline view aliases. testToSql("select a from (select 1 as a) as _t", "SELECT a FROM (SELECT 1 a) `_t`"); diff --git a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test index f4ae1b2ce..b2d3a2700 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test +++ b/testdata/workloads/functional-query/queries/QueryTest/compute-stats.test @@ -308,3 +308,35 @@ COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE ---- TYPES STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE ==== +---- QUERY +# IMP-1227: Test computing stats on an HBase table that has a +# complex-typed column that Impala does not yet support. +create table compute_stats_db.map_table +like functional_hbase.map_table_hbase; +==== +---- QUERY +compute stats compute_stats_db.map_table +---- RESULTS +'Updated 1 partition(s) and 1 column(s).' +---- TYPES +STRING +==== +---- QUERY: VERIFY_IS_EQUAL +show table stats compute_stats_db.map_table +---- LABELS +REGION LOCATION, START ROWKEY, EST. #ROWS, SIZE +---- RESULTS +regex:.+,'',regex:.+,regex:.+ +---- TYPES +STRING, STRING, BIGINT, STRING +==== +---- QUERY +show column stats compute_stats_db.map_table +---- LABELS +COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE +---- RESULTS +'key','STRING',0,0,0,0 +'map_col','INVALID_TYPE',-1,-1,-1,-1 +---- TYPES +STRING, STRING, BIGINT, BIGINT, DOUBLE, DOUBLE +==== \ No newline at end of file