diff --git a/fe/src/main/cup/sql-parser.y b/fe/src/main/cup/sql-parser.y index 91978ed79..69bff7ae2 100644 --- a/fe/src/main/cup/sql-parser.y +++ b/fe/src/main/cup/sql-parser.y @@ -1488,9 +1488,14 @@ opt_with_clause ::= with_view_def ::= IDENT:alias KW_AS LPAREN query_stmt:query RPAREN - {: RESULT = new View(alias, query); :} + {: RESULT = new View(alias, query, null); :} | STRING_LITERAL:alias KW_AS LPAREN query_stmt:query RPAREN - {: RESULT = new View(alias, query); :} + {: RESULT = new View(alias, query, null); :} + | IDENT:alias LPAREN ident_list:col_names RPAREN KW_AS LPAREN query_stmt:query RPAREN + {: RESULT = new View(alias, query, col_names); :} + | STRING_LITERAL:alias LPAREN ident_list:col_names RPAREN + KW_AS LPAREN query_stmt:query RPAREN + {: RESULT = new View(alias, query, col_names); :} ; with_view_def_list ::= diff --git a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java b/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java index a0dac0002..4923248bb 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/Analyzer.java @@ -416,10 +416,21 @@ public class Analyzer { /** * Registers a local view definition with this analyzer. Throws an exception if a view - * definition with the same alias has already been registered. + * definition with the same alias has already been registered or if the number of + * explicit column labels is greater than the number of columns in the view statement. */ public void registerLocalView(View view) throws AnalysisException { Preconditions.checkState(view.isLocalView()); + if (view.hasColLabels()) { + List viewLabels = view.getColLabels(); + List queryStmtLabels = view.getQueryStmt().getColLabels(); + if (viewLabels.size() > queryStmtLabels.size()) { + throw new AnalysisException("WITH-clause view '" + view.getName() + + "' returns " + queryStmtLabels.size() + " columns, but " + + viewLabels.size() + " labels were specified. The number of column " + + "labels must be smaller or equal to the number of returned columns."); + } + } if (localViews_.put(view.getName().toLowerCase(), view) != null) { throw new AnalysisException( String.format("Duplicate table alias: '%s'", view.getName())); diff --git a/fe/src/main/java/com/cloudera/impala/analysis/InlineViewRef.java b/fe/src/main/java/com/cloudera/impala/analysis/InlineViewRef.java index 167748f85..d3671e462 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/InlineViewRef.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/InlineViewRef.java @@ -97,6 +97,7 @@ public class InlineViewRef extends TableRef { smap_ = new ExprSubstitutionMap(); baseTblSmap_ = new ExprSubstitutionMap(); setJoinAttrs(origTblRef); + explicitColLabels_ = view.getColLabels(); // Set implicit aliases if no explicit one was given. if (hasExplicitAlias()) return; aliases_ = new String[] { diff --git a/fe/src/main/java/com/cloudera/impala/analysis/ToSqlUtils.java b/fe/src/main/java/com/cloudera/impala/analysis/ToSqlUtils.java index 3145e0c52..3463a2f0e 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/ToSqlUtils.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/ToSqlUtils.java @@ -93,6 +93,14 @@ public class ToSqlUtils { return ident; } + public static List getIdentSqlList(List identList) { + List identSqlList = Lists.newArrayList(); + for (String ident: identList) { + identSqlList.add(getIdentSql(ident)); + } + return identSqlList; + } + public static String getPathSql(List path) { StringBuilder result = new StringBuilder(); for (String p: path) { diff --git a/fe/src/main/java/com/cloudera/impala/analysis/WithClause.java b/fe/src/main/java/com/cloudera/impala/analysis/WithClause.java index ab5bbf250..aa553c916 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/WithClause.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/WithClause.java @@ -104,7 +104,8 @@ public class WithClause implements ParseNode { Preconditions.checkNotNull(other); views_ = Lists.newArrayList(); for (View view: other.views_) { - views_.add(new View(view.getName(), view.getQueryStmt().clone())); + views_.add(new View(view.getName(), view.getQueryStmt().clone(), + view.getOriginalColLabels())); } } @@ -119,9 +120,13 @@ public class WithClause implements ParseNode { public String toSql() { List viewStrings = Lists.newArrayList(); for (View view: views_) { - // Enclose the view alias in quotes if Hive cannot parse it without quotes. - // This is needed for view compatibility between Impala and Hive. + // Enclose the view alias and explicit labels in quotes if Hive cannot parse it + // without quotes. This is needed for view compatibility between Impala and Hive. String aliasSql = ToSqlUtils.getIdentSql(view.getName()); + if (view.hasColLabels()) { + aliasSql += "(" + Joiner.on(", ").join( + ToSqlUtils.getIdentSqlList(view.getOriginalColLabels())) + ")"; + } viewStrings.add(aliasSql + " AS (" + view.getQueryStmt().toSql() + ")"); } return "WITH " + Joiner.on(",").join(viewStrings); diff --git a/fe/src/main/java/com/cloudera/impala/catalog/View.java b/fe/src/main/java/com/cloudera/impala/catalog/View.java index 48612bf8d..5949964dd 100644 --- a/fe/src/main/java/com/cloudera/impala/catalog/View.java +++ b/fe/src/main/java/com/cloudera/impala/catalog/View.java @@ -15,6 +15,7 @@ package com.cloudera.impala.catalog; import java.io.StringReader; +import java.util.ArrayList; import java.util.List; import java.util.Set; @@ -29,6 +30,7 @@ import com.cloudera.impala.thrift.TCatalogObjectType; import com.cloudera.impala.thrift.TTable; import com.cloudera.impala.thrift.TTableDescriptor; import com.cloudera.impala.thrift.TTableType; +import com.google.common.collect.Lists; /** * Table metadata representing a catalog view or a local view from a WITH clause. @@ -64,6 +66,9 @@ public class View extends Table { // Set if this View is from a WITH clause and not persisted in the catalog. private final boolean isLocalView_; + // Set if this View is from a WITH clause with column labels. + private List colLabels_; + public View(TableId id, org.apache.hadoop.hive.metastore.api.Table msTable, Db db, String name, String owner) { super(id, msTable, db, name, owner); @@ -71,12 +76,14 @@ public class View extends Table { } /** - * C'tor for WITH-clause views that already have a parsed QueryStmt. + * C'tor for WITH-clause views that already have a parsed QueryStmt and an optional + * list of column labels. */ - public View(String alias, QueryStmt queryStmt) { + public View(String alias, QueryStmt queryStmt, List colLabels) { super(null, null, null, alias, null); isLocalView_ = true; queryStmt_ = queryStmt; + colLabels_ = colLabels; } @Override @@ -147,6 +154,27 @@ public class View extends Table { public String getInlineViewDef() { return inlineViewDef_; } public boolean isLocalView() { return isLocalView_; } + /** + * Returns the column labels the user specified in the WITH-clause. + */ + public List getOriginalColLabels() { return colLabels_; } + + /** + * Returns the explicit column labels for this view, or null if they need to be derived + * entirely from the underlying query statement. The returned list has at least as many + * elements as the number of column labels in the query stmt. + */ + public List getColLabels() { + if (colLabels_ == null) return null; + if (colLabels_.size() >= queryStmt_.getColLabels().size()) return colLabels_; + List explicitColLabels = Lists.newArrayList(colLabels_); + explicitColLabels.addAll(queryStmt_.getColLabels().subList( + colLabels_.size(), queryStmt_.getColLabels().size())); + return explicitColLabels; + } + + public boolean hasColLabels() { return colLabels_ != null; } + @Override public TTableDescriptor toThriftDescriptor(Set referencedPartitions) { throw new IllegalStateException("Cannot call toThriftDescriptor() on a view."); diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java index d6226af25..7b29bbd3a 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java @@ -2345,12 +2345,28 @@ public class AnalyzeStmtsTest extends AnalyzerTest { // Single view in WITH clause. AnalyzesOk("with t as (select int_col x, bigint_col y from functional.alltypes) " + "select x, y from t"); + // Single view in WITH clause with column labels. + AnalyzesOk("with t(c1, c2) as (select int_col x, bigint_col y " + + "from functional.alltypes) " + + "select c1, c2 from t"); + // Single view in WITH clause with the number of column labels less than the number + // of columns. + AnalyzesOk("with t(c1) as (select int_col, bigint_col y " + + "from functional.alltypes) " + + "select c1, y from t"); // Multiple views in WITH clause. Only one view is used. AnalyzesOk("with t1 as (select int_col x, bigint_col y from functional.alltypes), " + - "t2 as (select 1 x , 10 y), t3 as (values(2 x , 20 y), (3, 30)), " + + "t2 as (select 1 x, 10 y), t3 as (values(2 x, 20 y), (3, 30)), " + "t4 as (select 4 x, 40 y union all select 5, 50), " + "t5 as (select * from (values(6 x, 60 y)) as a) " + "select x, y from t3"); + // Multiple views in WITH clause with column labels. Only one view is used. + AnalyzesOk("with t1(c1, c2) as (select int_col, bigint_col " + + "from functional.alltypes), " + + "t2(c1, c2) as (select 1, 10), t3(a, b) as (values(2, 5), (3, 30)), " + + "t4(c1, c2) as (select 4, 40 union all select 5, 50), " + + "t5 as (select * from (values(6, 60)) as a) " + + "select a, b from t3"); // Multiple views in WITH clause. All views used in a union. AnalyzesOk("with t1 as (select int_col x, bigint_col y from functional.alltypes), " + "t2 as (select 1 x , 10 y), t3 as (values(2 x , 20 y), (3, 30)), " + @@ -2365,9 +2381,19 @@ public class AnalyzeStmtsTest extends AnalyzerTest { "t5 as (select * from (values(6 x, 60 y)) as a) " + "select t1.y, t2.y, t3.y, t4.y, t5.y from t1, t2, t3, t4, t5 " + "where t1.y = t2.y and t2.y = t3.y and t3.y = t4.y and t4.y = t5.y"); + // Multiple views in WITH clause with column labels. All views used in a join. + AnalyzesOk("with t1(c1, c2) as (select int_col x, bigint_col y " + + "from functional.alltypes), " + + "t2(c1, c2) as (select 1 x , 10 y), t3 as (values(2 x , 20 y), (3, 30)), " + + "t4 as (select 4 x, 40 y union all select 5, 50), " + + "t5 as (select * from (values(6 x, 60 y)) as a) " + + "select t1.c2, t2.c2, t3.y, t4.y, t5.y from t1, t2, t3, t4, t5 " + + "where t1.c2 = t2.c2 and t2.c2 = t3.y and t3.y = t4.y and t4.y = t5.y"); // WITH clause in insert statement. AnalyzesOk("with t1 as (select * from functional.alltypestiny)" + "insert into functional.alltypes partition(year, month) select * from t1"); + AnalyzesOk("with t1(c1, c2) as (select * from functional.alltypestiny)" + + "insert into functional.alltypes partition(year, month) select * from t1"); // WITH clause in insert statement with a select statement that has a WITH // clause and an inline view (IMPALA-1100) AnalyzesOk("with test_ctas_1 as (select * from functional.alltypestiny) insert " + @@ -2385,6 +2411,8 @@ public class AnalyzeStmtsTest extends AnalyzerTest { AnalyzesOk("with t1 as (select id from functional.alltypestiny) " + "insert into functional.alltypes partition(year, month) " + "with t1 as (select * from functional.alltypessmall) select * from t1"); + AnalyzesOk("with t(c1, c2) as (select * from functional.alltypes) " + + "select a.c1, a.c2 from t a"); // WITH-clause view used in inline view. AnalyzesOk("with t1 as (select 'a') select * from (select * from t1) as t2"); AnalyzesOk("with t1 as (select 'a') " + @@ -2413,6 +2441,9 @@ public class AnalyzeStmtsTest extends AnalyzerTest { // Aliases are resolved from inner-most to the outer-most scope. AnalyzesOk("with t1 as (select 'a') " + "select t2.* from (with t1 as (select 'b') select * from t1) as t2"); + // Column labels do not conflict because they are in different scopes. + AnalyzesOk("with t1(c1) as (select 'a') " + + "select c1 from (with t1(c1) as (select 'b') select c1 from t1) as t2"); // Table aliases do not conflict because t1 from the inline view is never used. AnalyzesOk("with t1 as (select 1), t2 as (select 2)" + "select * from functional.alltypes as t1"); @@ -2421,6 +2452,9 @@ public class AnalyzeStmtsTest extends AnalyzerTest { // Fully-qualified table does not conflict with WITH-clause table. AnalyzesOk("with alltypes as (select * from functional.alltypes) " + "select * from functional.alltypes union all select * from alltypes"); + // Column labels can be used with table aliases. + AnalyzesOk("with t(c1) as (select id from functional.alltypes) " + + "select a.c1 from t a"); // Use a custom analyzer to change the default db to functional. // Recursion is prevented because 'alltypes' in t1 refers to the table @@ -2491,6 +2525,16 @@ public class AnalyzeStmtsTest extends AnalyzerTest { "Duplicate table alias: 't1'"); AnalysisError("with t1 as (select 1) select * from (select 2) as t1 inner join t1", "Duplicate table alias: 't1'"); + // With clause column labels must be used intead of aliases. + AnalysisError("with t1(c1) as (select id cnt from functional.alltypes) "+ + "select cnt from t1", + "Could not resolve column/field reference: 'cnt'"); + // With clause column labels must not exceed the number of columns in the query. + AnalysisError("with t(c1, c2) as (select id from functional.alltypes) " + + "select * from t", + "WITH-clause view 't' returns 1 columns, but 2 labels were specified. The " + + "number of column labels must be smaller or equal to the number of returned " + + "columns."); // Multiple references in same select statement require aliases. AnalysisError("with t1 as (select 'a' x) select * from t1 inner join t1", "Duplicate table alias: 't1'"); diff --git a/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java b/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java index 158ba5fd9..a32ff2188 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java @@ -750,11 +750,16 @@ public class ParserTest { @Test public void TestWithClause() throws AnalysisException { ParsesOk("with t as (select 1 as a) select a from t"); + ParsesOk("with t(x) as (select 1 as a) select x from t"); ParsesOk("with t as (select c from tab) select * from t"); + ParsesOk("with t(x, y) as (select * from tab) select * from t"); ParsesOk("with t as (values(1, 2, 3), (4, 5, 6)) select * from t"); + ParsesOk("with t(x, y, z) as (values(1, 2, 3), (4, 5, 6)) select * from t"); ParsesOk("with t1 as (select 1 as a), t2 as (select 2 as a) select a from t1"); ParsesOk("with t1 as (select c from tab), t2 as (select c from tab)" + "select c from t2"); + ParsesOk("with t1(x) as (select c from tab), t2(x) as (select c from tab)" + + "select x from t2"); // With clause and union statement. ParsesOk("with t1 as (select 1 as a), t2 as (select 2 as a)" + "select a from t1 union all select a from t2"); @@ -763,16 +768,19 @@ public class ParserTest { "select a from t1 inner join t2 on t1.a = t2.a"); // With clause in inline view. ParsesOk("select * from (with t as (select 1 as a) select * from t) as a"); + ParsesOk("select * from (with t(x) as (select 1 as a) select * from t) as a"); // With clause in query statement of insert statement. ParsesOk("insert into x with t as (select * from tab) select * from t"); + ParsesOk("insert into x with t(x, y) as (select * from tab) select * from t"); ParsesOk("insert into x with t as (values(1, 2, 3)) select * from t"); + ParsesOk("insert into x with t(x, y) as (values(1, 2, 3)) select * from t"); // With clause before insert statement. ParsesOk("with t as (select 1) insert into x select * from t"); + ParsesOk("with t(x) as (select 1) insert into x select * from t"); // Test quoted identifier or string literal as table alias. ParsesOk("with `t1` as (select 1 a), 't2' as (select 2 a), \"t3\" as (select 3 a)" + "select a from t1 union all select a from t2 union all select a from t3"); - // Multiple with clauses. Operands must be in parenthesis to // have their own with clause. ParsesOk("with t as (select 1) " + @@ -783,9 +791,14 @@ public class ParserTest { "(with t as (select 3) select * from t) order by 1 limit 1"); // Multiple with clauses. One before the insert and one inside the query statement. ParsesOk("with t as (select 1) insert into x with t as (select 2) select * from t"); + ParsesOk("with t(c1) as (select 1) " + + "insert into x with t(c2) as (select 2) select * from t"); // Empty with clause. ParserError("with t as () select 1"); + ParserError("with t(x) as () select 1"); + // No labels inside parenthesis. + ParserError("with t() as (select 1 as a) select a from t"); // Missing select, union or insert statement after with clause. ParserError("select * from (with t as (select 1 as a)) as a"); ParserError("with t as (select 1)"); @@ -794,8 +807,11 @@ public class ParserTest { ParserError("with t as select 1 as a union all select a from t"); ParserError("with t1 as (select 1 as a), t2 as select 2 as a select a from t"); ParserError("with t as select 1 as a select a from t"); + // Missing parenthesis around column labels. + ParserError("with t c1 as (select 1 as a) select c1 from t"); // Insert in with clause is not valid. ParserError("with t as (insert into x select * from tab) select * from t"); + ParserError("with t(c1) as (insert into x select * from tab) select * from t"); // Union operands need to be parenthesized to have their own with clause. ParserError("select * from t union all with t as (select 2) select * from t"); } diff --git a/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java b/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java index 47ced39e9..aebee3255 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java @@ -774,6 +774,14 @@ public class ToSqlTest extends AnalyzerTest { // WITH clause in select stmt. testToSql("with t as (select * from functional.alltypes) select * from t", "WITH t AS (SELECT * FROM functional.alltypes) SELECT * FROM t"); + testToSql("with t(c1) as (select * from functional.alltypes) select * from t", + "WITH t(c1) AS (SELECT * FROM functional.alltypes) SELECT * FROM t"); + testToSql("with t(`table`, col, `create`) as (select * from functional.alltypes) " + + "select * from t", + "WITH t(`table`, col, `create`) AS (SELECT * FROM functional.alltypes) " + + "SELECT * FROM t"); + testToSql("with t(c1, c2) as (select * from functional.alltypes) select * from t", + "WITH t(c1, c2) AS (SELECT * FROM functional.alltypes) SELECT * FROM t"); testToSql("with t as (select sum(int_col) over(partition by tinyint_col, " + "bool_col order by float_col rows between unbounded preceding and " + "current row) as x from functional.alltypes) " + @@ -787,6 +795,10 @@ public class ToSqlTest extends AnalyzerTest { "select * from t a inner join t b on (a.int_col = b.int_col)", "WITH t AS (SELECT * FROM functional.alltypes) " + "SELECT * FROM t a INNER JOIN t b ON (a.int_col = b.int_col)"); + testToSql("with t(c1, c2) as (select * from functional.alltypes) " + + "select a.c1, a.c2 from t a inner join t b on (a.c1 = b.c2)", + "WITH t(c1, c2) AS (SELECT * FROM functional.alltypes) " + + "SELECT a.c1, a.c2 FROM t a INNER JOIN t b ON (a.c1 = b.c2)"); // WITH clause in select stmt with a join and a USING clause. testToSql("with t as (select * from functional.alltypes) " + "select * from t a inner join t b using(int_col)", diff --git a/testdata/workloads/functional-query/queries/QueryTest/with-clause.test b/testdata/workloads/functional-query/queries/QueryTest/with-clause.test index bd359d9ad..d86b6828c 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/with-clause.test +++ b/testdata/workloads/functional-query/queries/QueryTest/with-clause.test @@ -19,9 +19,36 @@ select x, y from t order by y limit 10 BIGINT, STRING ==== ---- QUERY +# Basic tests with a single with-clause view with column labels. +with t(c1, c2) as (select int_col, bigint_col y from functional.alltypestiny) +select * from t limit 1 +---- RESULTS +0,0 +---- TYPES +INT, BIGINT +---- LABELS +C1, C2 +==== +---- QUERY +with t(c1) as (select int_col, bigint_col from functional.alltypestiny) +select * from t limit 1 +---- RESULTS +0,0 +---- TYPES +INT, BIGINT +---- LABELS +C1, BIGINT_COL +==== +---- QUERY +with t(c1, c2) as (select int_col from functional.alltypestiny) +select * from t limit 1 +---- CATCH +AnalysisException: WITH-clause view 't' returns 1 columns, but 2 labels were specified. +==== +---- QUERY # Multiple views in with-clause. Only one view is used. with t1 as (select int_col x, bigint_col y from functional.alltypestiny), -t2 as (select 1 x , 10 y), t3 as (values(2 x , 20 y), (3, 30)) +t2 as (select 1 x, 10 y), t3 as (values(2 x, 20 y), (3, 30)) select x, y from t2 ---- RESULTS 1,10 @@ -31,7 +58,7 @@ TINYINT, TINYINT ---- QUERY # Multiple views in with-clause. All views are used in a union. with t1 as (select int_col x, bigint_col y from functional.alltypestiny), -t2 as (select 1 x , 10 y), t3 as (values(2 x , 20 y), (3, 30)) +t2 as (select 1 x, 10 y), t3 as (values(2 x, 20 y), (3, 30)) select * from t1 union all select * from t2 union all (select * from t3) order by x limit 20 ---- RESULTS 0,0 @@ -49,6 +76,18 @@ select * from t1 union all select * from t2 union all (select * from t3) order b INT, BIGINT ==== ---- QUERY +with t1(c1, c2) as (select int_col x, bigint_col y from functional.alltypestiny), +t2(c3, c4) as (select 1 x, 10 y) +select * from t1 limit 1 union all select * from t2 limit 1 +---- RESULTS +0,0 +1,10 +---- TYPES +INT, BIGINT +---- LABELS +C1, C2 +==== +---- QUERY # Multiple views in with-clause. All views are used in a join. with t1 as (select int_col x, bigint_col y from functional.alltypes limit 2), t2 as (select int_col x, bigint_col y from functional.alltypestiny limit 2),