IMPALA-1300: Allow subqueries in UNION operands

This enables the existing subquery rewrite rules to rewrite UNION
statements. A UNION is rewritten by calling the rewriter on each of
its operands. At least one TPC-DS query requires this functionality
(IMPALA-1365).

The more difficult case of a UNION within a subquery is still not
supported.

Change-Id: I7f83eed0eb8ae81565e629f09f6918a4ba86ee13
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/4859
Reviewed-by: Casey Ching <casey@cloudera.com>
Tested-by: jenkins
This commit is contained in:
casey
2014-10-14 21:24:02 +00:00
committed by ishaan
parent 7b6ecbeea5
commit 516d7483dd
8 changed files with 98 additions and 41 deletions

1
fe/.gitignore vendored
View File

@@ -6,6 +6,7 @@ target
.project
# IntelliJ artifacts
.idea
*.iml
*.ipr
*.iws

View File

@@ -321,6 +321,7 @@ public class AnalysisContext {
analysisResult_.stmt_.analyze(analysisResult_.analyzer_);
LOG.trace("rewrittenStmt: " + rewrittenStmt.toSql());
if (isExplain) analysisResult_.stmt_.setIsExplain();
Preconditions.checkState(!analysisResult_.requiresRewrite());
}
} catch (AnalysisException e) {
// Don't wrap AnalysisExceptions in another AnalysisException

View File

@@ -73,11 +73,9 @@ public class CreateTableAsSelectStmt extends StatementBase {
if (analyzer.containsSubquery()) {
// The select statement of this CTAS is nested. Rewrite the
// statement to unnest all subqueries and re-analyze using a new analyzer.
Preconditions.checkState(tmpQueryStmt instanceof SelectStmt);
SelectStmt selectStmt = (SelectStmt)tmpQueryStmt;
StmtRewriter.rewriteStatement(selectStmt, tmpAnalyzer);
StmtRewriter.rewriteQueryStatement(tmpQueryStmt, tmpAnalyzer);
// Update the insert statement with the unanalyzed rewritten select stmt.
insertStmt_.setQueryStmt(selectStmt.clone());
insertStmt_.setQueryStmt(tmpQueryStmt.clone());
// Re-analyze the select statement of the CTAS.
tmpQueryStmt = insertStmt_.getQueryStmt().clone();

View File

@@ -93,21 +93,6 @@ public class InlineViewRef extends TableRef {
view_ = other.view_;
}
/**
 * Rewrites, in place, every subquery nested inside this inline view. Only views
 * whose statement is a SelectStmt are rewritten; any other statement is left
 * untouched. The rewrite must leave the number of select list items unchanged,
 * which is verified with a precondition once the rewritten statement is cloned.
 */
public void rewrite() throws AnalysisException {
  if (queryStmt_ instanceof SelectStmt) {
    // Remember the select list size so the rewrite can be sanity-checked below.
    int itemCntBefore = ((SelectStmt) queryStmt_).getSelectList().getItems().size();
    StmtRewriter.rewriteStatement((SelectStmt) queryStmt_, inlineViewAnalyzer_);
    // Continue with a clone of the rewritten statement.
    queryStmt_ = queryStmt_.clone();
    int itemCntAfter = ((SelectStmt) queryStmt_).getSelectList().getItems().size();
    Preconditions.checkState(itemCntBefore == itemCntAfter);
  }
}
/**
* Analyzes the inline view query block in a child analyzer of 'analyzer', creates
* a new tuple descriptor for the inline view and registers auxiliary eq predicates
@@ -311,6 +296,10 @@ public class InlineViewRef extends TableRef {
}
public QueryStmt getViewStmt() { return queryStmt_; }
/**
 * Replaces this inline view's query statement with 'stmt'. Fails a state check
 * unless the analyzer returned by getAnalyzer() reports that it contains a subquery.
 * NOTE(review): presumably 'stmt' is an unanalyzed clone of the rewritten view
 * statement, as passed by StmtRewriter -- confirm against callers.
 */
public void setRewrittenViewStmt(QueryStmt stmt) {
// Guard first: the statement is only swapped when a subquery was registered.
Preconditions.checkState(getAnalyzer().containsSubquery());
queryStmt_ = stmt;
}
@Override
public TableRef clone() { return new InlineViewRef(this); }

View File

@@ -164,8 +164,7 @@ public class InsertStmt extends StatementBase {
queryStmt_.analyze(queryStmtAnalyzer);
if (analyzer.containsSubquery()) {
Preconditions.checkState(queryStmt_ instanceof SelectStmt);
StmtRewriter.rewriteStatement((SelectStmt)queryStmt_, queryStmtAnalyzer);
StmtRewriter.rewriteQueryStatement(queryStmt_, queryStmtAnalyzer);
queryStmt_ = queryStmt_.clone();
queryStmtAnalyzer = new Analyzer(analyzer);
queryStmt_.analyze(queryStmtAnalyzer);

View File

@@ -21,6 +21,7 @@ import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.cloudera.impala.analysis.AnalysisContext.AnalysisResult;
import com.cloudera.impala.analysis.UnionStmt.UnionOperand;
import com.cloudera.impala.catalog.Type;
import com.cloudera.impala.common.AnalysisException;
import com.google.common.base.Preconditions;
@@ -41,9 +42,9 @@ public class StmtRewriter {
public static StatementBase rewrite(AnalysisResult analysisResult)
throws AnalysisException {
StatementBase rewrittenStmt = null;
if (analysisResult.getStmt() instanceof SelectStmt) {
SelectStmt analyzedStmt = (SelectStmt)analysisResult.getStmt();
rewriteStatement(analyzedStmt, analysisResult.getAnalyzer());
if (analysisResult.getStmt() instanceof QueryStmt) {
QueryStmt analyzedStmt = (QueryStmt)analysisResult.getStmt();
rewriteQueryStatement(analyzedStmt, analysisResult.getAnalyzer());
rewrittenStmt = analyzedStmt.clone();
} else if (analysisResult.getStmt() instanceof InsertStmt) {
// For an InsertStmt, rewrites are performed during its analysis.
@@ -53,15 +54,11 @@ public class StmtRewriter {
// For a CTAS, rewrites are performed during its analysis.
CreateTableAsSelectStmt ctasStmt =
(CreateTableAsSelectStmt)analysisResult.getStmt();
Preconditions.checkState(ctasStmt.getQueryStmt() instanceof SelectStmt);
// Create a new CTAS from the original create statement and the
// rewritten insert statement.
Preconditions.checkNotNull(analysisResult.getTmpCreateTableStmt());
rewrittenStmt = new CreateTableAsSelectStmt(analysisResult.getTmpCreateTableStmt(),
ctasStmt.getQueryStmt().clone());
} else if (analysisResult.getStmt() instanceof UnionStmt){
throw new AnalysisException("Subqueries are not supported in a UNION query: " +
analysisResult.getStmt().toSql());
} else {
throw new AnalysisException("Unsupported statement containing subqueries: " +
analysisResult.getStmt().toSql());
@@ -69,17 +66,37 @@ public class StmtRewriter {
return rewrittenStmt;
}
/**
 * Dispatches subquery rewriting on the concrete type of 'stmt': a SelectStmt is
 * handed to rewriteSelectStatement() and a UnionStmt to rewriteUnionStatement().
 * Any other kind of QueryStmt is rejected with an AnalysisException.
 * 'stmt' must not be null.
 */
public static void rewriteQueryStatement(QueryStmt stmt, Analyzer analyzer)
    throws AnalysisException {
  Preconditions.checkNotNull(stmt);
  if (stmt instanceof SelectStmt) {
    rewriteSelectStatement((SelectStmt) stmt, analyzer);
    return;
  }
  if (stmt instanceof UnionStmt) {
    rewriteUnionStatement((UnionStmt) stmt, analyzer);
    return;
  }
  // Neither a SELECT nor a UNION: no rewrite rule is available for this stmt type.
  String stmtTypeName = stmt.getClass().getSimpleName();
  throw new AnalysisException("Subqueries not supported for " +
      stmtTypeName + " statements");
}
/**
* Rewrite all the subqueries of a SelectStmt in place. Subqueries
* are currently supported in FROM and WHERE clauses. The rewrite is performed in
* place and not in a clone of SelectStmt because it requires the stmt to be analyzed.
*/
public static void rewriteStatement(SelectStmt stmt, Analyzer analyzer)
private static void rewriteSelectStatement(SelectStmt stmt, Analyzer analyzer)
throws AnalysisException {
// Rewrite all the subqueries in the FROM clause.
for (TableRef tblRef: stmt.tableRefs_) {
if (!(tblRef instanceof InlineViewRef)) continue;
((InlineViewRef)tblRef).rewrite();
InlineViewRef inlineViewRef = (InlineViewRef)tblRef;
rewriteQueryStatement(inlineViewRef.getViewStmt(), inlineViewRef.getAnalyzer());
// Reset the state of the underlying stmt since it was rewritten
inlineViewRef.setRewrittenViewStmt(inlineViewRef.getViewStmt().clone());
}
// Rewrite all the subqueries in the WHERE clause.
if (stmt.hasWhereClause()) {
@@ -97,6 +114,19 @@ public class StmtRewriter {
LOG.trace("rewritten stmt: " + stmt.toSql());
}
/**
 * Rewrites the subqueries of every operand of the UNION 'stmt', using each operand's
 * own analyzer rather than the 'analyzer' argument. The conditions that apply to
 * SelectStmt rewriting also apply here. Every operand is required to be a
 * SelectStmt; this is enforced with a state check.
 * NOTE(review): presumably analysis guarantees that no operand is itself a
 * UnionStmt at this point -- confirm, otherwise the state check fails.
 */
private static void rewriteUnionStatement(UnionStmt stmt, Analyzer analyzer)
    throws AnalysisException {
  for (UnionOperand unionOperand: stmt.getOperands()) {
    Preconditions.checkState(unionOperand.getQueryStmt() instanceof SelectStmt);
    SelectStmt selectOperand = (SelectStmt) unionOperand.getQueryStmt();
    rewriteSelectStatement(selectOperand, unionOperand.getAnalyzer());
  }
}
/**
* Returns true if the Expr tree rooted at 'expr' has at least one subquery
* that participates in a disjunction.
@@ -236,7 +266,7 @@ public class StmtRewriter {
// Extract the subquery and rewrite it.
Subquery subquery = expr.getSubquery();
Preconditions.checkNotNull(subquery);
rewriteStatement((SelectStmt)subquery.getStatement(), subquery.getAnalyzer());
rewriteSelectStatement((SelectStmt) subquery.getStatement(), subquery.getAnalyzer());
// Create a new Subquery with the rewritten stmt and use a substitution map
// to replace the original subquery from the expr.
Subquery newSubquery = new Subquery(subquery.getStatement().clone());

View File

@@ -886,6 +886,9 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
"from functional.alltypesagg a where a.id = t.id) and exists " +
"(select * from functional.alltypestiny s where s.bigint_col = " +
"t.bigint_col) and int_col < (select min(int_col) from functional.alltypes)");
AnalyzesOk("insert into functional.alltypessmall partition (year, month) " +
"select * from functional.alltypestiny where id = (select 1) " +
"union select * from functional.alltypestiny where id = (select 2)");
// CTAS with correlated subqueries
AnalyzesOk("create table functional.test_tbl as select * from " +
@@ -893,6 +896,9 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
"a where a.int_col = t.int_col and a.bool_col = false) and not exists " +
"(select * from functional.alltypestiny s where s.int_col = t.int_col) " +
"and t.bigint_col = (select count(*) from functional.alltypessmall)");
AnalyzesOk("create table functional.test_tbl as " +
"select * from functional.alltypestiny where id = (select 1) " +
"union select * from functional.alltypestiny where id = (select 2)");
// Predicate with a child subquery in the HAVING clause
AnalysisError("select id, count(*) from functional.alltypestiny t group by " +
@@ -941,6 +947,18 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
AnalyzesOk("select * from functional.alltypes_view a where exists " +
"(select * from functional.alltypes_view b where a.id = b.id)");
// Union query with subqueries
AnalyzesOk("select * from functional.alltypes where id = " +
"(select max(id) from functional.alltypestiny) union " +
"select * from functional.alltypes where id = " +
"(select min(id) from functional.alltypessmall)");
AnalyzesOk("select * from functional.alltypes where id = (select 1) " +
"union all select * from functional.alltypes where id in " +
"(select int_col from functional.alltypestiny)");
AnalyzesOk("select * from functional.alltypes where id = (select 1) " +
"union select * from (select * from functional.alltypes where id in " +
"(select int_col from functional.alltypestiny)) t");
// Union in the subquery
AnalysisError("select * from functional.alltypes where exists " +
"(select id from functional.alltypestiny union " +
@@ -948,16 +966,6 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
"A subquery must contain a single select block: " +
"(SELECT id FROM functional.alltypestiny UNION " +
"SELECT id FROM functional.alltypesagg)");
// Union query with subqueries
AnalysisError("select * from functional.alltypes where id = " +
"(select max(id) from functional.alltypestiny) union " +
"select * from functional.alltypes where id = " +
"(select min(id) from functional.alltypessmall)",
"Subqueries are not supported in a UNION query: " +
"SELECT * FROM functional.alltypes WHERE id = " +
"(SELECT max(id) FROM functional.alltypestiny) UNION " +
"SELECT * FROM functional.alltypes WHERE id = " +
"(SELECT min(id) FROM functional.alltypessmall)");
AnalysisError("select * from functional.alltypes where exists (values(1))",
"A subquery must contain a single select block: (VALUES(1))");

View File

@@ -728,3 +728,34 @@ AND t1.int_col NOT IN
---- TYPES
BIGINT
====
# WITH definition containing a subquery is used in a UNION
---- QUERY
WITH foo AS (SELECT 1 FROM alltypestiny WHERE int_col IN (SELECT 1))
SELECT * FROM foo
UNION SELECT * FROM foo
---- RESULTS
1
---- TYPES
TINYINT
====
# Regression test for IMPALA-1365
---- QUERY
WITH foo AS (SELECT 1 FROM alltypestiny WHERE int_col IN (SELECT 1))
SELECT * FROM (SELECT * FROM foo UNION SELECT * FROM foo) bar
---- RESULTS
1
---- TYPES
TINYINT
====
# UNION of query with subquery and various other queries
---- QUERY
SELECT 1 FROM ALLTYPESTINY WHERE 1 = (SELECT 1)
UNION VALUES (2)
UNION ALL SELECT 3
---- RESULTS
1
2
3
---- TYPES
TINYINT
====