mirror of
https://github.com/apache/impala.git
synced 2025-12-30 03:01:44 -05:00
IMPALA-1300: Allow subqueries in UNION operands
This enables the existing subquery rewrite rules to rewrite UNION statements. UNION rewriting is easily done by simply calling the rewriter for each operand in the UNION. At least one TPC-DS query requires this functionality (IMPALA-1365). The more difficult case of a UNION within a subquery is still not supported.

Change-Id: I7f83eed0eb8ae81565e629f09f6918a4ba86ee13
Reviewed-on: http://gerrit.sjc.cloudera.com:8080/4859
Reviewed-by: Casey Ching <casey@cloudera.com>
Tested-by: jenkins
This commit is contained in:
1
fe/.gitignore
vendored
1
fe/.gitignore
vendored
@@ -6,6 +6,7 @@ target
|
||||
.project
|
||||
|
||||
# IntelliJ artifacts
|
||||
.idea
|
||||
*.iml
|
||||
*.ipr
|
||||
*.iws
|
||||
|
||||
@@ -321,6 +321,7 @@ public class AnalysisContext {
|
||||
analysisResult_.stmt_.analyze(analysisResult_.analyzer_);
|
||||
LOG.trace("rewrittenStmt: " + rewrittenStmt.toSql());
|
||||
if (isExplain) analysisResult_.stmt_.setIsExplain();
|
||||
Preconditions.checkState(!analysisResult_.requiresRewrite());
|
||||
}
|
||||
} catch (AnalysisException e) {
|
||||
// Don't wrap AnalysisExceptions in another AnalysisException
|
||||
|
||||
@@ -73,11 +73,9 @@ public class CreateTableAsSelectStmt extends StatementBase {
|
||||
if (analyzer.containsSubquery()) {
|
||||
// The select statement of this CTAS is nested. Rewrite the
|
||||
// statement to unnest all subqueries and re-analyze using a new analyzer.
|
||||
Preconditions.checkState(tmpQueryStmt instanceof SelectStmt);
|
||||
SelectStmt selectStmt = (SelectStmt)tmpQueryStmt;
|
||||
StmtRewriter.rewriteStatement(selectStmt, tmpAnalyzer);
|
||||
StmtRewriter.rewriteQueryStatement(tmpQueryStmt, tmpAnalyzer);
|
||||
// Update the insert statement with the unanalyzed rewritten select stmt.
|
||||
insertStmt_.setQueryStmt(selectStmt.clone());
|
||||
insertStmt_.setQueryStmt(tmpQueryStmt.clone());
|
||||
|
||||
// Re-analyze the select statement of the CTAS.
|
||||
tmpQueryStmt = insertStmt_.getQueryStmt().clone();
|
||||
|
||||
@@ -93,21 +93,6 @@ public class InlineViewRef extends TableRef {
|
||||
view_ = other.view_;
|
||||
}
|
||||
|
||||
/**
|
||||
* Rewrite all subqueries contained within the inline view. The inline view is
|
||||
* modified in place and the rewrite should not alter its select list.
|
||||
*/
|
||||
public void rewrite() throws AnalysisException {
|
||||
if (!(queryStmt_ instanceof SelectStmt)) return;
|
||||
int oldSelectListItemCnt =
|
||||
((SelectStmt)queryStmt_).getSelectList().getItems().size();
|
||||
StmtRewriter.rewriteStatement((SelectStmt)queryStmt_, inlineViewAnalyzer_);
|
||||
queryStmt_ = queryStmt_.clone();
|
||||
int newSelectListItemCnt =
|
||||
((SelectStmt)queryStmt_).getSelectList().getItems().size();
|
||||
Preconditions.checkState(oldSelectListItemCnt == newSelectListItemCnt);
|
||||
}
|
||||
|
||||
/**
|
||||
* Analyzes the inline view query block in a child analyzer of 'analyzer', creates
|
||||
* a new tuple descriptor for the inline view and registers auxiliary eq predicates
|
||||
@@ -311,6 +296,10 @@ public class InlineViewRef extends TableRef {
|
||||
}
|
||||
|
||||
public QueryStmt getViewStmt() { return queryStmt_; }
|
||||
public void setRewrittenViewStmt(QueryStmt stmt) {
|
||||
Preconditions.checkState(getAnalyzer().containsSubquery());
|
||||
queryStmt_ = stmt;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TableRef clone() { return new InlineViewRef(this); }
|
||||
|
||||
@@ -164,8 +164,7 @@ public class InsertStmt extends StatementBase {
|
||||
queryStmt_.analyze(queryStmtAnalyzer);
|
||||
|
||||
if (analyzer.containsSubquery()) {
|
||||
Preconditions.checkState(queryStmt_ instanceof SelectStmt);
|
||||
StmtRewriter.rewriteStatement((SelectStmt)queryStmt_, queryStmtAnalyzer);
|
||||
StmtRewriter.rewriteQueryStatement(queryStmt_, queryStmtAnalyzer);
|
||||
queryStmt_ = queryStmt_.clone();
|
||||
queryStmtAnalyzer = new Analyzer(analyzer);
|
||||
queryStmt_.analyze(queryStmtAnalyzer);
|
||||
|
||||
@@ -21,6 +21,7 @@ import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import com.cloudera.impala.analysis.AnalysisContext.AnalysisResult;
|
||||
import com.cloudera.impala.analysis.UnionStmt.UnionOperand;
|
||||
import com.cloudera.impala.catalog.Type;
|
||||
import com.cloudera.impala.common.AnalysisException;
|
||||
import com.google.common.base.Preconditions;
|
||||
@@ -41,9 +42,9 @@ public class StmtRewriter {
|
||||
public static StatementBase rewrite(AnalysisResult analysisResult)
|
||||
throws AnalysisException {
|
||||
StatementBase rewrittenStmt = null;
|
||||
if (analysisResult.getStmt() instanceof SelectStmt) {
|
||||
SelectStmt analyzedStmt = (SelectStmt)analysisResult.getStmt();
|
||||
rewriteStatement(analyzedStmt, analysisResult.getAnalyzer());
|
||||
if (analysisResult.getStmt() instanceof QueryStmt) {
|
||||
QueryStmt analyzedStmt = (QueryStmt)analysisResult.getStmt();
|
||||
rewriteQueryStatement(analyzedStmt, analysisResult.getAnalyzer());
|
||||
rewrittenStmt = analyzedStmt.clone();
|
||||
} else if (analysisResult.getStmt() instanceof InsertStmt) {
|
||||
// For an InsertStmt, rewrites are performed during its analysis.
|
||||
@@ -53,15 +54,11 @@ public class StmtRewriter {
|
||||
// For a CTAS, rewrites are performed during its analysis.
|
||||
CreateTableAsSelectStmt ctasStmt =
|
||||
(CreateTableAsSelectStmt)analysisResult.getStmt();
|
||||
Preconditions.checkState(ctasStmt.getQueryStmt() instanceof SelectStmt);
|
||||
// Create a new CTAS from the original create statement and the
|
||||
// rewritten insert statement.
|
||||
Preconditions.checkNotNull(analysisResult.getTmpCreateTableStmt());
|
||||
rewrittenStmt = new CreateTableAsSelectStmt(analysisResult.getTmpCreateTableStmt(),
|
||||
ctasStmt.getQueryStmt().clone());
|
||||
} else if (analysisResult.getStmt() instanceof UnionStmt){
|
||||
throw new AnalysisException("Subqueries are not supported in a UNION query: " +
|
||||
analysisResult.getStmt().toSql());
|
||||
} else {
|
||||
throw new AnalysisException("Unsupported statement containing subqueries: " +
|
||||
analysisResult.getStmt().toSql());
|
||||
@@ -69,17 +66,37 @@ public class StmtRewriter {
|
||||
return rewrittenStmt;
|
||||
}
|
||||
|
||||
/**
|
||||
* Calls the appropriate rewrite method based on the specific type of query stmt. See
|
||||
* rewriteSelectStatement() and rewriteUnionStatement() documentation.
|
||||
*/
|
||||
public static void rewriteQueryStatement(QueryStmt stmt, Analyzer analyzer)
|
||||
throws AnalysisException {
|
||||
Preconditions.checkNotNull(stmt);
|
||||
if (stmt instanceof SelectStmt) {
|
||||
rewriteSelectStatement((SelectStmt)stmt, analyzer);
|
||||
} else if (stmt instanceof UnionStmt) {
|
||||
rewriteUnionStatement((UnionStmt)stmt, analyzer);
|
||||
} else {
|
||||
throw new AnalysisException("Subqueries not supported for " +
|
||||
stmt.getClass().getSimpleName() + " statements");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Rewrite all the subqueries of a SelectStmt in place. Subqueries
|
||||
* are currently supported in FROM and WHERE clauses. The rewrite is performed in
|
||||
* place and not in a clone of SelectStmt because it requires the stmt to be analyzed.
|
||||
*/
|
||||
public static void rewriteStatement(SelectStmt stmt, Analyzer analyzer)
|
||||
private static void rewriteSelectStatement(SelectStmt stmt, Analyzer analyzer)
|
||||
throws AnalysisException {
|
||||
// Rewrite all the subqueries in the FROM clause.
|
||||
for (TableRef tblRef: stmt.tableRefs_) {
|
||||
if (!(tblRef instanceof InlineViewRef)) continue;
|
||||
((InlineViewRef)tblRef).rewrite();
|
||||
InlineViewRef inlineViewRef = (InlineViewRef)tblRef;
|
||||
rewriteQueryStatement(inlineViewRef.getViewStmt(), inlineViewRef.getAnalyzer());
|
||||
// Reset the state of the underlying stmt since it was rewritten
|
||||
inlineViewRef.setRewrittenViewStmt(inlineViewRef.getViewStmt().clone());
|
||||
}
|
||||
// Rewrite all the subqueries in the WHERE clause.
|
||||
if (stmt.hasWhereClause()) {
|
||||
@@ -97,6 +114,19 @@ public class StmtRewriter {
|
||||
LOG.trace("rewritten stmt: " + stmt.toSql());
|
||||
}
|
||||
|
||||
/**
|
||||
* Rewrite all operands in a UNION. The conditions that apply to SelectStmt rewriting
|
||||
* also apply here.
|
||||
*/
|
||||
private static void rewriteUnionStatement(UnionStmt stmt, Analyzer analyzer)
|
||||
throws AnalysisException {
|
||||
for (UnionOperand operand: stmt.getOperands()) {
|
||||
Preconditions.checkState(operand.getQueryStmt() instanceof SelectStmt);
|
||||
StmtRewriter.rewriteSelectStatement(
|
||||
(SelectStmt)operand.getQueryStmt(), operand.getAnalyzer());
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns true if the Expr tree rooted at 'expr' has at least one subquery
|
||||
* that participates in a disjunction.
|
||||
@@ -236,7 +266,7 @@ public class StmtRewriter {
|
||||
// Extract the subquery and rewrite it.
|
||||
Subquery subquery = expr.getSubquery();
|
||||
Preconditions.checkNotNull(subquery);
|
||||
rewriteStatement((SelectStmt)subquery.getStatement(), subquery.getAnalyzer());
|
||||
rewriteSelectStatement((SelectStmt) subquery.getStatement(), subquery.getAnalyzer());
|
||||
// Create a new Subquery with the rewritten stmt and use a substitution map
|
||||
// to replace the original subquery from the expr.
|
||||
Subquery newSubquery = new Subquery(subquery.getStatement().clone());
|
||||
|
||||
@@ -886,6 +886,9 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
|
||||
"from functional.alltypesagg a where a.id = t.id) and exists " +
|
||||
"(select * from functional.alltypestiny s where s.bigint_col = " +
|
||||
"t.bigint_col) and int_col < (select min(int_col) from functional.alltypes)");
|
||||
AnalyzesOk("insert into functional.alltypessmall partition (year, month) " +
|
||||
"select * from functional.alltypestiny where id = (select 1) " +
|
||||
"union select * from functional.alltypestiny where id = (select 2)");
|
||||
|
||||
// CTAS with correlated subqueries
|
||||
AnalyzesOk("create table functional.test_tbl as select * from " +
|
||||
@@ -893,6 +896,9 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
|
||||
"a where a.int_col = t.int_col and a.bool_col = false) and not exists " +
|
||||
"(select * from functional.alltypestiny s where s.int_col = t.int_col) " +
|
||||
"and t.bigint_col = (select count(*) from functional.alltypessmall)");
|
||||
AnalyzesOk("create table functional.test_tbl as " +
|
||||
"select * from functional.alltypestiny where id = (select 1) " +
|
||||
"union select * from functional.alltypestiny where id = (select 2)");
|
||||
|
||||
// Predicate with a child subquery in the HAVING clause
|
||||
AnalysisError("select id, count(*) from functional.alltypestiny t group by " +
|
||||
@@ -941,6 +947,18 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
|
||||
AnalyzesOk("select * from functional.alltypes_view a where exists " +
|
||||
"(select * from functional.alltypes_view b where a.id = b.id)");
|
||||
|
||||
// Union query with subqueries
|
||||
AnalyzesOk("select * from functional.alltypes where id = " +
|
||||
"(select max(id) from functional.alltypestiny) union " +
|
||||
"select * from functional.alltypes where id = " +
|
||||
"(select min(id) from functional.alltypessmall)");
|
||||
AnalyzesOk("select * from functional.alltypes where id = (select 1) " +
|
||||
"union all select * from functional.alltypes where id in " +
|
||||
"(select int_col from functional.alltypestiny)");
|
||||
AnalyzesOk("select * from functional.alltypes where id = (select 1) " +
|
||||
"union select * from (select * from functional.alltypes where id in " +
|
||||
"(select int_col from functional.alltypestiny)) t");
|
||||
|
||||
// Union in the subquery
|
||||
AnalysisError("select * from functional.alltypes where exists " +
|
||||
"(select id from functional.alltypestiny union " +
|
||||
@@ -948,16 +966,6 @@ public class AnalyzeSubqueriesTest extends AnalyzerTest {
|
||||
"A subquery must contain a single select block: " +
|
||||
"(SELECT id FROM functional.alltypestiny UNION " +
|
||||
"SELECT id FROM functional.alltypesagg)");
|
||||
// Union query with subqueries
|
||||
AnalysisError("select * from functional.alltypes where id = " +
|
||||
"(select max(id) from functional.alltypestiny) union " +
|
||||
"select * from functional.alltypes where id = " +
|
||||
"(select min(id) from functional.alltypessmall)",
|
||||
"Subqueries are not supported in a UNION query: " +
|
||||
"SELECT * FROM functional.alltypes WHERE id = " +
|
||||
"(SELECT max(id) FROM functional.alltypestiny) UNION " +
|
||||
"SELECT * FROM functional.alltypes WHERE id = " +
|
||||
"(SELECT min(id) FROM functional.alltypessmall)");
|
||||
AnalysisError("select * from functional.alltypes where exists (values(1))",
|
||||
"A subquery must contain a single select block: (VALUES(1))");
|
||||
|
||||
|
||||
@@ -728,3 +728,34 @@ AND t1.int_col NOT IN
|
||||
---- TYPES
|
||||
BIGINT
|
||||
====
|
||||
# WITH definition containing a subquery is used in a UNION
|
||||
---- QUERY
|
||||
WITH foo AS (SELECT 1 FROM alltypestiny WHERE int_col IN (SELECT 1))
|
||||
SELECT * FROM foo
|
||||
UNION SELECT * FROM foo
|
||||
---- RESULTS
|
||||
1
|
||||
---- TYPES
|
||||
TINYINT
|
||||
====
|
||||
# Regression test for IMPALA-1365
|
||||
---- QUERY
|
||||
WITH foo AS (SELECT 1 FROM alltypestiny WHERE int_col IN (SELECT 1))
|
||||
SELECT * FROM (SELECT * FROM foo UNION SELECT * FROM foo) bar
|
||||
---- RESULTS
|
||||
1
|
||||
---- TYPES
|
||||
TINYINT
|
||||
====
|
||||
# UNION of query with subquery and various other queries
|
||||
---- QUERY
|
||||
SELECT 1 FROM ALLTYPESTINY WHERE 1 = (SELECT 1)
|
||||
UNION VALUES (2)
|
||||
UNION ALL SELECT 3
|
||||
---- RESULTS
|
||||
1
|
||||
2
|
||||
3
|
||||
---- TYPES
|
||||
TINYINT
|
||||
====
|
||||
|
||||
Reference in New Issue
Block a user