From 2a69019525f364adca7e5076a2fcad92829dfb96 Mon Sep 17 00:00:00 2001 From: Henry Robinson Date: Fri, 11 Apr 2014 17:53:02 -0700 Subject: [PATCH] IMPALA-945: Fix column reordering with SELECT expressions Previously, to produce the correct output expressions for the root plan fragment before a table sink, InsertStmt would reorder the result expressions for the query statement at the plan root. This had stopped working for SelectStmts (and test coverage didn't catch that). Now InsertStmt produces its own output expressions that can substitute for the originals from the query statement, and the planner uses those instead. All query tests for column reordering have been duplicated to use SELECT expressions. Change-Id: Ib909fe35d27416b33ba2e5ac797aa931e1fe43f9 Reviewed-on: http://gerrit.ent.cloudera.com:8080/2204 Tested-by: jenkins Reviewed-by: Henry Robinson (cherry picked from commit d526db7ac6274f35b6affcb7428327100026e14e) Reviewed-on: http://gerrit.ent.cloudera.com:8080/2275 --- .../cloudera/impala/analysis/InsertStmt.java | 20 ++- .../cloudera/impala/analysis/QueryStmt.java | 5 - .../com/cloudera/impala/planner/Planner.java | 7 +- .../queries/QueryTest/insert_permutation.test | 139 ++++++++++++++++++ 4 files changed, 157 insertions(+), 14 deletions(-) diff --git a/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java index 9a641d4db..acc7e832f 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java @@ -91,6 +91,12 @@ public class InsertStmt extends StatementBase { // should decide whether to re-partition or not). private Boolean isRepartition_ = null; + // Output expressions that produce the final results to write to the target table. May + // include casts, and NullLiterals where an output column isn't explicitly mentioned. + // Set in prepareExpressions(). The i'th expr produces the i'th column of the target + // table. + private final ArrayList resultExprs_ = new ArrayList(); + // The column permutation is specified by writing INSERT INTO tbl(col3, col1, col2...) // // It is a mapping from select-list expr index to (non-partition) output column. If @@ -144,7 +150,7 @@ public class InsertStmt extends StatementBase { if (!needsGeneratedQueryStatement_) { try { queryStmt_.analyze(analyzer); - selectListExprs = queryStmt_.getBaseTblResultExprs(); + selectListExprs = Expr.cloneList(queryStmt_.getBaseTblResultExprs()); } catch (AnalysisException e) { if (analyzer.getMissingTbls().isEmpty()) throw e; } @@ -256,7 +262,6 @@ public class InsertStmt extends StatementBase { // Populate partitionKeyExprs from partitionKeyValues and selectExprTargetColumns prepareExpressions(selectExprTargetColumns, selectListExprs, table_, analyzer); - // Analyze plan hints at the end to prefer reporting other error messages first // (e.g., the PARTITION clause is not applicable to unpartitioned and HBase tables). analyzePlanHints(); @@ -418,7 +423,7 @@ public class InsertStmt extends StatementBase { * 2. Populates partitionKeyExprs with type-compatible expressions, in Hive * partition-column order, for all partition columns * - * 3. Replaces selectListExprs with type-compatible expressions, in Hive column order, + * 3. Populates resultExprs_ with type-compatible expressions, in Hive column order, * for all expressions in the select-list. Unmentioned columns are assigned NULL literal * expressions. * @@ -483,12 +488,11 @@ public class InsertStmt extends StatementBase { // Finally, 'undo' the permutation so that the selectListExprs are in Hive column // order, and add NULL expressions to all missing columns. - List permutedSelectListExprs = Lists.newArrayList(); for (Column tblColumn: table_.getColumnsInHiveOrder()) { boolean matchFound = false; for (int i = 0; i < selectListExprs.size(); ++i) { if (selectExprTargetColumns.get(i).getName().equals(tblColumn.getName())) { - permutedSelectListExprs.add(selectListExprs.get(i)); + resultExprs_.add(selectListExprs.get(i)); matchFound = true; break; } @@ -500,7 +504,7 @@ public class InsertStmt extends StatementBase { if (tblColumn.getPosition() >= numClusteringCols) { // Unmentioned non-clustering columns get NULL literals with the appropriate // target type because Parquet cannot handle NULL_TYPE (IMPALA-617). - permutedSelectListExprs.add(new NullLiteral().castTo(tblColumn.getType())); + resultExprs_.add(new NullLiteral().castTo(tblColumn.getType())); } } } @@ -508,14 +512,13 @@ public class InsertStmt extends StatementBase { if (needsGeneratedQueryStatement_) { // Build a query statement that returns NULL for every column List selectListItems = Lists.newArrayList(); - for(Expr e: permutedSelectListExprs) { + for(Expr e: resultExprs_) { selectListItems.add(new SelectListItem(e, null)); } SelectList selectList = new SelectList(selectListItems); queryStmt_ = new SelectStmt(selectList, null, null, null, null, null, null); queryStmt_.analyze(analyzer); } - queryStmt_.setResultExprs(permutedSelectListExprs); } /** @@ -593,6 +596,7 @@ public class InsertStmt extends StatementBase { public QueryStmt getQueryStmt() { return queryStmt_; } public List getPartitionKeyExprs() { return partitionKeyExprs_; } public Boolean isRepartition() { return isRepartition_; } + public ArrayList getResultExprs() { return resultExprs_; } public DataSink createDataSink() { // analyze() must have been called before. diff --git a/fe/src/main/java/com/cloudera/impala/analysis/QueryStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/QueryStmt.java index 0ae55e551..df5313710 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/QueryStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/QueryStmt.java @@ -170,11 +170,6 @@ public abstract class QueryStmt extends StatementBase { public ArrayList getResultExprs() { return resultExprs_; } public ArrayList getBaseTblResultExprs() { return baseTblResultExprs_; } - public void setResultExprs(List resultExprs) { - this.resultExprs_.clear(); - this.resultExprs_.addAll(resultExprs); - } - /** * Mark all slots that need to be materialized for the execution of this stmt. * This excludes slots referenced in resultExprs (it depends on the consumer of diff --git a/fe/src/main/java/com/cloudera/impala/planner/Planner.java b/fe/src/main/java/com/cloudera/impala/planner/Planner.java index 401fd85bb..3c5a5a3f8 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/Planner.java +++ b/fe/src/main/java/com/cloudera/impala/planner/Planner.java @@ -159,7 +159,12 @@ public class Planner { // set up table sink for root fragment rootFragment.setSink(insertStmt.createDataSink()); } - rootFragment.setOutputExprs(queryStmt.getBaseTblResultExprs()); + + if (analysisResult.isInsertStmt()) { + rootFragment.setOutputExprs(analysisResult.getInsertStmt().getResultExprs()); + } else { + rootFragment.setOutputExprs(queryStmt.getBaseTblResultExprs()); + } LOG.debug("finalize plan fragments"); for (PlanFragment fragment: fragments) { diff --git a/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test b/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test index 1e8c93111..7b8171c6f 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test +++ b/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test @@ -194,3 +194,142 @@ NULL,'NULL',2,'foo' ---- TYPES INT,STRING,INT,STRING ==== +---- QUERY +# Perform the same set of queries, but with SELECT clauses rather than VALUES +# Simple non-permutation +insert into perm_nopart(int_col1, string_col, int_col2) select 1,'str',2 +---- SETUP +RESET insert_permutation_test.perm_nopart +---- RESULTS +: 1 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +1,'str',2 +---- TYPES +INT,STRING,INT +==== +---- QUERY +# Permute the int columns +insert into perm_nopart(int_col2, string_col, int_col1) select 1,'str',2 +---- SETUP +RESET insert_permutation_test.perm_nopart +---- RESULTS +: 1 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +2,'str',1 +---- TYPES +INT,STRING,INT +==== +---- QUERY +# Leave out two columns, check they are assigned NULL +insert into perm_nopart(int_col2) select 1 +---- SETUP +RESET insert_permutation_test.perm_nopart +---- RESULTS +: 1 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +NULL,'NULL',1 +---- TYPES +INT,STRING,INT +==== +---- QUERY +# Permute the partition columns +insert into perm_part(p1, string_col, int_col1, p2) select 10,'str',1, 'hello' +---- SETUP +RESET insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=hello/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +1,'str',10,'hello' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Same thing - permute the partition columns, but invert their order relative to Hive +insert into perm_part(p2, string_col, int_col1, p1) select 'hello','str',1, 10 +---- SETUP +RESET insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=hello/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +1,'str',10,'hello' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Check NULL if only partition keys are mentioned +insert into perm_part(p2, p1) select 'hello', 10 +---- SETUP +RESET insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=hello/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +NULL,'NULL',10,'hello' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Check NULL if only partition keys are mentioned, one static +insert into perm_part(p2) PARTITION(p1=10) select 'hello' +---- SETUP +RESET insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=hello/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +NULL,'NULL',10,'hello' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Now some queries that use SELECT FROM +# Simple non-permutation +insert into perm_nopart(int_col1, string_col, int_col2) select 1,'str',2 FROM +functional.alltypes LIMIT 2 +---- SETUP +RESET insert_permutation_test.perm_nopart +---- RESULTS +: 2 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +1,'str',2 +1,'str',2 +---- TYPES +INT,STRING,INT +==== +---- QUERY +insert into perm_nopart(int_col1) select id FROM functional.alltypes ORDER BY ID LIMIT 2 +---- SETUP +RESET insert_permutation_test.perm_nopart +---- RESULTS +: 2 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +0,'NULL',NULL +1,'NULL',NULL +---- TYPES +INT,STRING,INT +==== \ No newline at end of file