diff --git a/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java index 9a641d4db..acc7e832f 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java @@ -91,6 +91,12 @@ public class InsertStmt extends StatementBase { // should decide whether to re-partition or not). private Boolean isRepartition_ = null; + // Output expressions that produce the final results to write to the target table. May + // include casts, and NullLiterals where an output column isn't explicitly mentioned. + // Set in prepareExpressions(). The i'th expr produces the i'th column of the target + // table. + private final ArrayList resultExprs_ = new ArrayList(); + // The column permutation is specified by writing INSERT INTO tbl(col3, col1, col2...) // // It is a mapping from select-list expr index to (non-partition) output column. If @@ -144,7 +150,7 @@ public class InsertStmt extends StatementBase { if (!needsGeneratedQueryStatement_) { try { queryStmt_.analyze(analyzer); - selectListExprs = queryStmt_.getBaseTblResultExprs(); + selectListExprs = Expr.cloneList(queryStmt_.getBaseTblResultExprs()); } catch (AnalysisException e) { if (analyzer.getMissingTbls().isEmpty()) throw e; } @@ -256,7 +262,6 @@ public class InsertStmt extends StatementBase { // Populate partitionKeyExprs from partitionKeyValues and selectExprTargetColumns prepareExpressions(selectExprTargetColumns, selectListExprs, table_, analyzer); - // Analyze plan hints at the end to prefer reporting other error messages first // (e.g., the PARTITION clause is not applicable to unpartitioned and HBase tables). analyzePlanHints(); @@ -418,7 +423,7 @@ public class InsertStmt extends StatementBase { * 2. Populates partitionKeyExprs with type-compatible expressions, in Hive * partition-column order, for all partition columns * - * 3. Replaces selectListExprs with type-compatible expressions, in Hive column order, + * 3. Populates resultExprs_ with type-compatible expressions, in Hive column order, * for all expressions in the select-list. Unmentioned columns are assigned NULL literal * expressions. * @@ -483,12 +488,11 @@ public class InsertStmt extends StatementBase { // Finally, 'undo' the permutation so that the selectListExprs are in Hive column // order, and add NULL expressions to all missing columns. - List permutedSelectListExprs = Lists.newArrayList(); for (Column tblColumn: table_.getColumnsInHiveOrder()) { boolean matchFound = false; for (int i = 0; i < selectListExprs.size(); ++i) { if (selectExprTargetColumns.get(i).getName().equals(tblColumn.getName())) { - permutedSelectListExprs.add(selectListExprs.get(i)); + resultExprs_.add(selectListExprs.get(i)); matchFound = true; break; } @@ -500,7 +504,7 @@ public class InsertStmt extends StatementBase { if (tblColumn.getPosition() >= numClusteringCols) { // Unmentioned non-clustering columns get NULL literals with the appropriate // target type because Parquet cannot handle NULL_TYPE (IMPALA-617). - permutedSelectListExprs.add(new NullLiteral().castTo(tblColumn.getType())); + resultExprs_.add(new NullLiteral().castTo(tblColumn.getType())); } } } @@ -508,14 +512,13 @@ public class InsertStmt extends StatementBase { if (needsGeneratedQueryStatement_) { // Build a query statement that returns NULL for every column List selectListItems = Lists.newArrayList(); - for(Expr e: permutedSelectListExprs) { + for(Expr e: resultExprs_) { selectListItems.add(new SelectListItem(e, null)); } SelectList selectList = new SelectList(selectListItems); queryStmt_ = new SelectStmt(selectList, null, null, null, null, null, null); queryStmt_.analyze(analyzer); } - queryStmt_.setResultExprs(permutedSelectListExprs); } /** @@ -593,6 +596,7 @@ public class InsertStmt extends StatementBase { public QueryStmt getQueryStmt() { return queryStmt_; } public List getPartitionKeyExprs() { return partitionKeyExprs_; } public Boolean isRepartition() { return isRepartition_; } + public ArrayList getResultExprs() { return resultExprs_; } public DataSink createDataSink() { // analyze() must have been called before. diff --git a/fe/src/main/java/com/cloudera/impala/analysis/QueryStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/QueryStmt.java index 0ae55e551..df5313710 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/QueryStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/QueryStmt.java @@ -170,11 +170,6 @@ public abstract class QueryStmt extends StatementBase { public ArrayList getResultExprs() { return resultExprs_; } public ArrayList getBaseTblResultExprs() { return baseTblResultExprs_; } - public void setResultExprs(List resultExprs) { - this.resultExprs_.clear(); - this.resultExprs_.addAll(resultExprs); - } - /** * Mark all slots that need to be materialized for the execution of this stmt. * This excludes slots referenced in resultExprs (it depends on the consumer of diff --git a/fe/src/main/java/com/cloudera/impala/planner/Planner.java b/fe/src/main/java/com/cloudera/impala/planner/Planner.java index 401fd85bb..3c5a5a3f8 100644 --- a/fe/src/main/java/com/cloudera/impala/planner/Planner.java +++ b/fe/src/main/java/com/cloudera/impala/planner/Planner.java @@ -159,7 +159,12 @@ public class Planner { // set up table sink for root fragment rootFragment.setSink(insertStmt.createDataSink()); } - rootFragment.setOutputExprs(queryStmt.getBaseTblResultExprs()); + + if (analysisResult.isInsertStmt()) { + rootFragment.setOutputExprs(analysisResult.getInsertStmt().getResultExprs()); + } else { + rootFragment.setOutputExprs(queryStmt.getBaseTblResultExprs()); + } LOG.debug("finalize plan fragments"); for (PlanFragment fragment: fragments) { diff --git a/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test b/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test index 1e8c93111..7b8171c6f 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test +++ b/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test @@ -194,3 +194,142 @@ NULL,'NULL',2,'foo' ---- TYPES INT,STRING,INT,STRING ==== +---- QUERY +# Perform the same set of queries, but with SELECT clauses rather than VALUES +# Simple non-permutation +insert into perm_nopart(int_col1, string_col, int_col2) select 1,'str',2 +---- SETUP +RESET insert_permutation_test.perm_nopart +---- RESULTS +: 1 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +1,'str',2 +---- TYPES +INT,STRING,INT +==== +---- QUERY +# Permute the int columns +insert into perm_nopart(int_col2, string_col, int_col1) select 1,'str',2 +---- SETUP +RESET insert_permutation_test.perm_nopart +---- RESULTS +: 1 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +2,'str',1 +---- TYPES +INT,STRING,INT +==== +---- QUERY +# Leave out two columns, check they are assigned NULL +insert into perm_nopart(int_col2) select 1 +---- SETUP +RESET insert_permutation_test.perm_nopart +---- RESULTS +: 1 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +NULL,'NULL',1 +---- TYPES +INT,STRING,INT +==== +---- QUERY +# Permute the partition columns +insert into perm_part(p1, string_col, int_col1, p2) select 10,'str',1, 'hello' +---- SETUP +RESET insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=hello/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +1,'str',10,'hello' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Same thing - permute the partition columns, but invert their order relative to Hive +insert into perm_part(p2, string_col, int_col1, p1) select 'hello','str',1, 10 +---- SETUP +RESET insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=hello/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +1,'str',10,'hello' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Check NULL if only partition keys are mentioned +insert into perm_part(p2, p1) select 'hello', 10 +---- SETUP +RESET insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=hello/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +NULL,'NULL',10,'hello' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Check NULL if only partition keys are mentioned, one static +insert into perm_part(p2) PARTITION(p1=10) select 'hello' +---- SETUP +RESET insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=hello/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +NULL,'NULL',10,'hello' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Now some queries that use SELECT FROM +# Simple non-permutation +insert into perm_nopart(int_col1, string_col, int_col2) select 1,'str',2 FROM +functional.alltypes LIMIT 2 +---- SETUP +RESET insert_permutation_test.perm_nopart +---- RESULTS +: 2 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +1,'str',2 +1,'str',2 +---- TYPES +INT,STRING,INT +==== +---- QUERY +insert into perm_nopart(int_col1) select id FROM functional.alltypes ORDER BY ID LIMIT 2 +---- SETUP +RESET insert_permutation_test.perm_nopart +---- RESULTS +: 2 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +0,'NULL',NULL +1,'NULL',NULL +---- TYPES +INT,STRING,INT +==== \ No newline at end of file