diff --git a/fe/src/main/cup/sql-parser.y b/fe/src/main/cup/sql-parser.y index 7350b8833..54e1a481f 100644 --- a/fe/src/main/cup/sql-parser.y +++ b/fe/src/main/cup/sql-parser.y @@ -23,6 +23,7 @@ import java.math.BigInteger; import java.util.ArrayList; import java.util.List; import java_cup.runtime.Symbol; +import com.google.common.collect.Lists; parser code {: private Symbol errorToken; @@ -198,6 +199,7 @@ nonterminal SelectStmt select_stmt; nonterminal ValuesStmt values_stmt; // Select or union statement. nonterminal QueryStmt query_stmt; +nonterminal QueryStmt optional_query_stmt; // Single select_stmt or parenthesized query_stmt. nonterminal QueryStmt union_operand; // List of select or union blocks connected by UNION operators or a single select block. @@ -221,6 +223,7 @@ nonterminal ArrayList expr_list; nonterminal ArrayList func_arg_list; nonterminal String alias_clause; nonterminal ArrayList ident_list; +nonterminal ArrayList optional_ident_list; nonterminal TableName table_name; nonterminal Expr where_clause; nonterminal Predicate predicate, between_predicate, comparison_predicate, @@ -248,6 +251,7 @@ nonterminal PrimitiveType primitive_type; nonterminal Expr sign_chain_expr; nonterminal InsertStmt insert_stmt; nonterminal StatementBase explain_stmt; +nonterminal List col_list; nonterminal ArrayList partition_spec; nonterminal ArrayList partition_clause; nonterminal ArrayList static_partition_key_value_list; @@ -297,7 +301,7 @@ precedence left ADD, SUBTRACT; precedence left STAR, DIVIDE, MOD, KW_DIV; precedence left BITAND, BITOR, BITXOR, BITNOT; precedence left KW_ORDER, KW_BY, KW_LIMIT; -precedence left RPAREN; +precedence left LPAREN, RPAREN; // Support chaining of timestamp arithmetic exprs. 
precedence left KW_INTERVAL; @@ -334,24 +338,47 @@ stmt ::= explain_stmt ::= KW_EXPLAIN query_stmt:query - {: + {: query.setIsExplain(true); RESULT = query; :} | KW_EXPLAIN insert_stmt:insert - {: + {: insert.setIsExplain(true); RESULT = insert; :} ; +// Insert statements have two optional clauses: the column permutation (INSERT into +// tbl(col1,...) etc) and the PARTITION clause. If the column permutation is present, the +// query statement clause is optional as well. insert_stmt ::= - KW_INSERT KW_OVERWRITE optional_kw_table table_name:table + KW_INSERT KW_OVERWRITE optional_kw_table table_name:table LPAREN + optional_ident_list:col_perm RPAREN partition_clause:list optional_query_stmt:query + {: RESULT = new InsertStmt(table, true, list, query, col_perm); :} + | KW_INSERT KW_OVERWRITE optional_kw_table table_name:table partition_clause:list query_stmt:query - {: RESULT = new InsertStmt(table, true, list, query); :} + {: RESULT = new InsertStmt(table, true, list, query, null); :} + | KW_INSERT KW_INTO optional_kw_table table_name:table LPAREN + optional_ident_list:col_perm RPAREN partition_clause:list optional_query_stmt:query + {: RESULT = new InsertStmt(table, false, list, query, col_perm); :} | KW_INSERT KW_INTO optional_kw_table table_name:table partition_clause:list query_stmt:query - {: RESULT = new InsertStmt(table, false, list, query); :} + {: RESULT = new InsertStmt(table, false, list, query, null); :} + ; + +optional_query_stmt ::= + query_stmt:query + {: RESULT = query; :} + | /* empty */ + {: RESULT = null; :} + ; + +optional_ident_list ::= + ident_list:ident + {: RESULT = ident; :} + | /* empty */ + {: RESULT = Lists.newArrayList(); :} ; optional_kw_table ::= @@ -365,7 +392,7 @@ alter_tbl_stmt ::= {: RESULT = new AlterTableAddReplaceColsStmt(table, col_defs, replace); :} | KW_ALTER KW_TABLE table_name:table KW_ADD if_not_exists_val:if_not_exists partition_spec:partition location_val:location - {: + {: RESULT = new AlterTableAddPartitionStmt(table, 
partition, location, if_not_exists); :} | KW_ALTER KW_TABLE table_name:table KW_DROP optional_kw_column IDENT:col_name @@ -379,7 +406,7 @@ alter_tbl_stmt ::= | KW_ALTER KW_TABLE table_name:table partition_spec:partition KW_SET KW_FILEFORMAT file_format_val:file_format {: RESULT = new AlterTableSetFileFormatStmt(table, partition, file_format); :} - | KW_ALTER KW_TABLE table_name:table partition_spec:partition KW_SET + | KW_ALTER KW_TABLE table_name:table partition_spec:partition KW_SET KW_LOCATION STRING_LITERAL:location {: RESULT = new AlterTableSetLocationStmt(table, partition, location); :} | KW_ALTER KW_TABLE table_name:table KW_RENAME KW_TO table_name:new_table @@ -1095,7 +1122,7 @@ case_else_clause ::= {: RESULT = null; :} ; -sign_chain_expr ::= +sign_chain_expr ::= SUBTRACT expr:e {: // integrate signs into literals @@ -1343,7 +1370,7 @@ like_predicate ::= new LikePredicate(LikePredicate.Operator.REGEXP, e1, e2), null); :} ; -// Avoid a reduce/reduce conflict with compound_predicate by explicitly +// Avoid a reduce/reduce conflict with compound_predicate by explicitly // using non_pred_expr and predicate separately instead of expr. 
between_predicate ::= expr:e1 KW_BETWEEN non_pred_expr:e2 KW_AND expr:e3 @@ -1352,13 +1379,13 @@ between_predicate ::= {: RESULT = new BetweenPredicate(e1, e2, e3, false); :} | expr:e1 KW_NOT KW_BETWEEN non_pred_expr:e2 KW_AND expr:e3 {: RESULT = new BetweenPredicate(e1, e2, e3, true); :} - | expr:e1 KW_NOT KW_BETWEEN predicate:e2 KW_AND expr:e3 + | expr:e1 KW_NOT KW_BETWEEN predicate:e2 KW_AND expr:e3 {: RESULT = new BetweenPredicate(e1, e2, e3, true); :} ; in_predicate ::= expr:e KW_IN LPAREN func_arg_list:l RPAREN - {: RESULT = new InPredicate(e, l, false); :} + {: RESULT = new InPredicate(e, l, false); :} | expr:e KW_NOT KW_IN LPAREN func_arg_list:l RPAREN {: RESULT = new InPredicate(e, l, true); :} ; @@ -1408,4 +1435,3 @@ primitive_type ::= | KW_STRING {: RESULT = PrimitiveType.STRING; :} ; - diff --git a/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java index dde3afd8c..7b3f9b1e5 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/InsertStmt.java @@ -18,6 +18,7 @@ import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedList; import java.util.List; +import java.util.Set; import com.cloudera.impala.authorization.Privilege; import com.cloudera.impala.catalog.AuthorizationException; @@ -28,6 +29,9 @@ import com.cloudera.impala.catalog.Table; import com.cloudera.impala.common.AnalysisException; import com.cloudera.impala.planner.DataSink; import com.google.common.base.Preconditions; +import com.google.common.collect.Sets; +import com.google.common.collect.Lists; +import com.google.common.base.Joiner; /** * Representation of a single insert statement, including the select statement @@ -44,8 +48,12 @@ public class InsertStmt extends StatementBase { // List of column:value elements from the PARTITION (...) clause. // Set to null if no partition was given. 
private final List partitionKeyValues; - // Select or union whose results are to be inserted. - private final QueryStmt queryStmt; + // Select or union whose results are to be inserted. If null, will be set after + // analysis. + private QueryStmt queryStmt; + // False if the original insert statement had a query statement, true if we need to + // auto-generate one (for insert into tbl();) during analysis. + private final boolean needsGeneratedQueryStatement; // Set in analyze(). Contains metadata of target table to determine type of sink. private Table table; // Set in analyze(). Exprs corresponding to the partitionKeyValues, @@ -53,25 +61,53 @@ public class InsertStmt extends StatementBase { // True if this InsertStmt is the top level query from an EXPLAIN private boolean isExplain = false; + // The column permutation is specified by writing INSERT INTO tbl(col3, col1, col2...) + // + // It is a mapping from select-list expr index to (non-partition) output column. If + // null, will be set to the default permutation of all non-partition columns in Hive + // order. + // + // A column is said to be 'mentioned' if it occurs either in the column permutation, or + // the PARTITION clause. If columnPermutation is null, all non-partition columns are + // considered mentioned. + // + // Between them, the columnPermutation and the set of partitionKeyValues must mention to + // every partition column in the target table exactly once. Other columns, if not + // explicitly mentioned, will be assigned NULL values. Partition columns are not + // defaulted to NULL by design, and are not just for NULL-valued partition slots. + // + // Dynamic partition keys may occur in either the permutation or the PARTITION + // clause. Partition columns with static values may only be mentioned in the PARTITION + // clause, where the static value is specified. 
+ private final List columnPermutation; + public InsertStmt(TableName targetTable, boolean overwrite, - List partitionKeyValues, QueryStmt queryStmt) { + List partitionKeyValues, QueryStmt queryStmt, + List columnPermutation) { this.targetTableName = targetTable; this.originalTableName = targetTableName; this.overwrite = overwrite; this.partitionKeyValues = partitionKeyValues; this.queryStmt = queryStmt; + needsGeneratedQueryStatement = (queryStmt == null); + this.columnPermutation = columnPermutation; table = null; } @Override public void analyze(Analyzer analyzer) throws AnalysisException, AuthorizationException { - queryStmt.analyze(analyzer); - List selectListExprs = queryStmt.getResultExprs(); + List selectListExprs; + if (!needsGeneratedQueryStatement) { + queryStmt.analyze(analyzer); + selectListExprs = queryStmt.getResultExprs(); + } else { + selectListExprs = Lists.newArrayList(); + } if (!targetTableName.isFullyQualified()) { - this.targetTableName = new TableName(analyzer.getDefaultDb(), - targetTableName.getTbl()); + this.targetTableName = + new TableName(analyzer.getDefaultDb(), targetTableName.getTbl()); } table = analyzer.getTable(targetTableName, Privilege.INSERT); @@ -81,156 +117,227 @@ public class InsertStmt extends StatementBase { boolean isHBaseTable = (table instanceof HBaseTable); int numClusteringCols = isHBaseTable ? 0 : table.getNumClusteringCols(); - int numDynamicPartKeys = 0; + + if (partitionKeyValues != null && numClusteringCols == 0) { + if (isHBaseTable) { + throw new AnalysisException("PARTITION clause is not valid for INSERT into " + + "HBase tables. '" + targetTableName + "' is an HBase table"); + + } else { + // Unpartitioned table, but INSERT has PARTITION clause + throw new AnalysisException("PARTITION clause is only valid for INSERT into " + + "partitioned table. 
'" + targetTableName + "' is not partitioned"); + } + } if (isHBaseTable && overwrite) { throw new AnalysisException("HBase doesn't have a way to perform INSERT OVERWRITE"); } - // Make sure there is a set of partitionKeys when the table below - // indicates they are needed. + // Analysis of the INSERT statement from this point is basically the act of matching + // the set of output columns (which come from a column permutation, perhaps + // implicitly, and the PARTITION clause) to the set of input columns (which come from + // the select-list and any statically-valued columns in the PARTITION clause). // - // Table Type | numClusteringCols | Expecting PartionKeys - // -----------+--------------------+------------------------ - // HDFS | 1+ | Yes - // HDFS | 0 | No - // HBase | 1+ | No + // First, we compute the set of mentioned columns, and reject statements that refer to + // non-existant columns, or duplicates (we must check both the column permutation, and + // the set of partition keys). Next, we check that all partition columns are + // mentioned. During this process we build the map from select-list expr index to + // column in the targeted table. + // + // Then we check that the select-list contains exactly the right number of expressions + // for all mentioned columns which are not statically-valued partition columns (which + // get their expressions from partitionKeyValues). + // + // Finally, prepareExpressions analyzes the expressions themselves, and confirms that + // they are type-compatible with the target columns. Where columns are not mentioned + // (and by this point, we know that missing columns are not partition columns), + // prepareExpressions assigns them a NULL literal expressions. - if (partitionKeyValues == null) { - if (numClusteringCols != 0) { - // Partitioned table but no partition clause. 
- throw new AnalysisException("No PARTITION clause given for INSERT into " + - "partitioned table '" + targetTableName.getTbl() + "'."); + // An null permutation clause is the same as listing all non-partition columns in + // order. + List analysisColumnPermutation = columnPermutation; + if (analysisColumnPermutation == null) { + analysisColumnPermutation = Lists.newArrayList(); + ArrayList tableColumns = table.getColumns(); + for (int i = numClusteringCols; i < tableColumns.size(); ++i) { + analysisColumnPermutation.add(tableColumns.get(i).getName()); } - } else { - if (numClusteringCols == 0) { - // Unpartitioned table, but INSERT has PARTITION clause - throw new AnalysisException("PARTITION clause is only valid for INSERT into " + - "partitioned table. '" + targetTableName.getTbl() + "' is not partitioned"); + } + + // selectExprTargetColumns maps from select expression index to a column in the target + // table. It will eventually include all mentioned columns that aren't static-valued + // partition columns. + ArrayList selectExprTargetColumns = Lists.newArrayList(); + + // Tracks the name of all columns encountered in either the permutation clause or the + // partition clause to detect duplicates. + Set mentionedColumnNames = Sets.newHashSet(); + for (String columnName: analysisColumnPermutation) { + Column column = table.getColumn(columnName); + if (column == null) { + throw new AnalysisException( + "Unknown column '" + columnName + "' in column permutation"); } - // Make sure static partition key values only contain const exprs. 
+ if (!mentionedColumnNames.add(columnName)) { + throw new AnalysisException( + "Duplicate column '" + columnName + "' in column permutation"); + } + selectExprTargetColumns.add(column); + } + + int numStaticPartitionExprs = 0; + if (partitionKeyValues != null) { + for (PartitionKeyValue pkv: partitionKeyValues) { + Column column = table.getColumn(pkv.getColName()); + if (column == null) { + throw new AnalysisException("Unknown column '" + pkv.getColName() + + "' in partition clause"); + } + + if (column.getPosition() >= numClusteringCols) { + throw new AnalysisException( + "Column '" + pkv.getColName() + "' is not a partition column"); + } + + if (!mentionedColumnNames.add(pkv.getColName())) { + throw new AnalysisException( + "Duplicate column '" + pkv.getColName() + "' in partition clause"); + } + if (!pkv.isDynamic()) { + numStaticPartitionExprs++; + } else { + selectExprTargetColumns.add(column); + } + } + } + + // Check that all columns are mentioned by the permutation and partition clauses + if (selectExprTargetColumns.size() + numStaticPartitionExprs != + table.getColumns().size()) { + // We've already ruled out too many columns in the permutation and partition clauses + // by checking that there are no duplicates and that every column mentioned actually + // exists. So all columns aren't mentioned in the query. If the unmentioned columns + // include partition columns, this is an error. + List missingColumnNames = Lists.newArrayList(); + for (Column column: table.getColumns()) { + if (!mentionedColumnNames.contains(column.getName())) { + // HBase tables have a single row-key column which is always in position 0. It + // must be mentioned, since it is invalid to set it to NULL (which would + // otherwise happen by default). 
+ if (isHBaseTable && column.getPosition() == 0) { + throw new AnalysisException("Row-key column '" + column.getName() + + "' must be explicitly mentioned in column permutation."); + } + if (column.getPosition() < numClusteringCols) { + missingColumnNames.add(column.getName()); + } + } + } + + if (!missingColumnNames.isEmpty()) { + throw new AnalysisException( + "Not enough partition columns mentioned in query. Missing columns are: " + + Joiner.on(", ").join(missingColumnNames)); + } + } + + // Expect the selectListExpr to have entries for every target column + if (selectExprTargetColumns.size() != selectListExprs.size()) { + String comparator = + (selectExprTargetColumns.size() < selectListExprs.size()) ? "fewer" : "more"; + String partitionClause = + (partitionKeyValues == null) ? "returns" : "and PARTITION clause return"; + + // If there was no column permutation provided, the error is that the select-list + // has the wrong number of expressions compared to the number of columns in the + // table. If there was a column permutation, then the mismatch is between the + // select-list and the permutation itself. + if (columnPermutation == null) { + int totalColumnsMentioned = selectListExprs.size() + numStaticPartitionExprs; + throw new AnalysisException(String.format( + "Target table '%s' has %s columns (%s) than the SELECT / VALUES clause %s" + + " (%s)", table.getFullName(), comparator, + table.getColumns().size(), partitionClause, totalColumnsMentioned)); + } else { + String partitionPrefix = + (partitionKeyValues == null) ? "mentions" : "and PARTITION clause mention"; + throw new AnalysisException(String.format( + "Column permutation %s %s columns (%s) than " + + "the SELECT / VALUES clause %s (%s)", partitionPrefix, comparator, + selectExprTargetColumns.size(), partitionClause, selectListExprs.size())); + } + } + + // Make sure static partition key values only contain const exprs. 
+ if (partitionKeyValues != null) { for (PartitionKeyValue kv: partitionKeyValues) { kv.analyze(analyzer); } - - // Check that the partition clause mentions all the table's partitioning - // columns, and that no non-partition columns are mentioned in the - // partition clause. - checkPartitionClauseCompleteness(); - - // Check that all dynamic partition keys are at the end of the selectListExprs. - numDynamicPartKeys = fillPartitionKeyExprs(); - - // make sure we have stats for partitionKeyExprs - for (Expr expr: partitionKeyExprs) { - expr.analyze(analyzer); - } } - // Check union compatibility, ignoring partitioning columns for dynamic partitions. - checkUnionCompatibility(table, selectListExprs, numDynamicPartKeys); + // Populate partitionKeyExprs from partitionKeyValues and selectExprTargetColumns + prepareExpressions(selectExprTargetColumns, selectListExprs, table, analyzer); } - /** - * Checks whether all partitioning columns in table are mentioned in - * partitionKeyValues, and that all partitionKeyValues have a match in table. - * - * @throws AnalysisException - * If the partitionKeyValues don't mention all partitioning columns in - * table, or if they mention extra columns. - */ - private void checkPartitionClauseCompleteness() - throws AnalysisException { - List columns = table.getColumns(); - int numClusteringCols = table.getNumClusteringCols(); - // Copy the partition key values a temporary list. - // We remove items as we match them against partitioning columns in the table. - List unmatchedPartKeyVals = new LinkedList(); - unmatchedPartKeyVals.addAll(partitionKeyValues); - // Check that all partitioning columns were mentioned in the partition clause. - // Remove matching items from unmatchedPartKeyVals - // to detect superfluous columns in the partition clause. 
- for (int i = 0; i < numClusteringCols; ++i) { - PartitionKeyValue matchingPartKeyVal = null; - Iterator clauseIter = unmatchedPartKeyVals.iterator(); - while (clauseIter.hasNext()) { - PartitionKeyValue pkv = clauseIter.next(); - if (pkv.getColName().equals(columns.get(i).getName())) { - matchingPartKeyVal = pkv; - clauseIter.remove(); - break; - } - } - if (matchingPartKeyVal == null) { - throw new AnalysisException("Missing partition column '" - + columns.get(i).getName() + "' from PARTITION clause."); - } - } - // All partitioning columns of the table were matched. - // Check for superfluous columns in the partition clause. - if (!unmatchedPartKeyVals.isEmpty()) { - StringBuilder strBuilder = new StringBuilder(); - for (PartitionKeyValue pkv : unmatchedPartKeyVals) { - strBuilder.append(pkv.getColName() + ","); - } - strBuilder.deleteCharAt(strBuilder.length() - 1); - throw new AnalysisException("Superfluous columns in PARTITION clause: " - + strBuilder.toString() + "."); - } - } /** - * Fills the partitionKeyExprs class member, by positionally - * matching the dynamic partition keys - * against the last numDynamicPartKeys selectListExprs. - * If necessary, adds casts to the selectListExprs to make them compatible - * with the type of the corresponding partitioning column. + * Performs three final parts of the analysis: + * 1. Checks type compatibility between all expressions and their targets + * + * 2. Populates partitionKeyExprs with type-compatible expressions, in Hive + * partition-column order, for all partition columns + * + * 3. Replaces selectListExprs with type-compatible expressions, in Hive column order, + * for all expressions in the select-list. Unmentioned columns are assigned NULL literal + * expressions. + * + * If necessary, adds casts to the expressions to make them compatible with the type of + * the corresponding column. * - * @return Number of dynamic partition keys. 
* @throws AnalysisException - * If not all dynamic partition keys are mentioned in the selectListExprs. + * If an expression is not compatible with its target column */ - private int fillPartitionKeyExprs() throws AnalysisException { - // Count the number of dynamic partition keys. - int numDynamicPartKeys = 0; - for (PartitionKeyValue pkv : partitionKeyValues) { - if (pkv.isDynamic()) { - ++numDynamicPartKeys; - } - } - List selectListExprs = queryStmt.getResultExprs(); - // Position of selectListExpr corresponding to the next dynamic partition column. - int exprMatchPos = table.getColumns().size() - table.getNumClusteringCols(); + private void prepareExpressions(List selectExprTargetColumns, + List selectListExprs, Table tbl, Analyzer analyzer) + throws AnalysisException, AuthorizationException { // Temporary lists of partition key exprs and names in an arbitrary order. List tmpPartitionKeyExprs = new ArrayList(); List tmpPartitionKeyNames = new ArrayList(); - for (PartitionKeyValue pkv : partitionKeyValues) { - Expr partitionColValue; - if (pkv.isStatic()) { - partitionColValue = pkv.getValue(); - } else { - if (exprMatchPos >= selectListExprs.size()) { - throw new AnalysisException("No matching select list item found for " - + "dynamic partition '" + pkv.getColName() + "'.\n" - + "The select list items corresponding to dynamic partition " - + "keys must be at the end of the select list."); - } - partitionColValue = selectListExprs.get(exprMatchPos++); - } - Column tableColumn = table.getColumn(pkv.getColName()); - Expr compatibleExpr = checkTypeCompatibility(tableColumn, partitionColValue); - tmpPartitionKeyExprs.add(compatibleExpr); - tmpPartitionKeyNames.add(pkv.getColName()); - } - // Reorder the partition key exprs and names to be consistent - // with the target table declaration. - // We need those exprs in the original order to create the - // corresponding Hdfs folder structure correctly. 
- int numClusteringCols = table.getNumClusteringCols(); - for (int i = 0; i < numClusteringCols; ++i) { - Column c = table.getColumns().get(i); + int numClusteringCols = (tbl instanceof HBaseTable) ? 0 : tbl.getNumClusteringCols(); + + // Check dynamic partition columns for type compatibility. + for (int i = 0; i < selectListExprs.size(); ++i) { + Column targetColumn = selectExprTargetColumns.get(i); + Expr compatibleExpr = checkTypeCompatibility(targetColumn, selectListExprs.get(i)); + if (targetColumn.getPosition() < numClusteringCols) { + // This is a dynamic clustering column + tmpPartitionKeyExprs.add(compatibleExpr); + tmpPartitionKeyNames.add(targetColumn.getName()); + } + selectListExprs.set(i, compatibleExpr); + } + + // Check static partition columns, dynamic entries in partitionKeyValues will already + // be in selectExprTargetColumns and therefore are ignored in this loop + if (partitionKeyValues != null) { + for (PartitionKeyValue pkv: partitionKeyValues) { + if (pkv.isStatic()) { + // tableColumns is guaranteed to exist after the earlier analysis checks + Column tableColumn = table.getColumn(pkv.getColName()); + Expr compatibleExpr = checkTypeCompatibility(tableColumn, pkv.getValue()); + tmpPartitionKeyExprs.add(compatibleExpr); + tmpPartitionKeyNames.add(pkv.getColName()); + } + } + } + + // Reorder the partition key exprs and names to be consistent with the target table + // declaration. We need those exprs in the original order to create the corresponding + // Hdfs folder structure correctly. + for (Column c: table.getColumns()) { for (int j = 0; j < tmpPartitionKeyNames.size(); ++j) { if (c.getName().equals(tmpPartitionKeyNames.get(j))) { partitionKeyExprs.add(tmpPartitionKeyExprs.get(j)); @@ -240,53 +347,47 @@ public class InsertStmt extends StatementBase { } Preconditions.checkState(partitionKeyExprs.size() == numClusteringCols); - return numDynamicPartKeys; - } - - /** - * Checks for union compatibility of a table and a list of exprs. 
- * May cast the exprs to higher precision types, - * if necessary, to make them compatible with their corresponding table columns. - * - * @param table - * Table from the metadata - * @param selectListExprs - * In/Out: List of expressions from a select statement. - * Possibly modified with casts. - * @param numDynamicPartKeys - * The number of dynamic partition keys. We assume that the partition keys - * have a verified match at the end of the select list. - * @throws AnalysisException - * If the columns and exprs are not union compatible, - * or if making them union compatible - * would lose precision in at least one column. - */ - private void checkUnionCompatibility(Table table, List selectListExprs, - int numDynamicPartKeys) - throws AnalysisException { - List columns = table.getColumns(); - - // For writes to hbase every query node can write to any part of - // the row key space. So use 0 as the numClusteringCols. - int numClusteringCols = - (table instanceof HBaseTable) ? 0 : table.getNumClusteringCols(); - - int numNonClusteringCols = columns.size() - numClusteringCols; - if (numNonClusteringCols != selectListExprs.size() - numDynamicPartKeys) { - throw new AnalysisException("Target table '" + targetTableName - + "' and result of select statement are not union compatible.\n" - + "Target table expects " - + numNonClusteringCols + " columns but the select statement returns " - + (selectListExprs.size() - numDynamicPartKeys) + "."); + // Make sure we have stats for partitionKeyExprs + for (Expr expr: partitionKeyExprs) { + expr.analyze(analyzer); } - for (int i = numClusteringCols; i < columns.size(); ++i) { - int selectListIndex = i - numClusteringCols; - // Check for compatible type, and add casts to the selectListExprs if necessary. 
- Expr expr = checkTypeCompatibility(columns.get(i), - selectListExprs.get(selectListIndex)); - selectListExprs.set(selectListIndex, expr); + // Finally, 'undo' the permutation so that the selectListExprs are in Hive column + // order, and add NULL expressions to all missing columns. + List permutedSelectListExprs = Lists.newArrayList(); + for (Column tblColumn: table.getColumnsInHiveOrder()) { + boolean matchFound = false; + for (int i = 0; i < selectListExprs.size(); ++i) { + if (selectExprTargetColumns.get(i).getName().equals(tblColumn.getName())) { + permutedSelectListExprs.add(selectListExprs.get(i)); + matchFound = true; + break; + } + } + // If no match is found, either the column is a clustering column with a static + // value, or it was unmentioned and therefore should have a NULL select-list + // expression. + if (!matchFound) { + if (tblColumn.getPosition() >= numClusteringCols) { + // Unmentioned non-clustering columns get NULL expressions. Note that we do not + // analyze them, nor do we type-check them, on the assumption that neither is + // necessary. + permutedSelectListExprs.add(new NullLiteral()); + } + } } + // TODO: Check that HBase row-key columns are not NULL? See IMPALA-406 + if (needsGeneratedQueryStatement) { + // Build a query statement that returns NULL for every column + List selectListItems = Lists.newArrayList(); + for(Expr e: permutedSelectListExprs) { + selectListItems.add(new SelectListItem(e, null)); + } + SelectList selectList = new SelectList(selectListItems); + queryStmt = new SelectStmt(selectList, null, null, null, null, null, -1); + queryStmt.analyze(analyzer); + } + queryStmt.setResultExprs(permutedSelectListExprs); } /** @@ -297,8 +398,6 @@ public class InsertStmt extends StatementBase { * Table column. * @param expr * Expr to be checked for type compatibility with column, - * @return - * Possibly cast compatible expr. 
* @throws AnalysisException * If the column and expr type are incompatible, or if casting the * expr would lead to loss of precision. @@ -317,18 +416,21 @@ public class InsertStmt extends StatementBase { PrimitiveType.getAssignmentCompatibleType(colType, exprType); // Incompatible types. if (!compatibleType.isValid()) { - throw new AnalysisException("Target table '" + targetTableName - + "' and result of select statement are not union compatible.\n" - + "Incompatible types '" + colType.toString() + "' and '" - + exprType.toString() + "' in column '" + expr.toSql() + "'."); + throw new AnalysisException( + String.format("Target table '%s' is incompatible with SELECT / PARTITION " + + "expressions.\nExpression '%s' (type: %s) is not compatible " + + "with column '%s' (type: %s)", + targetTableName, expr.toSql(), exprType, + column.getName(), colType)); } // Loss of precision when inserting into the table. if (compatibleType != colType && !compatibleType.isNull()) { - throw new AnalysisException("Inserting into target table '" - + targetTableName.getTbl() + "' may result in loss of precision.\n" - + "Would need to cast '" - + expr.toSql() + "' to '" - + colType.toString() + "'."); + throw new AnalysisException( + String.format("Possible loss of precision for target table '%s'.\n" + + "Expression '%s' (type: %s) would need to be cast to %s" + + " for column '%s'", + targetTableName, expr.toSql(), exprType, colType, + column.getName())); } // Add a cast to the selectListExpr to the higher type. 
Expr castExpr = expr.castTo(compatibleType); @@ -347,14 +449,13 @@ public class InsertStmt extends StatementBase { return overwrite; } + /** + * Only valid after analysis + */ public QueryStmt getQueryStmt() { return queryStmt; } - public List getPartitionList() { - return partitionKeyValues; - } - public List getPartitionKeyExprs() { return partitionKeyExprs; } @@ -374,20 +475,23 @@ public class InsertStmt extends StatementBase { } else { strBuilder.append("INTO "); } - strBuilder.append("TABLE " + originalTableName + " "); - if (partitionKeyValues != null) { - strBuilder.append("PARTITION ("); - for (int i = 0; i < partitionKeyValues.size(); ++i) { - PartitionKeyValue pkv = partitionKeyValues.get(i); - strBuilder.append(pkv.getColName()); - if (pkv.getValue() != null) { - strBuilder.append("=" + pkv.getValue().toSql()); - } - strBuilder.append((i+1 != partitionKeyValues.size()) ? ", " : ""); - } - strBuilder.append(") "); + strBuilder.append("TABLE " + originalTableName); + if (columnPermutation != null) { + strBuilder.append("("); + strBuilder.append(Joiner.on(", ").join(columnPermutation)); + strBuilder.append(")"); + } + if (partitionKeyValues != null) { + List values = Lists.newArrayList(); + for (PartitionKeyValue pkv: partitionKeyValues) { + values.add(pkv.getColName() + + (pkv.getValue() != null ? 
("=" + pkv.getValue().toSql()) : "")); + } + strBuilder.append(" PARTITION (" + Joiner.on(", ").join(values) + ")"); + } + if (!needsGeneratedQueryStatement) { + strBuilder.append(" " + queryStmt.toSql()); } - strBuilder.append(queryStmt.toSql()); return strBuilder.toString(); } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/QueryStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/QueryStmt.java index 0a98b09e9..a341f28c8 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/QueryStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/QueryStmt.java @@ -162,6 +162,11 @@ public abstract class QueryStmt extends StatementBase { return resultExprs; } + public void setResultExprs(List resultExprs) { + this.resultExprs.clear(); + this.resultExprs.addAll(resultExprs); + } + public void setIsExplain(boolean isExplain) { this.isExplain = isExplain; } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/SelectList.java b/fe/src/main/java/com/cloudera/impala/analysis/SelectList.java index 73eba778e..633d0c89c 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/SelectList.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/SelectList.java @@ -14,6 +14,7 @@ package com.cloudera.impala.analysis; +import java.util.List; import java.util.ArrayList; import com.google.common.collect.Lists; @@ -26,10 +27,14 @@ class SelectList { private boolean isDistinct; public SelectList() { - super(); this.isDistinct = false; } + public SelectList(List items) { + isDistinct = false; + this.items.addAll(items); + } + public ArrayList getItems() { return items; } diff --git a/fe/src/main/java/com/cloudera/impala/analysis/SelectStmt.java b/fe/src/main/java/com/cloudera/impala/analysis/SelectStmt.java index 651e63a93..0f37303dd 100644 --- a/fe/src/main/java/com/cloudera/impala/analysis/SelectStmt.java +++ b/fe/src/main/java/com/cloudera/impala/analysis/SelectStmt.java @@ -30,8 +30,8 @@ import com.google.common.base.Preconditions; import 
com.google.common.collect.Lists; /** - * Representation of a single select block, including GROUP BY, ORDERY BY and HAVING clauses. - * + * Representation of a single select block, including GROUP BY, ORDER BY and HAVING + * clauses. */ public class SelectStmt extends QueryStmt { private final static Logger LOG = LoggerFactory.getLogger(SelectStmt.class); diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java index 6c1ab11c0..061a19698 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzeStmtsTest.java @@ -855,9 +855,11 @@ public class AnalyzeStmtsTest extends AnalyzerTest { AnalysisError("insert overwrite table functional.alltypes " + "partition (year, month) " + "values(1, true, 'a', 1, 1, 1, 1.0, 1.0, 'a', 'a', cast(0 as timestamp)," + - "2009, 10)", "Target table 'functional.alltypes' and result of select " + - "statement are not union compatible.\nIncompatible types 'TINYINT' and " + - "'STRING' in column ''."); + "2009, 10)", + "Target table 'functional.alltypes' is incompatible with SELECT / PARTITION " + + "expressions.\n" + + "Expression '' (type: STRING) is not compatible with column " + + "'tinyint_col' (type: TINYINT)"); } @Test @@ -866,6 +868,7 @@ public class AnalyzeStmtsTest extends AnalyzerTest { testInsertStatic(qualifier); testInsertDynamic(qualifier); testInsertUnpartitioned(qualifier); + testInsertWithPermutation(qualifier); } } @@ -957,34 +960,31 @@ public class AnalyzeStmtsTest extends AnalyzerTest { "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", - "No matching select list item found for dynamic partition 'year'.\n" + - "The select list items corresponding to dynamic partition keys " + - "must be at the end of the select list."); + "Target 
table 'functional.alltypessmall' has more columns (13) than the " + + "SELECT / VALUES clause and PARTITION clause return (11)"); // No corresponding select list items of partially dynamic partitions. AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year=2009, month)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", - "No matching select list item found for dynamic partition 'month'.\n" + - "The select list items corresponding to dynamic partition keys " + - "must be at the end of the select list."); + "Target table 'functional.alltypessmall' has more columns (13) than the " + + "SELECT / VALUES clause and PARTITION clause return (12)"); + // No corresponding select list items of partially dynamic partitions. AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year, month=4)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", - "No matching select list item found for dynamic partition 'year'.\n" + - "The select list items corresponding to dynamic partition keys " + - "must be at the end of the select list."); + "Target table 'functional.alltypessmall' has more columns (13) than the " + + "SELECT / VALUES clause and PARTITION clause return (12)"); // Select '*' includes partitioning columns, and hence, is not union compatible. 
AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year=2009, month=4)" + "select * from functional.alltypes", - "Target table 'functional.alltypessmall' and result of select statement " + - "are not union compatible.\n" + - "Target table expects 11 columns but the select statement returns 13."); + "Target table 'functional.alltypessmall' has fewer columns (13) than the " + + "SELECT / VALUES clause and PARTITION clause return (15)"); } /** @@ -992,12 +992,20 @@ public class AnalyzeStmtsTest extends AnalyzerTest { */ private void testInsertUnpartitioned(String qualifier) throws AnalysisException { // Wrong number of columns. - AnalysisError("insert " + qualifier + " table functional.alltypesnopart " + + AnalysisError( + "insert " + qualifier + " table functional.alltypesnopart " + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + - "float_col, double_col, date_string_col, string_col from functional.alltypes"); + "float_col, double_col, date_string_col, string_col from functional.alltypes", + "Target table 'functional.alltypesnopart' has more columns (11) than the SELECT" + + " / VALUES clause returns (10)"); + // Wrong number of columns. - AnalysisError("INSERT " + qualifier + - " TABLE functional.hbasealltypesagg SELECT * FROM functional.alltypesagg"); + if (!qualifier.contains("OVERWRITE")) { + AnalysisError("INSERT " + qualifier + " TABLE functional.hbasealltypesagg " + + "SELECT * FROM functional.alltypesagg", + "Target table 'functional.hbasealltypesagg' has fewer columns (11) than the " + + "SELECT / VALUES clause returns (14)"); + } // Unpartitioned table without partition clause. 
AnalyzesOk("insert " + qualifier + " table functional.alltypesnopart " + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + @@ -1026,11 +1034,11 @@ public class AnalyzeStmtsTest extends AnalyzerTest { AnalysisError("INSERT " + qualifier + " TABLE functional.alltypesnopart PARTITION(year=2009) " + "SELECT * FROM functional.alltypes", "PARTITION clause is only valid for INSERT" + - " into partitioned table. 'alltypesnopart' is not partitioned"); + " into partitioned table. 'functional.alltypesnopart' is not partitioned"); // Unknown target DB - AnalysisError("INSERT " + qualifier + - " table UNKNOWNDB.alltypesnopart SELECT * from functional.alltypesnopart"); + AnalysisError("INSERT " + qualifier + " table UNKNOWNDB.alltypesnopart SELECT * " + + "from functional.alltypesnopart", "Database does not exist: UNKNOWNDB"); } /** @@ -1093,61 +1101,60 @@ public class AnalyzeStmtsTest extends AnalyzerTest { "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", - "No PARTITION clause given for INSERT into partitioned table 'alltypessmall'."); + "Not enough partition columns mentioned in query. Missing columns are: year, " + + "month"); // Not union compatible, unequal number of columns. 
AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year=2009, month=4)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, timestamp_col from functional.alltypes", - "Target table 'functional.alltypessmall' and result of select statement are " + - "not union compatible.\n" + - "Target table expects 11 columns but the select statement returns 10."); + "Target table 'functional.alltypessmall' has more columns (13) than the " + + "SELECT / VALUES clause and PARTITION clause return (12)"); // Not union compatible, incompatible type in last column (bool_col -> string). AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year=2009, month=4)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, bool_col, timestamp_col " + "from functional.alltypes", - "Target table 'functional.alltypessmall' and result of select " + - "statement are not union compatible.\n" + - "Incompatible types 'STRING' and 'BOOLEAN' in column 'bool_col'."); - // Too many partitioning columns. + "Target table 'functional.alltypessmall' is incompatible with SELECT / " + + "PARTITION expressions.\nExpression 'bool_col' (type: BOOLEAN) is not " + + "compatible with column 'string_col' (type: STRING)"); + // Duplicate partition columns AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year=2009, month=4, year=10)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", - "Superfluous columns in PARTITION clause: year."); + "Duplicate column 'year' in partition clause"); // Too few partitioning columns. 
AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year=2009)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", - "Missing partition column 'month' from PARTITION clause."); + "Not enough partition columns mentioned in query. Missing columns are: month"); // Non-partitioning column in partition clause. AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year=2009, bigint_col=10)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", - "Missing partition column 'month' from PARTITION clause."); + "Column 'bigint_col' is not a partition column"); // Loss of precision when casting in column 6 (double_col -> float). AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year=2009, month=4)" + "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "double_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", - "Inserting into target table 'alltypessmall' may result in loss of precision.\n" + - "Would need to cast 'double_col' to 'FLOAT'."); + "Possible loss of precision for target table 'functional.alltypessmall'.\n" + + "Expression 'double_col' (type: DOUBLE) would need to be cast to FLOAT for " + + "column 'float_col'"); // Select '*' includes partitioning columns, and hence, is not union compatible. 
AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year=2009, month=4)" + "select * from functional.alltypes", - "Target table 'functional.alltypessmall' and result of select statement are " + - "not union compatible.\n" + - "Target table expects 11 columns but the select statement returns 13."); - + "Target table 'functional.alltypessmall' has fewer columns (13) than the " + + "SELECT / VALUES clause and PARTITION clause return (15)"); // Partition columns should be type-checked AnalysisError("insert " + qualifier + " table functional.alltypessmall " + "partition (year=\"should be an int\", month=4)" + @@ -1162,5 +1169,158 @@ public class AnalyzeStmtsTest extends AnalyzerTest { "from functional.alltypes", "Non-constant expressions are not supported as static partition-key values " + "in 'month=int_col'."); + + if (qualifier.contains("OVERWRITE")) { + AnalysisError("insert " + qualifier + " table functional.hbasealltypessmall " + + "partition(year, month) select * from functional.alltypessmall", + "PARTITION clause is not valid for INSERT into HBase tables. 
" + + "'functional.hbasealltypessmall' is an HBase table"); + } + } + + private void testInsertWithPermutation(String qualifier) throws AnalysisException { + // Duplicate column in permutation + AnalysisError("insert " + qualifier + " table functional.tinytable(a, a, b)" + + "values(1, 2, 3)", "Duplicate column 'a' in column permutation"); + + // Unknown column in permutation + AnalysisError("insert " + qualifier + " table functional.tinytable" + + "(a, c) values(1, 2)", "Unknown column 'c' in column permutation"); + + // Too few columns in permutation - fill with NULL values + AnalyzesOk("insert " + qualifier + " table functional.tinytable(a) values('hello')"); + + // Too many columns in select list + AnalysisError("insert " + qualifier + " table functional.tinytable(a, b)" + + " select 'a', 'b', 'c' from functional.alltypes", + "Column permutation mentions fewer columns (2) than the SELECT / VALUES clause" + + " returns (3)"); + + // Too few columns in select list + AnalysisError("insert " + qualifier + " table functional.tinytable(a, b)" + + " select 'a' from functional.alltypes", + "Column permutation mentions more columns (2) than the SELECT / VALUES clause" + + " returns (1)"); + + // Type error in select clause brought on by permutation. 
tinyint_col and string_col + // are swapped in the permutation clause + AnalysisError("insert " + qualifier + " table functional.alltypesnopart" + + "(id, bool_col, string_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, tinyint_col, timestamp_col)" + + " select * from functional.alltypesnopart", + "Target table 'functional.alltypesnopart' is incompatible with SELECT / " + + "PARTITION expressions.\nExpression 'functional.alltypesnopart.tinyint_col' " + + "(type: TINYINT) is not compatible with column 'string_col' (type: STRING)"); + + // Above query should work fine if select list also permuted + AnalyzesOk("insert " + qualifier + " table functional.alltypesnopart" + + "(id, bool_col, string_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, tinyint_col, timestamp_col)" + + " select id, bool_col, string_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, tinyint_col, timestamp_col" + + " from functional.alltypesnopart"); + + // Mentioning partition keys (year, month) in permutation + AnalyzesOk("insert " + qualifier + " table functional.alltypes" + + "(id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, string_col, timestamp_col, " + + "year, month) select * from functional.alltypes"); + + // Duplicate mention of partition column + AnalysisError("insert " + qualifier + " table functional.alltypes" + + "(id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, string_col, timestamp_col, " + + "year, month) PARTITION(year) select * from functional.alltypes", + "Duplicate column 'year' in partition clause"); + + // Split partition columns between permutation and PARTITION clause. Also confirm + // that dynamic columns in PARTITION clause are looked for at the end of the select + // list. 
+ AnalyzesOk("insert " + qualifier + " table functional.alltypes" + + "(id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, string_col, timestamp_col, " + + "year) PARTITION(month) select * from functional.alltypes"); + + // Split partition columns, one dynamic in permutation clause, one static in PARTITION + // clause + AnalyzesOk("insert " + qualifier + " table functional.alltypes(id, year)" + + "PARTITION(month=2009) select 1, 2 from functional.alltypes"); + + // Omit most columns, should default to NULL + AnalyzesOk("insert " + qualifier + " table functional.alltypesnopart" + + "(id, bool_col) select id, bool_col from functional.alltypesnopart"); + + // Can't omit partition keys, they have to be mentioned somewhere + AnalysisError("insert " + qualifier + " table functional.alltypes(id)" + + " select id from functional.alltypes", + "Not enough partition columns mentioned in query. " + + "Missing columns are: year, month"); + + // Duplicate partition columns, one with partition key + AnalysisError("insert " + qualifier + " table functional.alltypes(year)" + + " partition(year=2012, month=3) select 1 from functional.alltypes", + "Duplicate column 'year' in partition clause"); + + // Type error between dynamic partition column mentioned in PARTITION column and + // select list (confirm that dynamic partition columns are mapped to the last select + // list expressions) + AnalysisError("insert " + qualifier + " table functional.alltypes" + + "(id, bool_col, string_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, tinyint_col, timestamp_col) " + + "PARTITION (year, month)" + + " select id, bool_col, month, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, tinyint_col, timestamp_col, " + + "year, string_col from functional.alltypes", + "Target table 'functional.alltypes' is incompatible with SELECT / PARTITION " + + "expressions.\n" + + 
"Expression 'month' (type: INT) is not compatible with column 'string_col' " + + "(type: STRING)"); + + // Empty permutation and no query statement + AnalyzesOk("insert " + qualifier + " table functional.alltypesnopart()"); + // Empty permutation can't receive any select list exprs + AnalysisError("insert " + qualifier + " table functional.alltypesnopart() select 1", + "Column permutation mentions fewer columns (0) than the SELECT / VALUES clause " + + "returns (1)"); + // Empty permutation with static partition columns can omit query statement + AnalyzesOk("insert " + qualifier + " table functional.alltypes() " + + "partition(year=2012, month=1)"); + // No mentioned columns to receive select-list exprs + AnalysisError("insert " + qualifier + " table functional.alltypes() " + + "partition(year=2012, month=1) select 1", + "Column permutation and PARTITION clause mention fewer columns (0) than the " + + "SELECT / VALUES clause and PARTITION clause return (1)"); + // Can't have dynamic partition columns with no query statement + AnalysisError("insert " + qualifier + " table functional.alltypes() " + + "partition(year, month)", + "Column permutation and PARTITION clause mention more columns (2) than the " + + "SELECT / VALUES clause and PARTITION clause return (0)"); + // If there are select-list exprs for dynamic partition columns, empty permutation is + // ok + AnalyzesOk("insert " + qualifier + " table functional.alltypes() " + + "partition(year, month) select 1,2 from functional.alltypes"); + + if (!qualifier.contains("OVERWRITE")) { + // Simple permutation + AnalyzesOk("insert " + qualifier + " table functional.hbasealltypesagg" + + "(id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, string_col, timestamp_col) " + + "select * from functional.alltypesnopart"); + // Too few columns in permutation + AnalysisError("insert " + qualifier + " table functional.hbasealltypesagg" + + "(id, tinyint_col, smallint_col, 
int_col, bigint_col, " + + "float_col, double_col, date_string_col, string_col) " + + "select * from functional.alltypesnopart", + "Column permutation mentions fewer columns (9) than the SELECT /" + + " VALUES clause returns (11)"); + // Omitting the row-key column is an error + AnalysisError("insert " + qualifier + " table functional.hbasealltypesagg" + + "(bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, string_col, timestamp_col) " + + "select bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + + "float_col, double_col, date_string_col, string_col, timestamp_col from " + + "functional.alltypesnopart", + "Row-key column 'id' must be explicitly mentioned in column permutation."); + } } } diff --git a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java index 07eb88e0b..36d14acd6 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/AnalyzerTest.java @@ -327,7 +327,7 @@ public class AnalyzerTest { "select id, bool_col, tinyint_col, smallint_col, int_col, bigint_col, " + "float_col, double_col, date_string_col, string_col, timestamp_col " + "from functional.alltypes", - "Superfluous columns in PARTITION clause: year."); + "Duplicate column 'year' in partition clause"); // Analysis error from explain query AnalysisError("explain " + diff --git a/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java b/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java index 721ef4d64..7b7695de8 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/ParserTest.java @@ -889,6 +889,23 @@ public class ParserTest { "select a from src where b > 5"); ParsesOk("insert " + qualifier + " t partition (pk1=f(a), pk2=!true and false) " + "select a from src where b > 5"); + // Permutation + 
ParsesOk("insert " + qualifier + " t(a,b,c) values(1,2,3)"); + // Permutation with mismatched select list (should parse fine) + ParsesOk("insert " + qualifier + " t(a,b,c) values(1,2,3,4,5,6)"); + // Permutation and partition + ParsesOk("insert " + qualifier + " t(a,b,c) partition(d) values(1,2,3,4)"); + // Empty permutation list + ParsesOk("insert " + qualifier + " t() select 1 from a"); + // Permutation with optional query statement + ParsesOk("insert " + qualifier + " t() partition(d)"); + ParsesOk("insert " + qualifier + " t()"); + // No comma in permutation list + ParserError("insert " + qualifier + " t(a b c) select 1 from a"); + // Can't use strings as identifiers in permutation list + ParserError("insert " + qualifier + " t('a') select 1 from a"); + // Expressions not allowed in permutation list + ParserError("insert " + qualifier + " t(a=1, b) select 1 from a"); } @Test @@ -898,6 +915,8 @@ public class ParserTest { testInsert(true, true); testInsert(false, false); testInsert(false, true); + // Missing query statement + ParserError("insert into table t"); // Missing 'overwrite/insert'. ParserError("insert table t select a from src where b > 5"); // Missing target table identifier. @@ -910,13 +929,13 @@ public class ParserTest { ParserError("insert into table t"); // Missing parentheses around 'partition'. ParserError("insert overwrite table t partition pk1=10 " + - "select a from src where b > 5"); + "select a from src where b > 5"); // Missing parentheses around 'partition'. ParserError("insert into table t partition pk1=10 " + "select a from src where b > 5"); // Missing comma in partition list. ParserError("insert overwrite table t partition (pk1=10 pk2=20) " + - "select a from src where b > 5"); + "select a from src where b > 5"); // Missing comma in partition list. 
ParserError("insert into table t partition (pk1=10 pk2=20) " + "select a from src where b > 5"); diff --git a/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java b/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java index ea45b1305..f370d550b 100644 --- a/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java +++ b/fe/src/test/java/com/cloudera/impala/analysis/ToSqlTest.java @@ -282,5 +282,29 @@ public class ToSqlTest { "bool_col, tinyint_col, smallint_col, int_col, bigint_col, float_col, " + "double_col, date_string_col, string_col, timestamp_col, month " + "FROM functional.alltypes"); + + // Permutations + testToSql("insert into table functional.alltypesnopart(id, bool_col, tinyint_col) " + + " values(1, true, 0)", + "INSERT INTO TABLE functional.alltypesnopart(id, bool_col, tinyint_col) " + + "VALUES(1, TRUE, 0)"); + + // Permutations that mention partition column + testToSql("insert into table functional.alltypes(id, year, month) " + + " values(1, 1990, 12)", + "INSERT INTO TABLE functional.alltypes(id, year, month) " + + "VALUES(1, 1990, 12)"); + + // Empty permutation with no select statement + testToSql("insert into table functional.alltypesnopart()", + "INSERT INTO TABLE functional.alltypesnopart()"); + + // Permutation and explicit partition clause + testToSql("insert into table functional.alltypes(id) " + + " partition (year=2009, month) values(1, 12)", + "INSERT INTO TABLE functional.alltypes(id) " + + "PARTITION (year=2009, month) VALUES(1, 12)"); + + } } diff --git a/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test b/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test new file mode 100644 index 000000000..72fb7b620 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/insert_permutation.test @@ -0,0 +1,191 @@ +==== +---- QUERY +create database insert_permutation_test +---- RESULTS +==== +---- QUERY +use insert_permutation_test +---- RESULTS +==== +---- 
QUERY +create table perm_nopart(int_col1 int, string_col string, int_col2 int); +create table perm_part(int_col1 int, string_col string) partitioned by (p1 int, p2 string); +---- RESULTS +==== +---- QUERY +# Simple non-permutation +insert into perm_nopart(int_col1, string_col, int_col2) values(1,'str',2) +---- SETUP +RESET insert_permutation_test.perm_nopart +RELOAD insert_permutation_test.perm_nopart +---- RESULTS +: 1 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +1,'str',2 +---- TYPES +INT,STRING,INT +==== +---- QUERY +# Permute the int columns +insert into perm_nopart(int_col2, string_col, int_col1) values(1,'str',2) +---- SETUP +RESET insert_permutation_test.perm_nopart +RELOAD insert_permutation_test.perm_nopart +---- RESULTS +: 1 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +2,'str',1 +---- TYPES +INT,STRING,INT +==== +---- QUERY +# Leave out two columns, check they are assigned NULL +insert into perm_nopart(int_col2) values(1) +---- SETUP +RESET insert_permutation_test.perm_nopart +RELOAD insert_permutation_test.perm_nopart +---- RESULTS +: 1 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +NULL,'NULL',1 +---- TYPES +INT,STRING,INT +==== +---- QUERY +# Permute the partition columns +insert into perm_part(p1, string_col, int_col1, p2) values(10,'str',1, 'hello') +---- SETUP +RESET insert_permutation_test.perm_part +RELOAD insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=hello/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +1,'str',10,'hello' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Same thing - permute the partition columns, but invert their order relative to Hive +insert into perm_part(p2, string_col, int_col1, p1) values('hello','str',1, 10) +---- SETUP +RESET insert_permutation_test.perm_part +RELOAD insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=hello/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +1,'str',10,'hello' +---- TYPES +INT,STRING,INT,STRING 
+==== +---- QUERY +# Check NULL if only partition keys are mentioned +insert into perm_part(p2, p1) values('hello', 10) +---- SETUP +RESET insert_permutation_test.perm_part +RELOAD insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=hello/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +NULL,'NULL',10,'hello' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Check NULL if only partition keys are mentioned, one static +insert into perm_part(p2) PARTITION(p1=10) values('hello') +---- SETUP +RESET insert_permutation_test.perm_part +RELOAD insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=hello/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +NULL,'NULL',10,'hello' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Check dynamic keys mentioned in the PARTITION column are still looked for at the end of +# the select-list +insert into perm_part(int_col1, string_col) PARTITION(p1=10, p2) values(1,'perm_col','part_col') +---- SETUP +RESET insert_permutation_test.perm_part +RELOAD insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=part_col/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +1,'perm_col',10,'part_col' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Check behaviour of empty permutation clause with no query statement +insert into perm_part() PARTITION(p1=10, p2='foo') +---- SETUP +RESET insert_permutation_test.perm_part +RELOAD insert_permutation_test.perm_part +---- RESULTS +p1=10/p2=foo/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +NULL,'NULL',10,'foo' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# Check behaviour of empty permutation clause +insert into perm_part() PARTITION(p1, p2='foo') values(5) +---- SETUP +RESET insert_permutation_test.perm_part +RELOAD insert_permutation_test.perm_part +---- RESULTS +p1=5/p2=foo/: 1 +==== +---- QUERY +select * from perm_part +---- RESULTS +NULL,'NULL',5,'foo' +---- TYPES +INT,STRING,INT,STRING +==== +---- QUERY +# 
Check behaviour of empty permutation clause with unpartitioned table +insert into perm_nopart() +---- SETUP +RESET insert_permutation_test.perm_nopart +RELOAD insert_permutation_test.perm_nopart +---- RESULTS +: 1 +==== +---- QUERY +select * from perm_nopart +---- RESULTS +NULL,'NULL',NULL +---- TYPES +INT,STRING,INT +==== diff --git a/tests/query_test/test_insert_permutation.py b/tests/query_test/test_insert_permutation.py new file mode 100755 index 000000000..6a0c76687 --- /dev/null +++ b/tests/query_test/test_insert_permutation.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python +# Copyright (c) 2012 Cloudera, Inc. All rights reserved. +# Targeted tests for INSERT statements using column permutations +# +import logging +import pytest +from tests.common.test_vector import * +from tests.common.impala_test_suite import * +from tests.common.test_dimensions import create_exec_option_dimension + +class TestInsertQueriesWithPermutation(ImpalaTestSuite): + """ + Tests for the column permutation feature of INSERT statements + """ + @classmethod + def get_workload(self): + return 'functional-query' + + @classmethod + def add_test_dimensions(cls): + super(TestInsertQueriesWithPermutation, cls).add_test_dimensions() + # Fix the exec_option vector to have a single value. This is needed should we decide + # to run the insert tests in parallel (otherwise there will be two tests inserting + # into the same table at the same time for the same file format). + # TODO: When we do decide to run these tests in parallel we could create unique temp + # tables for each test case to resolve the concurrency problems.
+ cls.TestMatrix.add_dimension(create_exec_option_dimension( + cluster_sizes=[0], disable_codegen_options=[False], batch_sizes=[0])) + # Insert is currently only supported for text and parquet; only text is exercised here + cls.TestMatrix.add_constraint(lambda v:\ + v.get_value('table_format').file_format in ['text']) + cls.TestMatrix.add_constraint(lambda v:\ + v.get_value('table_format').compression_codec == 'none') + + @pytest.mark.execute_serially + def test_insert_permutation(self, vector): + self.run_test_case('QueryTest/insert_permutation', vector) + + def teardown_method(self, method): + map(self.cleanup_db, ["insert_permutation_test"]) + + def cleanup_db(cls, db_name): + # TODO: Find a common place to put this method + # To drop a db, we need to first drop all the tables in that db + if db_name in cls.hive_client.get_all_databases(): + for table_name in cls.hive_client.get_all_tables(db_name): + cls.hive_client.drop_table(db_name, table_name, True) + cls.hive_client.drop_database(db_name, True, False) + cls.client.refresh()