From bf2124bf30bb768f06a614bdfa260a6877b1da35 Mon Sep 17 00:00:00 2001 From: Thomas Tauber-Marshall Date: Tue, 15 May 2018 17:20:28 -0700 Subject: [PATCH] IMPALA-6929: Support multi-column range partitions for Kudu Kudu allows specifying range partitions over multiple columns. Impala already has support for doing this when the partitions are specified with '=', but if the partitions are specified with '<' or '<=', the parser would return an error. This patch modifies the parser to allow for creating Kudu tables like: create table kudu_test (a int, b int, primary key(a, b)) partition by range(a, b) (partition (0, 0) <= values < (1, 1)); and similary to alter partitions like: alter table kudu_test add range partition (1, 1) <= values < (2, 2); Testing: - Modified functional_kudu.jointbl's schema so that we have a table in functional with a multi-column range partition to test things against. - Added FE and E2E tests for CREATE and ALTER. Change-Id: I0141dd3344a4f22b186f513b7406f286668ef1e7 Reviewed-on: http://gerrit.cloudera.org:8080/10441 Reviewed-by: Impala Public Jenkins Tested-by: Impala Public Jenkins --- fe/src/main/cup/sql-parser.cup | 18 +++++-- .../impala/analysis/RangePartition.java | 47 ++++++++++--------- .../impala/analysis/AnalyzeDDLTest.java | 30 +++++++++++- .../functional/functional_schema_template.sql | 7 ++- .../queries/PlannerTest/kudu.test | 4 +- .../queries/QueryTest/kudu_alter.test | 42 +++++++++++++++++ 6 files changed, 115 insertions(+), 33 deletions(-) diff --git a/fe/src/main/cup/sql-parser.cup b/fe/src/main/cup/sql-parser.cup index 261f8ec6d..82fdf72d9 100644 --- a/fe/src/main/cup/sql-parser.cup +++ b/fe/src/main/cup/sql-parser.cup @@ -432,7 +432,7 @@ nonterminal StructField struct_field_def; nonterminal KuduPartitionParam hash_partition_param; nonterminal List range_params_list; nonterminal RangePartition range_param; -nonterminal Pair opt_lower_range_val, +nonterminal Pair, Boolean> opt_lower_range_val, opt_upper_range_val; nonterminal ArrayList hash_partition_param_list; nonterminal ArrayList partition_param_list; @@ -1401,18 +1401,26 @@ range_param ::= opt_lower_range_val ::= expr:l LESSTHAN - {: RESULT = new Pair(l, false); :} + {: RESULT = new Pair, Boolean>(Lists.newArrayList(l), false); :} | expr:l LESSTHAN EQUAL - {: RESULT = new Pair(l, true); :} + {: RESULT = new Pair, Boolean>(Lists.newArrayList(l), true); :} + | LPAREN expr_list:l RPAREN LESSTHAN + {: RESULT = new Pair, Boolean>(l, false); :} + | LPAREN expr_list:l RPAREN LESSTHAN EQUAL + {: RESULT = new Pair, Boolean>(l, true); :} | /* empty */ {: RESULT = null; :} ; opt_upper_range_val ::= LESSTHAN expr:l - {: RESULT = new Pair(l, false); :} + {: RESULT = new Pair, Boolean>(Lists.newArrayList(l), false); :} | LESSTHAN EQUAL expr:l - {: RESULT = new Pair(l, true); :} + {: RESULT = new Pair, Boolean>(Lists.newArrayList(l), true); :} + | LESSTHAN LPAREN expr_list:l RPAREN + {: RESULT = new Pair, Boolean>(l, false); :} + | LESSTHAN EQUAL LPAREN expr_list:l RPAREN + {: RESULT = new Pair, Boolean>(l, true); :} | /* empty */ {: RESULT = null; :} ; diff --git a/fe/src/main/java/org/apache/impala/analysis/RangePartition.java b/fe/src/main/java/org/apache/impala/analysis/RangePartition.java index 9ed5da89e..e0441f84d 100644 --- a/fe/src/main/java/org/apache/impala/analysis/RangePartition.java +++ b/fe/src/main/java/org/apache/impala/analysis/RangePartition.java @@ -37,13 +37,15 @@ import com.google.common.collect.Lists; * The following cases are supported: * - Bounded on both ends: * PARTITION l_val <[=] VALUES <[=] u_val + * PARTITION (l_val1, ..., l_valn) <[=] VALUES <[=] (u_val1, ..., u_valn) * - Unbounded lower: * PARTITION VALUES <[=] u_val + * PARTITION VALUES <[=] (u_val, ..., u_valn) * - Unbounded upper: * PARTITION l_val <[=] VALUES + * PARTITION (l_val1, ..., l_valn) <[=] VALUES * - Single value (no range): * PARTITION VALUE = val - * - Multi-value: * PARTITION VALUE = (val1, val2, ..., valn) * * Internally, all these cases are represented using the quadruplet: @@ -78,25 +80,26 @@ public class RangePartition implements ParseNode { } /** - * Constructs a range partition. The range is specified in the CREATE TABLE statement - * using the 'PARTITION OP VALUES OP ' clause. 'lower' corresponds to - * the ' OP' pair which defines an optional lower bound. 'upper' corresponds to - * the 'OP ' pair which defines an optional upper bound. Since only '<' and - * '<=' operators are allowed, operators are represented with boolean values that - * indicate inclusive or exclusive bounds. + * Constructs a range partition. The range is specified in the CREATE/ALTER TABLE + * statement using the 'PARTITION OP VALUES OP ' clause or using the + * 'PARTITION (,....,) OP VALUES OP (,...,)' clause. 'lower' + * corresponds to the ' OP' or '(,...,) OP' pair which defines an + * optional lower bound, and similarly 'upper' corresponds to the optional upper bound. + * Since only '<' and '<=' operators are allowed, operators are represented with boolean + * values that indicate inclusive or exclusive bounds. */ - public static RangePartition createFromRange(Pair lower, - Pair upper) { + public static RangePartition createFromRange(Pair, Boolean> lower, + Pair, Boolean> upper) { List lowerBoundExprs = Lists.newArrayListWithCapacity(1); boolean lowerBoundInclusive = false; List upperBoundExprs = Lists.newArrayListWithCapacity(1); boolean upperBoundInclusive = false; if (lower != null) { - lowerBoundExprs.add(lower.first); + lowerBoundExprs = lower.first; lowerBoundInclusive = lower.second; } if (upper != null) { - upperBoundExprs.add(upper.first); + upperBoundExprs = upper.first; upperBoundInclusive = upper.second; } return new RangePartition(lowerBoundExprs, lowerBoundInclusive, upperBoundExprs, @@ -105,7 +108,7 @@ public class RangePartition implements ParseNode { /** * Constructs a range partition from a set of values. The values are specified in the - * CREATE TABLE statement using the 'PARTITION VALUE = ' or the + * CREATE/ALTER TABLE statement using the 'PARTITION VALUE = ' or the * 'PARTITION VALUE = (,...,)' clause. For both cases, the generated * range partition has the same lower and upper bounds. */ @@ -219,23 +222,21 @@ public class RangePartition implements ParseNode { if (isSingletonRange_) { output.append("VALUE = "); if (lowerBound_.size() > 1) output.append("("); - List literals = Lists.newArrayList(); - for (Expr literal: lowerBound_) literals.add(literal.toSql()); - output.append(Joiner.on(",").join(literals)); + output.append(Expr.toSql(lowerBound_)); if (lowerBound_.size() > 1) output.append(")"); } else { if (!lowerBound_.isEmpty()) { - Preconditions.checkState(lowerBound_.size() == 1); - output.append(lowerBound_.get(0).toSql() + " " + - (lowerBoundInclusive_ ? "<=" : "<")); - output.append(" "); + if (lowerBound_.size() > 1) output.append("("); + output.append(Expr.toSql(lowerBound_)); + if (lowerBound_.size() > 1) output.append(")"); + output.append(lowerBoundInclusive_ ? " <= " : " < "); } output.append("VALUES"); if (!upperBound_.isEmpty()) { - Preconditions.checkState(upperBound_.size() == 1); - output.append(" "); - output.append((upperBoundInclusive_ ? "<=" : "<") + " " + - upperBound_.get(0).toSql() ); + output.append(upperBoundInclusive_ ? " <= " : " < "); + if (upperBound_.size() > 1) output.append("("); + output.append(Expr.toSql(upperBound_)); + if (upperBound_.size() > 1) output.append(")"); } } return output.toString(); diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java index e5cd76606..100e5d308 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java @@ -2212,12 +2212,28 @@ public class AnalyzeDDLTest extends FrontendTestBase { "partition 10 <= values", kw)); AnalyzesOk(String.format("alter table functional_kudu.testtbl %s range " + "partition 1+1 <= values <= factorial(3)", kw)); + AnalyzesOk(String.format("alter table functional_kudu.jointbl %s range " + + "partition (0, '0') < values", kw)); + AnalyzesOk(String.format("alter table functional_kudu.jointbl %s range " + + "partition values <= (1, '1')", kw)); + AnalyzesOk(String.format("alter table functional_kudu.jointbl %s range " + + "partition (0, '0') <= values < (1, '1')", kw)); + AnalyzesOk(String.format("alter table functional_kudu.jointbl %s range " + + "partition value = (-1, 'a')", kw)); AnalysisError(String.format("alter table functional.alltypes %s range " + "partition 10 < values < 20", kw), "Table functional.alltypes does not " + "support range partitions: RANGE PARTITION 10 < VALUES < 20"); AnalysisError(String.format("alter table functional_kudu.testtbl %s range " + "partition values < isnull(null, null)", kw), "Range partition values " + "cannot be NULL. Range partition: 'PARTITION VALUES < isnull(NULL, NULL)'"); + AnalysisError(String.format("alter table functional_kudu.jointbl %s range " + + "partition (0) < values", kw), + "Number of specified range partition values is different than the number of " + + "partitioning columns: (1 vs 2). Range partition: 'PARTITION (0) < VALUES'"); + AnalysisError(String.format("alter table functional_kudu.jointbl %s range " + + "partition values < (0, 0)", kw), + "Range partition value 0 (type: TINYINT) is not type compatible with " + + "partitioning column 'test_name' (type: STRING)."); } // ALTER TABLE ADD COLUMNS @@ -2406,7 +2422,7 @@ public class AnalyzeDDLTest extends FrontendTestBase { "partition by range(a) (partition value = (1, 2), " + "partition value = 3, partition value = 4) stored as kudu", "Number of specified range partition values is different than the number of " + - "partitioning columns: (2 vs 1). Range partition: 'PARTITION VALUE = (1,2)'"); + "partitioning columns: (2 vs 1). Range partition: 'PARTITION VALUE = (1, 2)'"); // Key ranges must match the column types. AnalysisError("create table tab (a int, b int, c int, d int, primary key(a, b, c)) " + "partition by hash (a, b, c) partitions 8, range (a) " + @@ -2493,6 +2509,18 @@ public class AnalyzeDDLTest extends FrontendTestBase { AnalysisError("create table tab (x int primary key) " + "partitioned by (y int) stored as kudu", "PARTITIONED BY cannot be used " + "in Kudu tables."); + // Multi-column range partitions + AnalyzesOk("create table tab (a bigint, b tinyint, c double, primary key(a, b)) " + + "partition by range(a, b) (partition (0, 0) < values <= (1, 1)) stored as kudu"); + AnalysisError("create table tab (a bigint, b tinyint, c double, primary key(a, b)) " + + "partition by range(a, b) (partition values <= (1, 'b')) stored as kudu", + "Range partition value 'b' (type: STRING) is not type compatible with " + + "partitioning column 'b' (type: TINYINT)"); + AnalysisError("create table tab (a bigint, b tinyint, c double, primary key(a, b)) " + + "partition by range(a, b) (partition 0 < values <= 1) stored as kudu", + "Number of specified range partition values is different than the number of " + + "partitioning columns: (1 vs 2). Range partition: 'PARTITION 0 < VALUES <= 1'"); + // Test unsupported Kudu types List unsupportedTypes = Lists.newArrayList("VARCHAR(20)", "CHAR(20)", diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql index 4d26ea959..cfd90309c 100644 --- a/testdata/datasets/functional/functional_schema_template.sql +++ b/testdata/datasets/functional/functional_schema_template.sql @@ -822,8 +822,11 @@ create table {db_name}{db_suffix}.{table_name} ( alltypes_id int, primary key (test_id, test_name, test_zip, alltypes_id) ) -partition by range(test_id) (partition values <= 1003, partition 1003 < values <= 1007, -partition 1007 < values) stored as kudu; +partition by range(test_id, test_name) + (partition values <= (1003, 'Name3'), + partition (1003, 'Name3') < values <= (1007, 'Name7'), + partition (1007, 'Name7') < values) +stored as kudu; ==== ---- DATASET functional diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test index 57a08dc58..dfea97018 100644 --- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test +++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test @@ -525,12 +525,12 @@ INSERT INTO KUDU [functional_kudu.alltypes] ==== # Partition exprs are constant but primary key exprs aren't, so sort but don't partition. insert into functional_kudu.jointbl (test_id, test_name, test_zip, alltypes_id) -select 1, string_col, int_col, id from functional.alltypes +select 1, '1', int_col, id from functional.alltypes ---- DISTRIBUTEDPLAN INSERT INTO KUDU [functional_kudu.jointbl] | 01:PARTIAL SORT -| order by: string_col ASC NULLS LAST, int_col ASC NULLS LAST, id ASC NULLS LAST +| order by: int_col ASC NULLS LAST, id ASC NULLS LAST | 00:SCAN HDFS [functional.alltypes] partitions=24/24 files=24 size=478.45KB diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test index 85937fae3..a0d95d91a 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test +++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test @@ -568,3 +568,45 @@ ID,NAME,NEW_COL1,NEW_COL2,LAST_NAME,NEW_COL4 ---- TYPES INT,STRING,INT,BIGINT,STRING,INT ==== +---- QUERY +# Check that range partitions defined on multiple columns work +create table multi_range_partition_cols (a string, b int, c tinyint, primary key(a, b, c)) +partition by range(a, b) +(partition values < ('a', 0), partition ('a', 0) <= values < ('b', 1)) +stored as kudu +---- RESULTS +'Table has been created.' +==== +---- QUERY +show range partitions multi_range_partition_cols; +---- RESULTS +'VALUES < ("a", 0)' +'("a", 0) <= VALUES < ("b", 1)' +==== +---- QUERY +alter table multi_range_partition_cols add range partition ('b', 1) <= values < ('c', 2) +==== +---- QUERY +show range partitions multi_range_partition_cols; +---- RESULTS +'VALUES < ("a", 0)' +'("a", 0) <= VALUES < ("b", 1)' +'("b", 1) <= VALUES < ("c", 2)' +==== +---- QUERY +alter table multi_range_partition_cols add range partition value = ('c', 2) +==== +---- QUERY +show range partitions multi_range_partition_cols; +---- RESULTS +'VALUES < ("a", 0)' +'("a", 0) <= VALUES < ("b", 1)' +'("b", 1) <= VALUES < ("c", 2)' +'VALUE = ("c", 2)' +==== +---- QUERY +# Try to insert a partition that overlaps with an existing partition +alter table multi_range_partition_cols add range partition ('b', 2) <= values < ('c', 1) +---- CATCH +NonRecoverableException: New range partition conflicts with existing range partition: ("b", 2) <= VALUES < ("c", 1) +====