From bf2124bf30bb768f06a614bdfa260a6877b1da35 Mon Sep 17 00:00:00 2001
From: Thomas Tauber-Marshall <tmarshall@cloudera.com>
Date: Tue, 15 May 2018 17:20:28 -0700
Subject: [PATCH] IMPALA-6929: Support multi-column range partitions for Kudu

Kudu allows specifying range partitions over multiple columns. Impala
already has support for doing this when the partitions are specified
with '=', but if the partitions are specified with '<' or '<=', the
parser would return an error.

This patch modifies the parser to allow for creating Kudu tables like:
create table kudu_test (a int, b int, primary key(a, b))
  partition by range(a, b) (partition (0, 0) <= values < (1, 1));
and similary to alter partitions like:
alter table kudu_test add range partition (1, 1) <= values < (2, 2);

Testing:
- Modified functional_kudu.jointbl's schema so that we have a table
  in functional with a multi-column range partition to test things
  against.
- Added FE and E2E tests for CREATE and ALTER.

Change-Id: I0141dd3344a4f22b186f513b7406f286668ef1e7
Reviewed-on: http://gerrit.cloudera.org:8080/10441
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
---
 fe/src/main/cup/sql-parser.cup                | 18 +++++--
 .../impala/analysis/RangePartition.java       | 47 ++++++++++---------
 .../impala/analysis/AnalyzeDDLTest.java       | 30 +++++++++++-
 .../functional/functional_schema_template.sql |  7 ++-
 .../queries/PlannerTest/kudu.test             |  4 +-
 .../queries/QueryTest/kudu_alter.test         | 42 +++++++++++++++++
 6 files changed, 115 insertions(+), 33 deletions(-)
diff --git a/fe/src/main/cup/sql-parser.cup b/fe/src/main/cup/sql-parser.cup
index 261f8ec6d..82fdf72d9 100644
--- a/fe/src/main/cup/sql-parser.cup
+++ b/fe/src/main/cup/sql-parser.cup
@@ -432,7 +432,7 @@ nonterminal StructField struct_field_def;
 nonterminal KuduPartitionParam hash_partition_param;
 nonterminal List<RangePartition> range_params_list;
 nonterminal RangePartition range_param;
-nonterminal Pair<Expr, Boolean> opt_lower_range_val,
+nonterminal Pair<List<Expr>, Boolean> opt_lower_range_val,
    opt_upper_range_val;
 nonterminal ArrayList<KuduPartitionParam> hash_partition_param_list;
 nonterminal ArrayList<KuduPartitionParam> partition_param_list;
@@ -1401,18 +1401,26 @@ range_param ::=
 
 opt_lower_range_val ::=
   expr:l LESSTHAN
-  {: RESULT = new Pair<Expr, Boolean>(l, false); :}
+  {: RESULT = new Pair<List<Expr>, Boolean>(Lists.newArrayList(l), false); :}
   | expr:l LESSTHAN EQUAL
-  {: RESULT = new Pair<Expr, Boolean>(l, true); :}
+  {: RESULT = new Pair<List<Expr>, Boolean>(Lists.newArrayList(l), true); :}
+  | LPAREN expr_list:l RPAREN LESSTHAN
+  {: RESULT = new Pair<List<Expr>, Boolean>(l, false); :}
+  | LPAREN expr_list:l RPAREN LESSTHAN EQUAL
+  {: RESULT = new Pair<List<Expr>, Boolean>(l, true); :}
   | /* empty */
   {: RESULT = null; :}
   ;
 
 opt_upper_range_val ::=
   LESSTHAN expr:l
-  {: RESULT = new Pair<Expr, Boolean>(l, false); :}
+  {: RESULT = new Pair<List<Expr>, Boolean>(Lists.newArrayList(l), false); :}
   | LESSTHAN EQUAL expr:l
-  {: RESULT = new Pair<Expr, Boolean>(l, true); :}
+  {: RESULT = new Pair<List<Expr>, Boolean>(Lists.newArrayList(l), true); :}
+  | LESSTHAN LPAREN expr_list:l RPAREN
+  {: RESULT = new Pair<List<Expr>, Boolean>(l, false); :}
+  | LESSTHAN EQUAL LPAREN expr_list:l RPAREN
+  {: RESULT = new Pair<List<Expr>, Boolean>(l, true); :}
   | /* empty */
   {: RESULT = null; :}
   ;
diff --git a/fe/src/main/java/org/apache/impala/analysis/RangePartition.java b/fe/src/main/java/org/apache/impala/analysis/RangePartition.java
index 9ed5da89e..e0441f84d 100644
--- a/fe/src/main/java/org/apache/impala/analysis/RangePartition.java
+++ b/fe/src/main/java/org/apache/impala/analysis/RangePartition.java
@@ -37,13 +37,15 @@ import com.google.common.collect.Lists;
  * The following cases are supported:
  * - Bounded on both ends:
  *   PARTITION l_val <[=] VALUES <[=] u_val
+ *   PARTITION (l_val1, ..., l_valn) <[=] VALUES <[=] (u_val1, ..., u_valn)
  * - Unbounded lower:
  *   PARTITION VALUES <[=] u_val
+ *   PARTITION VALUES <[=] (u_val, ..., u_valn)
  * - Unbounded upper:
  *   PARTITION l_val <[=] VALUES
+ *   PARTITION (l_val1, ..., l_valn) <[=] VALUES
  * - Single value (no range):
  *   PARTITION VALUE = val
- * - Multi-value:
  *   PARTITION VALUE = (val1, val2, ..., valn)
  *
  * Internally, all these cases are represented using the quadruplet:
@@ -78,25 +80,26 @@ public class RangePartition implements ParseNode {
   }
 
   /**
-   * Constructs a range partition. The range is specified in the CREATE TABLE statement
-   * using the 'PARTITION <expr> OP VALUES OP <expr>' clause. 'lower' corresponds to
-   * the '<expr> OP' pair which defines an optional lower bound. 'upper' corresponds to
-   * the 'OP <expr>' pair which defines an optional upper bound. Since only '<' and
-   * '<=' operators are allowed, operators are represented with boolean values that
-   * indicate inclusive or exclusive bounds.
+   * Constructs a range partition. The range is specified in the CREATE/ALTER TABLE
+   * statement using the 'PARTITION <expr> OP VALUES OP <expr>' clause or using the
+   * 'PARTITION (<expr>,....,<expr>) OP VALUES OP (<expr>,...,<expr>)' clause. 'lower'
+   * corresponds to the '<expr> OP' or '(<expr>,...,<expr>) OP' pair which defines an
+   * optional lower bound, and similarly 'upper' corresponds to the optional upper bound.
+   * Since only '<' and '<=' operators are allowed, operators are represented with boolean
+   * values that indicate inclusive or exclusive bounds.
    */
-  public static RangePartition createFromRange(Pair<Expr, Boolean> lower,
-      Pair<Expr, Boolean> upper) {
+  public static RangePartition createFromRange(Pair<List<Expr>, Boolean> lower,
+      Pair<List<Expr>, Boolean> upper) {
     List<Expr> lowerBoundExprs = Lists.newArrayListWithCapacity(1);
     boolean lowerBoundInclusive = false;
     List<Expr> upperBoundExprs = Lists.newArrayListWithCapacity(1);
     boolean upperBoundInclusive = false;
     if (lower != null) {
-      lowerBoundExprs.add(lower.first);
+      lowerBoundExprs = lower.first;
       lowerBoundInclusive = lower.second;
     }
     if (upper != null) {
-      upperBoundExprs.add(upper.first);
+      upperBoundExprs = upper.first;
       upperBoundInclusive = upper.second;
     }
     return new RangePartition(lowerBoundExprs, lowerBoundInclusive, upperBoundExprs,
@@ -105,7 +108,7 @@ public class RangePartition implements ParseNode {
 
   /**
    * Constructs a range partition from a set of values. The values are specified in the
-   * CREATE TABLE statement using the 'PARTITION VALUE = <expr>' or the
+   * CREATE/ALTER TABLE statement using the 'PARTITION VALUE = <expr>' or the
    * 'PARTITION VALUE = (<expr>,...,<expr>)' clause. For both cases, the generated
    * range partition has the same lower and upper bounds.
    */
@@ -219,23 +222,21 @@ public class RangePartition implements ParseNode {
     if (isSingletonRange_) {
       output.append("VALUE = ");
       if (lowerBound_.size() > 1) output.append("(");
-      List<String> literals = Lists.newArrayList();
-      for (Expr literal: lowerBound_) literals.add(literal.toSql());
-      output.append(Joiner.on(",").join(literals));
+      output.append(Expr.toSql(lowerBound_));
       if (lowerBound_.size() > 1) output.append(")");
     } else {
       if (!lowerBound_.isEmpty()) {
-        Preconditions.checkState(lowerBound_.size() == 1);
-        output.append(lowerBound_.get(0).toSql() + " " +
-            (lowerBoundInclusive_ ? "<=" : "<"));
-        output.append(" ");
+        if (lowerBound_.size() > 1) output.append("(");
+        output.append(Expr.toSql(lowerBound_));
+        if (lowerBound_.size() > 1) output.append(")");
+        output.append(lowerBoundInclusive_ ? " <= " : " < ");
       }
       output.append("VALUES");
       if (!upperBound_.isEmpty()) {
-        Preconditions.checkState(upperBound_.size() == 1);
-        output.append(" ");
-        output.append((upperBoundInclusive_ ? "<=" : "<") + " " +
-            upperBound_.get(0).toSql() );
+        output.append(upperBoundInclusive_ ? " <= " : " < ");
+        if (upperBound_.size() > 1) output.append("(");
+        output.append(Expr.toSql(upperBound_));
+        if (upperBound_.size() > 1) output.append(")");
       }
     }
     return output.toString();
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index e5cd76606..100e5d308 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -2212,12 +2212,28 @@ public class AnalyzeDDLTest extends FrontendTestBase {
           "partition 10 <= values", kw));
       AnalyzesOk(String.format("alter table functional_kudu.testtbl %s range " +
           "partition 1+1 <= values <= factorial(3)", kw));
+      AnalyzesOk(String.format("alter table functional_kudu.jointbl %s range " +
+          "partition (0, '0') < values", kw));
+      AnalyzesOk(String.format("alter table functional_kudu.jointbl %s range " +
+          "partition values <= (1, '1')", kw));
+      AnalyzesOk(String.format("alter table functional_kudu.jointbl %s range " +
+          "partition (0, '0') <= values < (1, '1')", kw));
+      AnalyzesOk(String.format("alter table functional_kudu.jointbl %s range " +
+          "partition value = (-1, 'a')", kw));
       AnalysisError(String.format("alter table functional.alltypes %s range " +
           "partition 10 < values < 20", kw), "Table functional.alltypes does not " +
           "support range partitions: RANGE PARTITION 10 < VALUES < 20");
       AnalysisError(String.format("alter table functional_kudu.testtbl %s range " +
           "partition values < isnull(null, null)", kw), "Range partition values " +
           "cannot be NULL. Range partition: 'PARTITION VALUES < isnull(NULL, NULL)'");
+      AnalysisError(String.format("alter table functional_kudu.jointbl %s range " +
+          "partition (0) < values", kw),
+          "Number of specified range partition values is different than the number of " +
+          "partitioning columns: (1 vs 2). Range partition: 'PARTITION (0) < VALUES'");
+      AnalysisError(String.format("alter table functional_kudu.jointbl %s range " +
+          "partition values < (0, 0)", kw),
+          "Range partition value 0 (type: TINYINT) is not type compatible with " +
+          "partitioning column 'test_name' (type: STRING).");
     }
 
     // ALTER TABLE ADD COLUMNS
@@ -2406,7 +2422,7 @@ public class AnalyzeDDLTest extends FrontendTestBase {
         "partition by range(a) (partition value = (1, 2), " +
         "partition value = 3, partition value = 4) stored as kudu",
         "Number of specified range partition values is different than the number of " +
-        "partitioning columns: (2 vs 1). Range partition: 'PARTITION VALUE = (1,2)'");
+        "partitioning columns: (2 vs 1). Range partition: 'PARTITION VALUE = (1, 2)'");
     // Key ranges must match the column types.
     AnalysisError("create table tab (a int, b int, c int, d int, primary key(a, b, c)) " +
         "partition by hash (a, b, c) partitions 8, range (a) " +
@@ -2493,6 +2509,18 @@ public class AnalyzeDDLTest extends FrontendTestBase {
     AnalysisError("create table tab (x int primary key) " +
         "partitioned by (y int) stored as kudu", "PARTITIONED BY cannot be used " +
         "in Kudu tables.");
+    // Multi-column range partitions
+    AnalyzesOk("create table tab (a bigint, b tinyint, c double, primary key(a, b)) " +
+        "partition by range(a, b) (partition (0, 0) < values <= (1, 1)) stored as kudu");
+    AnalysisError("create table tab (a bigint, b tinyint, c double, primary key(a, b)) " +
+        "partition by range(a, b) (partition values <= (1, 'b')) stored as kudu",
+        "Range partition value 'b' (type: STRING) is not type compatible with " +
+        "partitioning column 'b' (type: TINYINT)");
+    AnalysisError("create table tab (a bigint, b tinyint, c double, primary key(a, b)) " +
+        "partition by range(a, b) (partition 0 < values <= 1) stored as kudu",
+        "Number of specified range partition values is different than the number of " +
+        "partitioning columns: (1 vs 2). Range partition: 'PARTITION 0 < VALUES <= 1'");
+
 
     // Test unsupported Kudu types
     List<String> unsupportedTypes = Lists.newArrayList("VARCHAR(20)", "CHAR(20)",
diff --git a/testdata/datasets/functional/functional_schema_template.sql b/testdata/datasets/functional/functional_schema_template.sql
index 4d26ea959..cfd90309c 100644
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -822,8 +822,11 @@ create table {db_name}{db_suffix}.{table_name} (
   alltypes_id int,
   primary key (test_id, test_name, test_zip, alltypes_id)
 )
-partition by range(test_id) (partition values <= 1003, partition 1003 < values <= 1007,
-partition 1007 < values) stored as kudu;
+partition by range(test_id, test_name)
+  (partition values <= (1003, 'Name3'),
+   partition (1003, 'Name3') < values <= (1007, 'Name7'),
+   partition (1007, 'Name7') < values)
+stored as kudu;
 ====
 ---- DATASET
 functional
diff --git a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
index 57a08dc58..dfea97018 100644
--- a/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
+++ b/testdata/workloads/functional-planner/queries/PlannerTest/kudu.test
@@ -525,12 +525,12 @@ INSERT INTO KUDU [functional_kudu.alltypes]
 ====
 # Partition exprs are constant but primary key exprs aren't, so sort but don't partition.
 insert into functional_kudu.jointbl (test_id, test_name, test_zip, alltypes_id)
-select 1, string_col, int_col, id from functional.alltypes
+select 1, '1', int_col, id from functional.alltypes
 ---- DISTRIBUTEDPLAN
 INSERT INTO KUDU [functional_kudu.jointbl]
 |
 01:PARTIAL SORT
-|  order by: string_col ASC NULLS LAST, int_col ASC NULLS LAST, id ASC NULLS LAST
+|  order by: int_col ASC NULLS LAST, id ASC NULLS LAST
 |
 00:SCAN HDFS [functional.alltypes]
    partitions=24/24 files=24 size=478.45KB
diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
index 85937fae3..a0d95d91a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_alter.test
@@ -568,3 +568,45 @@ ID,NAME,NEW_COL1,NEW_COL2,LAST_NAME,NEW_COL4
 ---- TYPES
 INT,STRING,INT,BIGINT,STRING,INT
 ====
+---- QUERY
+# Check that range partitions defined on multiple columns work
+create table multi_range_partition_cols (a string, b int, c tinyint, primary key(a, b, c))
+partition by range(a, b)
+(partition values < ('a', 0), partition ('a', 0) <= values < ('b', 1))
+stored as kudu
+---- RESULTS
+'Table has been created.'
+====
+---- QUERY
+show range partitions multi_range_partition_cols;
+---- RESULTS
+'VALUES < ("a", 0)'
+'("a", 0) <= VALUES < ("b", 1)'
+====
+---- QUERY
+alter table multi_range_partition_cols add range partition ('b', 1) <= values < ('c', 2)
+====
+---- QUERY
+show range partitions multi_range_partition_cols;
+---- RESULTS
+'VALUES < ("a", 0)'
+'("a", 0) <= VALUES < ("b", 1)'
+'("b", 1) <= VALUES < ("c", 2)'
+====
+---- QUERY
+alter table multi_range_partition_cols add range partition value = ('c', 2)
+====
+---- QUERY
+show range partitions multi_range_partition_cols;
+---- RESULTS
+'VALUES < ("a", 0)'
+'("a", 0) <= VALUES < ("b", 1)'
+'("b", 1) <= VALUES < ("c", 2)'
+'VALUE = ("c", 2)'
+====
+---- QUERY
+# Try to insert a partition that overlaps with an existing partition
+alter table multi_range_partition_cols add range partition ('b', 2) <= values < ('c', 1)
+---- CATCH
+NonRecoverableException: New range partition conflicts with existing range partition: ("b", 2) <= VALUES < ("c", 1)
+====