IMPALA-3724: Support Kudu non-covering range partitions

This commit adds support for non-covering range partitions in Kudu tables.
The SPLIT ROWS clause is now deprecated and no longer supported. The
following new syntax provides more flexibility in creating range partitions:
it supports bounded and unbounded ranges as well as single-value partitions,
and multi-column range partitions are supported as well.

The new syntax is:

DISTRIBUTE BY RANGE (col_list)
(
  PARTITION lower_1 <[=] VALUES <[=] upper_1,
  PARTITION lower_2 <[=] VALUES <[=] upper_2,
  ....
  PARTITION lower_n <[=] VALUES <[=] upper_n,
  PARTITION VALUE = val_1,
  ....
  PARTITION VALUE = val_n
)

Multi-column range partitions are specified as follows:

DISTRIBUTE BY RANGE (col1, col2,..., coln)
(
  PARTITION VALUE = (col1_val, col2_val, ..., coln_val),
  ....
  PARTITION VALUE = (col1_val, col2_val, ..., coln_val)
)

Change-Id: I6799c01a37003f0f4c068d911a13e3f060110a06
Reviewed-on: http://gerrit.cloudera.org:8080/4856
Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
Tested-by: Internal Jenkins
2026-01-04 18:00:57 -05:00 · 2016-10-26 10:23:01 -07:00
parent 5f27ae0c2f
commit d802f321b2
17 changed files with 858 additions and 494 deletions
--- a/testdata/datasets/functional/functional_schema_template.sql
+++ b/testdata/datasets/functional/functional_schema_template.sql
@@ -766,7 +766,8 @@ create table {db_name}{db_suffix}.{table_name} (
  name string,
  zip int
 )
-distribute by range(id) split rows ((1003), (1007)) stored as kudu;
+distribute by range(id) (partition values <= 1003, partition 1003 < values <= 1007,
+partition 1007 < values) stored as kudu;
 ====
 ---- DATASET
 functional
@@ -789,7 +790,8 @@ create table {db_name}{db_suffix}.{table_name} (
  name string,
  zip int
 )
-distribute by range(id) split rows ((1003), (1007)) stored as kudu;
+distribute by range(id) (partition values <= 1003, partition 1003 < values <= 1007,
+partition 1007 < values) stored as kudu;
 ====
 ---- DATASET
 functional
@@ -815,7 +817,8 @@ create table {db_name}{db_suffix}.{table_name} (
  alltypes_id int,
  primary key (test_id, test_name, test_zip, alltypes_id)
 )
-distribute by range(test_id) split rows ((1003), (1007)) stored as kudu;
+distribute by range(test_id) (partition values <= 1003, partition 1003 < values <= 1007,
+partition 1007 < values) stored as kudu;
 ====
 ---- DATASET
 functional
@@ -1261,7 +1264,8 @@ create table {db_name}{db_suffix}.{table_name} (
  a string primary key,
  b string
 )
-distribute by range(a) split rows (('b'), ('d')) stored as kudu;
+distribute by range(a) (partition values <= 'b', partition 'b' < values <= 'd',
+partition 'd' < values) stored as kudu;
 ====
 ---- DATASET
 functional
@@ -1280,7 +1284,9 @@ DROP TABLE IF EXISTS {db_name}{db_suffix}.{table_name};
 create table {db_name}{db_suffix}.{table_name} (
  int_col int primary key
 )
-distribute by range(int_col) split rows ((2), (4), (6), (8)) stored as kudu;
+distribute by range(int_col) (partition values <= 2, partition 2 < values <= 4,
+partition 4 < values <= 6, partition 6 < values <= 8, partition 8 < values)
+stored as kudu;
 ====
 ---- DATASET
 functional
@@ -1405,14 +1411,17 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/ImpalaDemoDataset/DEC_00_SF3_P077
 ---- CREATE_KUDU
 DROP TABLE IF EXISTS {db_name}{db_suffix}.{table_name};
 create table {db_name}{db_suffix}.{table_name} (
-  id string,
+  id string primary key,
  zip string,
  description1 string,
  description2 string,
-  income int,
-  primary key (id, zip))
-distribute by range(id, zip) split rows (('8600000US01475', '01475'), ('8600000US63121', '63121'), ('8600000US84712', '84712'))
-stored as kudu;
+  income int)
+distribute by range(id)
+(partition values <= '8600000US01475',
+ partition '8600000US01475' < values <= '8600000US63121',
+ partition '8600000US63121' < values <= '8600000US84712',
+ partition '8600000US84712' < values
+) stored as kudu;
 ====
 ---- DATASET
 functional