IMPALA-3724: Support Kudu non-covering range partitions

This commit adds support for non-covering range partitions in Kudu
tables. The SPLIT ROWS clause is now deprecated and no longer supported.
The following new syntax provides more flexibility in creating range
partitions: it supports bounded and unbounded ranges (either bound may be
omitted, e.g. PARTITION VALUES <= 1003 or PARTITION 1007 < VALUES) as well
as single-value partitions. Multi-column range partitions are supported as
well.

The new syntax is:
DISTRIBUTE BY RANGE (col_list)
(
 PARTITION lower_1 <[=] VALUES <[=] upper_1,
 PARTITION lower_2 <[=] VALUES <[=] upper_2,
             ....
 PARTITION lower_n <[=] VALUES <[=] upper_n,
 PARTITION VALUE = val_1,
             ....
 PARTITION VALUE = val_n
)

Multi-column range partitions are specified as follows:
DISTRIBUTE BY RANGE (col1, col2,..., coln)
(
 PARTITION VALUE = (col1_val, col2_val, ..., coln_val),
                     ....
 PARTITION VALUE = (col1_val, col2_val, ..., coln_val)
)

Change-Id: I6799c01a37003f0f4c068d911a13e3f060110a06
Reviewed-on: http://gerrit.cloudera.org:8080/4856
Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
Tested-by: Internal Jenkins
This commit is contained in:
Dimitris Tsirogiannis
2016-10-26 10:23:01 -07:00
committed by Internal Jenkins
parent 5f27ae0c2f
commit d802f321b2
17 changed files with 858 additions and 494 deletions

View File

@@ -766,7 +766,8 @@ create table {db_name}{db_suffix}.{table_name} (
name string,
zip int
)
distribute by range(id) split rows ((1003), (1007)) stored as kudu;
distribute by range(id) (partition values <= 1003, partition 1003 < values <= 1007,
partition 1007 < values) stored as kudu;
====
---- DATASET
functional
@@ -789,7 +790,8 @@ create table {db_name}{db_suffix}.{table_name} (
name string,
zip int
)
distribute by range(id) split rows ((1003), (1007)) stored as kudu;
distribute by range(id) (partition values <= 1003, partition 1003 < values <= 1007,
partition 1007 < values) stored as kudu;
====
---- DATASET
functional
@@ -815,7 +817,8 @@ create table {db_name}{db_suffix}.{table_name} (
alltypes_id int,
primary key (test_id, test_name, test_zip, alltypes_id)
)
distribute by range(test_id) split rows ((1003), (1007)) stored as kudu;
distribute by range(test_id) (partition values <= 1003, partition 1003 < values <= 1007,
partition 1007 < values) stored as kudu;
====
---- DATASET
functional
@@ -1261,7 +1264,8 @@ create table {db_name}{db_suffix}.{table_name} (
a string primary key,
b string
)
distribute by range(a) split rows (('b'), ('d')) stored as kudu;
distribute by range(a) (partition values <= 'b', partition 'b' < values <= 'd',
partition 'd' < values) stored as kudu;
====
---- DATASET
functional
@@ -1280,7 +1284,9 @@ DROP TABLE IF EXISTS {db_name}{db_suffix}.{table_name};
create table {db_name}{db_suffix}.{table_name} (
int_col int primary key
)
distribute by range(int_col) split rows ((2), (4), (6), (8)) stored as kudu;
distribute by range(int_col) (partition values <= 2, partition 2 < values <= 4,
partition 4 < values <= 6, partition 6 < values <= 8, partition 8 < values)
stored as kudu;
====
---- DATASET
functional
@@ -1405,14 +1411,17 @@ LOAD DATA LOCAL INPATH '{impala_home}/testdata/ImpalaDemoDataset/DEC_00_SF3_P077
---- CREATE_KUDU
DROP TABLE IF EXISTS {db_name}{db_suffix}.{table_name};
create table {db_name}{db_suffix}.{table_name} (
id string,
id string primary key,
zip string,
description1 string,
description2 string,
income int,
primary key (id, zip))
distribute by range(id, zip) split rows (('8600000US01475', '01475'), ('8600000US63121', '63121'), ('8600000US84712', '84712'))
stored as kudu;
income int)
distribute by range(id)
(partition values <= '8600000US01475',
partition '8600000US01475' < values <= '8600000US63121',
partition '8600000US63121' < values <= '8600000US84712',
partition '8600000US84712' < values
) stored as kudu;
====
---- DATASET
functional