mirror of
https://github.com/apache/impala.git
synced 2026-01-03 15:00:52 -05:00
This commit adds support for non-covering range partitions in Kudu
tables. The SPLIT ROWS clause is now deprecated and no longer supported.
The following new syntax provides more flexibility in creating range
partitions: it supports bounded and unbounded ranges, single-value
partitions, and multi-column range partitions.
The new syntax is:
DISTRIBUTE BY RANGE (col_list)
(
PARTITION lower_1 <[=] VALUES <[=] upper_1,
PARTITION lower_2 <[=] VALUES <[=] upper_2,
....
PARTITION lower_n <[=] VALUES <[=] upper_n,
PARTITION VALUE = val_1,
....
PARTITION VALUE = val_n
)
Multi-column range partitions are specified as follows:
DISTRIBUTE BY RANGE (col1, col2,..., coln)
(
PARTITION VALUE = (col1_val, col2_val, ..., coln_val),
....
PARTITION VALUE = (col1_val, col2_val, ..., coln_val)
)
Change-Id: I6799c01a37003f0f4c068d911a13e3f060110a06
Reviewed-on: http://gerrit.cloudera.org:8080/4856
Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
Tested-by: Internal Jenkins
87 lines
2.5 KiB
Plaintext
87 lines
2.5 KiB
Plaintext
====
|
|
---- QUERY
|
|
# CTAS: create Kudu table t with primary key id, hash-partitioned on id into
# 3 buckets, populated from the id and int_col columns of
# functional.alltypestiny. The final statement reads every row back.
create table t primary key (id) distribute by hash (id) into 3 buckets
|
|
stored as kudu
|
|
as select id, int_col from functional.alltypestiny;
|
|
select * from t;
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,0
|
|
3,1
|
|
4,0
|
|
5,1
|
|
6,0
|
|
7,1
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Negative test: boolean primary key column. Column y is BOOLEAN and is part
# of the primary key (x, y); table creation is expected to fail with the
# key-column type error listed under CATCH.
|
|
create table tab (x int, y boolean, primary key(x, y))
|
|
distribute by hash (x) into 3 buckets stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: Key column may not have type of BOOL, FLOAT, or DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Negative test: float primary key column. Column y is FLOAT and is part of
# the primary key (x, y); table creation is expected to fail with the
# key-column type error listed under CATCH.
|
|
create table tab (x int, y float, primary key(x, y))
|
|
distribute by hash (x) into 3 buckets stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: Key column may not have type of BOOL, FLOAT, or DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Negative test: primary keys should be declared first. Here the key column y
# is the second column in the list, after the non-key column x, so table
# creation is expected to fail with the out-of-order key column error listed
# under CATCH.
|
|
# TODO: See KUDU-1709 for improving Kudu error messages.
|
|
create table tab (x int, y int, primary key(y))
|
|
distribute by hash (y) into 3 buckets stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: Got out-of-order key column: name: "y" type: INT32 is_key: true is_nullable: false cfile_block_size: 0
|
|
====
|
|
---- QUERY
|
|
# Negative test: small number of hash buckets. The second hash component,
# hash(c), requests only 1 bucket; table creation is expected to fail with the
# "at least two hash buckets" error listed under CATCH.
|
|
create table tab (a int, b int, c int, d int, primary key(a, b, c))
|
|
distribute by hash(a,b) into 8 buckets, hash(c) into 1 buckets stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: must have at least two hash buckets
|
|
====
|
|
---- QUERY
|
|
# Negative test: same column in multiple hash based distributions. Column a is
# used by two separate hash components (3 buckets and 2 buckets); table
# creation is expected to fail with the error listed under CATCH.
|
|
create table tab (a int, b int, primary key (a))
|
|
distribute by hash (a) into 3 buckets, hash (a) into 2 buckets stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: hash bucket schema components must not contain columns in common
|
|
====
|
|
---- QUERY
|
|
# Negative test: same column referenced multiple times in the same hash-based
# distribution. Column a is listed three times in one hash component; table
# creation is expected to fail with the error listed under CATCH.
|
|
create table tab (a int primary key) distribute by hash (a, a, a) into 3 buckets
|
|
stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: hash bucket schema components must not contain columns in common
|
|
====
|
|
---- QUERY
|
|
# Kudu table that uses Impala keywords as table name and column names.
# All identifiers are backtick-quoted. The table combines hash partitioning on
# `analytic` (4 buckets) with three range partitions on `function`:
# values <= 1, 1 < values <= 10, and values > 10. Rows are then inserted from
# functional.alltypestiny and read back.
|
|
create table `add`(`analytic` int, `function` int, primary key(`analytic`, `function`))
|
|
distribute by hash (`analytic`) into 4 buckets, range (`function`)
|
|
(partition values <= 1, partition 1 < values <= 10, partition 10 < values) stored as kudu;
|
|
insert into `add` select id, int_col from functional.alltypestiny;
|
|
select * from `add`
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,0
|
|
3,1
|
|
4,0
|
|
5,1
|
|
6,0
|
|
7,1
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Test implicit casting/folding of partition values. The range column a is INT,
# but the single-value partition is given as the boolean literal false. The
# statement is expected to succeed (the RESULTS section is empty).
# NOTE(review): presumably false is folded to the integer 0; the resulting
# partition bound is not asserted here -- confirm.
|
|
create table tab (a int primary key) distribute by range (a) (partition value = false)
|
|
stored as kudu
|
|
---- RESULTS
|
|
====
|