mirror of
https://github.com/apache/impala.git
synced 2025-12-25 02:03:09 -05:00
Initial implementation of KUDU-1261 (array column type) recently merged in upstream Apache Kudu repository. This patch add initial Impala support for working with Kudu tables having array type columns. Unlike rows, the elements of a Kudu array are stored in a different format than Impala. Instead of per-row bit flag for NULL info, values and NULL bits are stored in separate arrays. The following types of queries are not supported in this patch: - (IMPALA-14538) Queries that reference an array column as a table, e.g. ```sql SELECT item FROM kudu_array.array_int; ``` - (IMPALA-14539) Queries that create duplicate collection slots, e.g. ```sql SELECT array_int FROM kudu_array AS t, t.array_int AS unnested; ``` Testing: - Add some FE tests in AnalyzeDDLTest and AnalyzeKuduDDLTest. - Add EE test test_kudu.py::TestKuduArray. Since Impala does not support inserting complex types, including array, the data insertion part of the test is achieved through custom C++ code kudu-array-inserter.cc that insert into Kudu via Kudu C++ client. It would be great if we could migrate it to Python so that it can be moved to the same file as the test (IMPALA-14537). - Pass core tests. Co-authored-by: Riza Suminto Change-Id: I9282aac821bd30668189f84b2ed8fff7047e7310 Reviewed-on: http://gerrit.cloudera.org:8080/23493 Reviewed-by: Alexey Serbin <alexey@apache.org> Reviewed-by: Michael Smith <michael.smith@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
755 lines
22 KiB
Plaintext
755 lines
22 KiB
Plaintext
====
|
|
---- QUERY
|
|
create table t primary key (id) partition by hash (id) partitions 3
|
|
stored as kudu
|
|
as select id, int_col from functional.alltypestiny;
|
|
select * from t;
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,0
|
|
3,1
|
|
4,0
|
|
5,1
|
|
6,0
|
|
7,1
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Boolean primary key column
|
|
create table tab (x int, y boolean, primary key(x, y))
|
|
partition by hash (x) partitions 3 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: key column may not have type of BOOL, FLOAT, DOUBLE, or NESTED
|
|
====
|
|
---- QUERY
|
|
# Float primary key column
|
|
create table tab (x int, y float, primary key(x, y))
|
|
partition by hash (x) partitions 3 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: key column may not have type of BOOL, FLOAT, DOUBLE, or NESTED
|
|
====
|
|
---- QUERY
|
|
# Primary keys should be declared first
|
|
create table tab (x int, y int, primary key(y))
|
|
partition by hash (y) partitions 3 stored as kudu
|
|
---- CATCH
|
|
ImpalaRuntimeException: Kudu PRIMARY KEY columns must be specified as the first columns in the table (expected leading columns ('y') but found ('x'))
|
|
====
|
|
---- QUERY
|
|
# Primary keys should be be listed in the same order
|
|
create table tab (x int, y int, z int, primary key(y,x))
|
|
partition by hash (y) partitions 3 stored as kudu
|
|
---- CATCH
|
|
ImpalaRuntimeException: Kudu PRIMARY KEY columns must be specified as the first columns in the table (expected leading columns ('y', 'x') but found ('x', 'y'))
|
|
====
|
|
---- QUERY
|
|
# Small number of hash partitions
|
|
create table tab (a int, b int, c int, d int, primary key(a, b, c))
|
|
partition by hash(a,b) partitions 8, hash(c) partitions 1 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: must have at least two hash buckets
|
|
====
|
|
---- QUERY
|
|
# Same column in multiple hash based distributions
|
|
create table tab (a int, b int, primary key (a))
|
|
partition by hash (a) partitions 3, hash (a) partitions 2 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: hash bucket schema components must not contain columns in common
|
|
====
|
|
---- QUERY
|
|
# Same column referenced multiple times in the same hash-based distribution
|
|
create table tab (a int primary key) partition by hash (a, a, a) partitions 3
|
|
stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: hash bucket schema components must not contain columns in common
|
|
====
|
|
---- QUERY
|
|
# Kudu table that uses Impala keywords as table name and column names
|
|
create table `add`(`analytic` int, `function` int, primary key(`analytic`, `function`))
|
|
partition by hash (`analytic`) partitions 4, range (`function`)
|
|
(partition values <= 1, partition 1 < values <= 10, partition 10 < values) stored as kudu;
|
|
insert into `add` select id, int_col from functional.alltypestiny;
|
|
select * from `add`
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,0
|
|
3,1
|
|
4,0
|
|
5,1
|
|
6,0
|
|
7,1
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Test implicit casting/folding of partition values.
|
|
create table tab (a int not null primary key)
|
|
partition by range (a) (partition value = false)
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Invalid hostname
|
|
create table tdata_bogus_host (id int primary key, name string, valf float, vali bigint)
|
|
PARTITION BY RANGE (PARTITION 10 <= VALUES <= 30) STORED AS KUDU
|
|
TBLPROPERTIES('kudu.master_addresses' = 'bogus host name')
|
|
---- CATCH
|
|
Couldn't resolve this master's address bogus host name:7051
|
|
====
|
|
---- QUERY
|
|
# Non-existing host
|
|
create table tdata_non_existing_host
|
|
(id int primary key, name string, valf float, vali bigint)
|
|
PARTITION BY RANGE (PARTITION 10 <= VALUES <= 30) STORED AS KUDU
|
|
TBLPROPERTIES('kudu.master_addresses' = 'bogus.host.name')
|
|
---- CATCH
|
|
Couldn't resolve this master's address bogus.host.name:7051
|
|
====
|
|
---- QUERY
|
|
# Valid host with whitespace
|
|
create table tdata_master_addresses_whitespace (id int primary key) stored as kudu
|
|
tblproperties('kudu.master_addresses' = ' $INTERNAL_LISTEN_HOST ')
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into tdata_master_addresses_whitespace values (0), (1)
|
|
---- DML_RESULTS: tdata_master_addresses_whitespace
|
|
0
|
|
1
|
|
====
|
|
---- QUERY
|
|
# Test that string case is ignored
|
|
create table ignore_column_case (Id int, NAME string, vAlf float, vali bigint,
|
|
primary key (Id, NAME)) PARTITION BY RANGE (PARTITION VALUE = (1, 'Martin'))
|
|
STORED AS KUDU
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into ignore_column_case values (1, 'Martin', 1.0, 10);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
====
|
|
---- QUERY
|
|
select ID, nAmE, VALF, VALI from ignore_column_case where NaMe = 'Martin';
|
|
---- RESULTS
|
|
1,'Martin',1.0,10
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT
|
|
====
|
|
---- QUERY
|
|
# Using NULL as default values
|
|
create table tbl_with_null_defaults (x int primary key, i1 tinyint default null,
|
|
i2 smallint default null, i3 int default null, i4 bigint default null,
|
|
vals string default null, valf float default null, vald double default null,
|
|
valb boolean default null, valdec4 decimal(9) default null,
|
|
valdec8 decimal(18) default null, valdec16 decimal(38) default null,
|
|
valdate date default null, valvc varchar(2000) default null)
|
|
partition by hash (x) partitions 3 stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into tbl_with_null_defaults (x) values (1);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
X, I1, I2, I3, I4, VALS, VALF, VALD, VALB, VALDEC4, VALDEC8, VALDEC16, VALDATE, VALVC
|
|
---- DML_RESULTS: tbl_with_null_defaults
|
|
1,NULL,NULL,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
---- TYPES
|
|
INT,TINYINT,SMALLINT,INT,BIGINT,STRING,FLOAT,DOUBLE,BOOLEAN,DECIMAL,DECIMAL,DECIMAL,DATE,STRING
|
|
---- HS2_TYPES
|
|
INT,TINYINT,SMALLINT,INT,BIGINT,STRING,FLOAT,DOUBLE,BOOLEAN,DECIMAL,DECIMAL,DECIMAL,DATE,VARCHAR
|
|
====
|
|
---- QUERY
|
|
# Overlapping ranges are rejected by the Kudu client
|
|
create table ts_ranges (ts timestamp primary key, i int)
|
|
partition by range (
|
|
partition cast('2009-01-02 00:00:00' as timestamp) <= VALUES <
|
|
cast('2009-01-03 00:00:00' as timestamp),
|
|
partition cast('2009-01-02 10:00:00' as timestamp) <= VALUES
|
|
) stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: overlapping range partitions: first range partition: 2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z, second range partition: 2009-01-02T10:00:00.000000Z <= VALUES
|
|
====
|
|
---- QUERY
|
|
# Creates a range partitioned Kudu table with a timestamp PK. Note that nanoseconds
|
|
# are rounded (same behavior as when writing timestamp values to Kudu).
|
|
create table ts_ranges (ts timestamp primary key, i int)
|
|
partition by range (
|
|
partition nanoseconds_add(cast('2009-01-01 00:00:00' as timestamp), 999) <= VALUES <
|
|
nanoseconds_add(cast('2009-01-02 00:00:00' as timestamp), 1),
|
|
partition cast('2009-01-02 00:00:00' as timestamp) <= VALUES <
|
|
cast('2009-01-03 00:00:00' as timestamp),
|
|
partition '2009-01-03 00:00:00' <= VALUES
|
|
) stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
show range partitions ts_ranges
|
|
---- RESULTS
|
|
'2009-01-01T00:00:00.000001Z <= VALUES < 2009-01-02T00:00:00.000000Z'
|
|
'2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z'
|
|
'VALUES >= 2009-01-03T00:00:00.000000Z'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
create table ts_ranges_ctas
|
|
primary key (ts)
|
|
partition by range (
|
|
partition VALUES < '2009-01-02 00:00:00',
|
|
partition '2009-01-02 00:00:00' <= VALUES <
|
|
'2009-01-03 00:00:00',
|
|
partition '2009-01-03 00:00:00' < VALUES,
|
|
partition VALUE = ('2009-01-03 00:00:00')
|
|
)
|
|
stored as kudu
|
|
as select timestamp_col ts, id from functional.alltypestiny;
|
|
---- RESULTS
|
|
'Inserted 8 row(s)'
|
|
====
|
|
---- QUERY
|
|
show range partitions ts_ranges_ctas
|
|
---- RESULTS
|
|
'VALUES < 2009-01-02T00:00:00.000000Z'
|
|
'2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z'
|
|
'VALUE = 2009-01-03T00:00:00.000000Z'
|
|
'VALUES >= 2009-01-03T00:00:00.000001Z'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select * from ts_ranges_ctas order by id
|
|
---- RESULTS
|
|
2009-01-01 00:00:00,0
|
|
2009-01-01 00:01:00,1
|
|
2009-02-01 00:00:00,2
|
|
2009-02-01 00:01:00,3
|
|
2009-03-01 00:00:00,4
|
|
2009-03-01 00:01:00,5
|
|
2009-04-01 00:00:00,6
|
|
2009-04-01 00:01:00,7
|
|
---- TYPES
|
|
TIMESTAMP,INT
|
|
====
|
|
---- QUERY
|
|
# Creates a Kudu table with timestamp column default values.
|
|
create table ts_default (i int primary key, ts1 timestamp,
|
|
ts2 timestamp default cast('2009-01-01 00:00:00' as timestamp))
|
|
partition by hash(i) partitions 3 stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into ts_default (i) values (1);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
I, TS1, TS2
|
|
---- DML_RESULTS: ts_default
|
|
1,NULL,2009-01-01 00:00:00
|
|
---- TYPES
|
|
INT,TIMESTAMP,TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
insert into ts_default (i, ts1, ts2) values (2, NULL, NULL);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
I, TS1, TS2
|
|
---- DML_RESULTS: ts_default
|
|
1,NULL,2009-01-01 00:00:00
|
|
2,NULL,NULL
|
|
---- TYPES
|
|
INT,TIMESTAMP,TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# create an unpartitioned table
|
|
create table unpartitioned_kudu_table (col0 bigint primary key, col1 string)
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
---- ERRORS
|
|
Unpartitioned Kudu tables are inefficient for large data sizes.
|
|
====
|
|
---- QUERY
|
|
insert into unpartitioned_kudu_table values (0, 'zero'), (1, 'one')
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 2
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
COL0,COL1
|
|
---- DML_RESULTS: unpartitioned_kudu_table
|
|
0,'zero'
|
|
1,'one'
|
|
---- TYPES
|
|
BIGINT,STRING
|
|
====
|
|
---- QUERY
|
|
create table unpartitioned_kudu_table2 primary key(id) stored as kudu
|
|
as select id from functional.alltypestiny where id > 4
|
|
---- RESULTS
|
|
'Inserted 3 row(s)'
|
|
---- ERRORS
|
|
Unpartitioned Kudu tables are inefficient for large data sizes.
|
|
====
|
|
---- QUERY
|
|
select * from unpartitioned_kudu_table2
|
|
---- RESULTS
|
|
5
|
|
6
|
|
7
|
|
---- LABELS
|
|
ID
|
|
---- TYPES
|
|
INT
|
|
====
|
|
---- QUERY
|
|
# Creates a Kudu table with decimal columns and primary key
|
|
create table create_decimal
|
|
(
|
|
decimal_4 decimal(9, 9),
|
|
decimal_8 decimal(18, 2) not null default 100.00,
|
|
decimal_16 decimal(38, 0) null,
|
|
primary key (decimal_4))
|
|
stored as kudu;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Create as select table with decimal columns and primary key
|
|
create table ctas_decimal primary key (d1,d2,d3)
|
|
stored as kudu
|
|
as select * from functional.decimal_tbl;
|
|
select * from ctas_decimal;
|
|
---- RESULTS
|
|
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
|
|
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
|
|
12345,333,123.4567890000,0.12345678900000000000000000000000000000,11.22000,1
|
|
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,0.10000,1
|
|
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
|
|
---- TYPES
|
|
DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
|
|
# IMPALA-6954: CTAS with an expr rewrite.
|
|
create table ctas_rewrite primary key(id)
|
|
partition by range(id) (partition 0 <= values < 100) stored as kudu
|
|
as select id, tinyint_col from functional.alltypes
|
|
where id between 0 and 1;
|
|
show range partitions ctas_rewrite;
|
|
---- RESULTS
|
|
'0 <= VALUES < 100'
|
|
====
|
|
---- QUERY
|
|
# create table with primary key of DATE type
|
|
create table kudu_date_key (fdatekey date primary key, val string)
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
---- ERRORS
|
|
Unpartitioned Kudu tables are inefficient for large data sizes.
|
|
====
|
|
---- QUERY
|
|
insert into kudu_date_key values (DATE '1970-01-01', 'Unix epoch'), (DATE '2019-12-12', 'today')
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 2
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
FDATEKEY,VAL
|
|
---- DML_RESULTS: kudu_date_key
|
|
1970-01-01,'Unix epoch'
|
|
2019-12-12,'today'
|
|
---- TYPES
|
|
DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# create table with invalid default DATE value
|
|
create table kudu_invalid_default_date (fdate date default DATE '111111-33-33')
|
|
stored as kudu
|
|
---- CATCH
|
|
AnalysisException: Invalid date literal: '111111-33-33'
|
|
====
|
|
---- QUERY
|
|
# create table with invalid default STRING value
|
|
create table kudu_invalid_default_string (s string primary key default unhex("aa"))
|
|
stored as kudu
|
|
---- CATCH
|
|
AnalysisException: Invalid String default value: unhex('aa')
|
|
====
|
|
---- QUERY
|
|
# create table with invalid range STRING value
|
|
create table kudu_invalid_range_string (s string primary key)
|
|
partition by range (s) (partition values < unhex("aa")) stored as kudu;
|
|
---- CATCH
|
|
AnalysisException: Invalid String range partition value: 'unhex("AA")'
|
|
====
|
|
---- QUERY
|
|
# create table with DATE primary key partitioned by range
|
|
create table kudu_datepk_range (fdate DATE not null primary key)
|
|
partition by range (fdate)
|
|
(
|
|
partition values < DATE '1900-01-01',
|
|
partition DATE '1900-01-01' <= values < DATE '1970-01-01',
|
|
partition DATE '1970-01-01' <= values < DATE '2000-01-01',
|
|
partition DATE '2000-01-01' <= values
|
|
)
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into kudu_datepk_range values
|
|
(DATE '1800-01-01'),
|
|
(DATE '1970-01-01'),
|
|
(DATE '2019-12-12')
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 3
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
FDATE
|
|
---- DML_RESULTS: kudu_datepk_range
|
|
1800-01-01
|
|
1970-01-01
|
|
2019-12-12
|
|
---- TYPES
|
|
DATE
|
|
====
|
|
---- QUERY
|
|
select * from kudu_datepk_range;
|
|
---- RESULTS
|
|
1800-01-01
|
|
1970-01-01
|
|
2019-12-12
|
|
---- TYPES
|
|
DATE
|
|
====
|
|
---- QUERY
|
|
# Creates a Kudu table with varchar columns, primary key,
|
|
# hash partition and range partition.
|
|
create table create_varchar
|
|
(
|
|
valvc varchar(10),
|
|
primary key (valvc))
|
|
partition by hash (valvc) partitions 4, range (valvc) (
|
|
partition values < cast('m' as VARCHAR(10)),
|
|
partition cast('m' as VARCHAR(10)) <= values)
|
|
stored as kudu;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into create_varchar values
|
|
(cast('a' as VARCHAR(10))),
|
|
(cast('m' as VARCHAR(10))),
|
|
(cast('z' as VARCHAR(10)))
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 3
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
VALVC
|
|
---- DML_RESULTS: create_varchar
|
|
'a'
|
|
'm'
|
|
'z'
|
|
---- TYPES
|
|
STRING
|
|
---- HS2_TYPES
|
|
VARCHAR
|
|
====
|
|
---- QUERY
|
|
# Creates as select table with varchar columns and primary key
|
|
create table ctas_varchar primary key (vc)
|
|
stored as kudu
|
|
as select vc from functional.chars_tiny;
|
|
select * from ctas_varchar;
|
|
---- RESULTS
|
|
'1cccc'
|
|
'2cccccc'
|
|
'3ccc'
|
|
'4cc'
|
|
'5c'
|
|
'6c'
|
|
'c'
|
|
---- TYPES
|
|
STRING
|
|
---- HS2_TYPES
|
|
VARCHAR
|
|
====
|
|
---- QUERY
|
|
# Create with keyword 'stored by'
|
|
create table kudu_stored_by (i int primary key, s string, ts timestamp, d date) stored by kudu;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
describe formatted kudu_stored_by;
|
|
---- RESULTS: VERIFY_IS_SUBSET
|
|
'','storage_handler ','org.apache.hadoop.hive.kudu.KuduStorageHandler'
|
|
---- TYPES
|
|
string, string, string
|
|
====
|
|
---- QUERY
|
|
# Create Kudu table with non unique primary key
|
|
create table non_unique_key_create_tbl1 (id int non unique primary key, name string)
|
|
partition by hash (id) partitions 3
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into non_unique_key_create_tbl1 values (1,'Martin'), (2,'Smith');
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 2
|
|
NumRowErrors: 0
|
|
====
|
|
---- QUERY
|
|
# auto-incrementing column is not shown for "select *"
|
|
select * from non_unique_key_create_tbl1 where name = 'Martin';
|
|
---- RESULTS
|
|
1,'Martin'
|
|
---- TYPES
|
|
INT,STRING
|
|
====
|
|
---- QUERY
|
|
# Create Kudu table with non unique composite primary key
|
|
create table non_unique_key_create_tbl2 (a int, b string, non unique primary key(a, b))
|
|
partition by hash (a) partitions 3
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Create Kudu table with non unique composite primary key
|
|
create table non_unique_key_create_tbl3 (a string, b int, non unique primary key(a, b))
|
|
partition by hash (a) partitions 3
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Create Kudu table without primary key columns,
|
|
# partition columns will be promoted as non unique primary key columns.
|
|
create table promote_partition_keys_as_non_unique_keys_test (a int, b string, c float)
|
|
partition by hash (a, b) partitions 3
|
|
stored as kudu;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
---- ERRORS
|
|
Partition columns (a, b) are promoted as non unique primary key.
|
|
====
|
|
---- QUERY
|
|
# Create Kudu table without primary key columns,
|
|
# partition columns cannot be promoted as non unique primary key columns since the columns
|
|
# are not beginning columns of the table.
|
|
create table partition_keys_not_promoted_test (a int, b string, c float)
|
|
partition by hash (b, c) partitions 3
|
|
stored as kudu;
|
|
---- CATCH
|
|
AnalysisException: Specify primary key or non unique primary key for the Kudu table, or create partitions with the beginning columns of the table.
|
|
====
|
|
---- QUERY
|
|
# Create unpartitioned Kudu table with non unique primary key column.
|
|
create table non_unique_key_create_tbl4 (a int non unique primary key, b string)
|
|
stored as kudu;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Create Kudu table in CTAS statement with non unique primary key
|
|
create table non_unique_key_create_tbl5 non unique primary key (id)
|
|
partition by hash (id) partitions 3
|
|
stored as kudu
|
|
as select id, int_col from functional.alltypestiny;
|
|
select * from non_unique_key_create_tbl5 order by id asc;
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,0
|
|
3,1
|
|
4,0
|
|
5,1
|
|
6,0
|
|
7,1
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Create Kudu table in CTAS statement with non unique primary key and range partitions
|
|
create table non_unique_key_create_tbl6 non unique primary key (id)
|
|
partition by range (id) (partition values <= 1, partition 1 < values <= 3,
|
|
partition 3 < values <= 5, partition 5 < values)
|
|
stored as kudu
|
|
as select id, int_col from functional.alltypestiny order by id asc limit 100;
|
|
select id, int_col, auto_incrementing_id from non_unique_key_create_tbl6 order by id asc;
|
|
---- RESULTS
|
|
0,0,1
|
|
1,1,2
|
|
2,0,1
|
|
3,1,2
|
|
4,0,1
|
|
5,1,2
|
|
6,0,1
|
|
7,1,2
|
|
---- TYPES
|
|
INT,INT,BIGINT
|
|
====
|
|
---- QUERY
|
|
# Non unique primary key for non Kudu table
|
|
create table non_unique_key_create_tbl7 (x int, y boolean, non unique primary key(x, y))
|
|
---- CATCH
|
|
AnalysisException: NON UNIQUE PRIMARY KEY is only supported for Kudu
|
|
====
|
|
---- QUERY
|
|
# Non unique primary keys should be declared first
|
|
create table non_unique_key_create_tbl8 (x int, y int, non unique primary key(y))
|
|
partition by hash (y) partitions 3 stored as kudu
|
|
---- CATCH
|
|
ImpalaRuntimeException: Kudu NON UNIQUE PRIMARY KEY columns must be specified as the first columns in the table (expected leading columns ('y') but found ('x'))
|
|
====
|
|
---- QUERY
|
|
# Non unique primary keys should be be listed in the same order
|
|
create table non_unique_key_create_tbl9 (x int, y int, z int, non unique primary key(y,x))
|
|
partition by hash (y) partitions 3 stored as kudu
|
|
---- CATCH
|
|
ImpalaRuntimeException: Kudu NON UNIQUE PRIMARY KEY columns must be specified as the first columns in the table (expected leading columns ('y', 'x') but found ('x', 'y'))
|
|
====
|
|
---- QUERY
|
|
# Non unique primary key cannot be boolean type
|
|
create table non_unique_key_create_tbl10 (x boolean non unique primary key)
|
|
partition by hash(x) partitions 8 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: key column may not have type of BOOL, FLOAT, DOUBLE, or NESTED
|
|
====
|
|
---- QUERY
|
|
# Non unique primary key cannot be float type
|
|
create table non_unique_key_create_tbl11 (x float non unique primary key)
|
|
partition by hash(x) partitions 8 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: key column may not have type of BOOL, FLOAT, DOUBLE, or NESTED
|
|
====
|
|
---- QUERY
|
|
# Non unique primary key cannot be double type
|
|
create table non_unique_key_create_tbl12 (x double non unique primary key)
|
|
partition by hash(x) partitions 8 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: key column may not have type of BOOL, FLOAT, DOUBLE, or NESTED
|
|
====
|
|
---- QUERY
|
|
# Cannot create a Kudu table without any key and partition
|
|
create table non_unique_key_create_tbl13 (x int) stored as kudu
|
|
---- CATCH
|
|
AnalysisException: A primary key is required for a Kudu table.
|
|
====
|
|
---- QUERY
|
|
# Cannot create a Kudu table with a column named as "auto_incrementing_id"
|
|
create table non_unique_key_create_tbl14 (id int primary key, auto_incrementing_id bigint)
|
|
partition by hash(id) partitions 3 stored as kudu
|
|
---- CATCH
|
|
IllegalArgumentException: Column name auto_incrementing_id is reserved by Kudu engine
|
|
====
|
|
---- QUERY
|
|
# Create Kudu table in CTAS statement without specifying primary key.
|
|
# Partition column 'id' is promoted as non unique primary key.
|
|
create table non_unique_key_create_tbl15
|
|
partition by range (id) (partition values <= 1, partition 1 < values <= 3,
|
|
partition 3 < values <= 5, partition 5 < values)
|
|
stored as kudu
|
|
as select id, int_col from functional.alltypestiny order by id asc limit 100;
|
|
select id, int_col, auto_incrementing_id from non_unique_key_create_tbl15 order by id asc;
|
|
---- RESULTS
|
|
0,0,1
|
|
1,1,2
|
|
2,0,1
|
|
3,1,2
|
|
4,0,1
|
|
5,1,2
|
|
6,0,1
|
|
7,1,2
|
|
---- TYPES
|
|
INT,INT,BIGINT
|
|
====
|
|
---- QUERY
|
|
# Create Kudu table with decimal primary key and range partitions.
|
|
create table decimal_partitioned_tbl
|
|
(c1 decimal(10,4) primary key, c2 decimal(15,5), c3 decimal(1,1))
|
|
partition by range (c1) (partition values <= 0.2000, partition 0.2000 < values <= 0.7575,
|
|
partition 0.7575 < values)
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
show range partitions decimal_partitioned_tbl
|
|
---- RESULTS
|
|
'VALUES < 0.2001'
|
|
'0.2001 <= VALUES < 0.7576'
|
|
'VALUES >= 0.7576'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
insert into decimal_partitioned_tbl
|
|
select c1, c2, c3 from functional.decimal_tiny order by c1 asc limit 10
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 10
|
|
NumRowErrors: 0
|
|
====
|
|
---- QUERY
|
|
select c1, c2, c3 from decimal_partitioned_tbl order by c1 asc
|
|
---- RESULTS
|
|
0.0000,100.00000,0.0
|
|
0.1111,101.22222,0.1
|
|
0.2222,102.44444,0.2
|
|
0.3333,103.66666,0.3
|
|
0.4444,104.88888,0.4
|
|
0.5555,106.11110,0.5
|
|
0.6666,107.33332,0.6
|
|
0.7777,108.55554,0.7
|
|
0.8888,109.77776,0.8
|
|
0.9999,110.99998,0.9
|
|
---- TYPES
|
|
DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
|
|
# Cannot create a Kudu table with float range partition
|
|
create table create_tbl_float_part (x float) partition by range (x)
|
|
(partition values < 0)
|
|
stored as kudu
|
|
---- CATCH
|
|
AnalysisException: FLOAT type is not allowed to be part of a PRIMARY KEY therefore not allowed for range-partitioning.
|
|
====
|
|
---- QUERY
|
|
# Cannot create a Kudu table with double range partition
|
|
create table create_tbl_double_part (x double) partition by range (x)
|
|
(partition values < 0)
|
|
stored as kudu
|
|
---- CATCH
|
|
AnalysisException: DOUBLE type is not allowed to be part of a PRIMARY KEY therefore not allowed for range-partitioning.
|
|
====
|
|
---- QUERY
|
|
# Cannot create a Kudu table with boolean range partition
|
|
create table create_tbl_float_part (x boolean) partition by range (x)
|
|
(partition value = true)
|
|
stored as kudu
|
|
---- CATCH
|
|
AnalysisException: BOOLEAN type is not allowed to be part of a PRIMARY KEY therefore not allowed for range-partitioning.
|
|
====
|
|
---- QUERY
|
|
# Array primary key column
|
|
create table tab_array_primary_key (array_int array<int> primary key) stored as kudu
|
|
---- CATCH
|
|
IllegalArgumentException: Array type column: array_int cannot be a key column
|