mirror of
https://github.com/apache/impala.git
synced 2026-01-02 21:00:35 -05:00
While support for TIMESTAMP columns in Kudu tables has been committed (IMPALA-5137), it does not support TIMESTAMP column default values. This supports CREATE TABLE syntax to specify the default values, but more importantly this fixes the loading of Kudu tables that may have had default values set on UNIXTIME_MICROS columns, e.g. if the table was created via the python client. This involves fixing KuduColumn to hide the LiteralExpr representing the default value because it will be a BIGINT if the column type is TIMESTAMP. It is only needed to call toSql() and toStringValue(), so helper functions are added to KuduColumn to encapsulate special logic for TIMESTAMP. TODO: Add support and tests for ALTER setting the default value (when IMPALA-4622 is committed). Change-Id: I655910fb4805bb204a999627fa9f68e43ea8aaf2 Reviewed-on: http://gerrit.cloudera.org:8080/6936 Reviewed-by: Matthew Jacobs <mj@cloudera.com> Tested-by: Impala Public Jenkins
251 lines
7.5 KiB
Plaintext
251 lines
7.5 KiB
Plaintext
====
|
|
---- QUERY
|
|
create table t primary key (id) partition by hash (id) partitions 3
|
|
stored as kudu
|
|
as select id, int_col from functional.alltypestiny;
|
|
select * from t;
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,0
|
|
3,1
|
|
4,0
|
|
5,1
|
|
6,0
|
|
7,1
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Boolean primary key column
|
|
create table tab (x int, y boolean, primary key(x, y))
|
|
partition by hash (x) partitions 3 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: key column may not have type of BOOL, FLOAT, or DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Float primary key column
|
|
create table tab (x int, y float, primary key(x, y))
|
|
partition by hash (x) partitions 3 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: key column may not have type of BOOL, FLOAT, or DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Primary keys should be declared first
|
|
# TODO: See KUDU-1709 for improving Kudu error messages.
|
|
create table tab (x int, y int, primary key(y))
|
|
partition by hash (y) partitions 3 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: Got out-of-order key column: name: "y" type: INT32 is_key: true is_nullable: false cfile_block_size: 0
|
|
====
|
|
---- QUERY
|
|
# Small number of hash partitions
|
|
create table tab (a int, b int, c int, d int, primary key(a, b, c))
|
|
partition by hash(a,b) partitions 8, hash(c) partitions 1 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: must have at least two hash buckets
|
|
====
|
|
---- QUERY
|
|
# Same column in multiple hash based distributions
|
|
create table tab (a int, b int, primary key (a))
|
|
partition by hash (a) partitions 3, hash (a) partitions 2 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: hash bucket schema components must not contain columns in common
|
|
====
|
|
---- QUERY
|
|
# Same column referenced multiple times in the same hash-based distribution
|
|
create table tab (a int primary key) partition by hash (a, a, a) partitions 3
|
|
stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: hash bucket schema components must not contain columns in common
|
|
====
|
|
---- QUERY
|
|
# Kudu table that uses Impala keywords as table name and column names
|
|
create table `add`(`analytic` int, `function` int, primary key(`analytic`, `function`))
|
|
partition by hash (`analytic`) partitions 4, range (`function`)
|
|
(partition values <= 1, partition 1 < values <= 10, partition 10 < values) stored as kudu;
|
|
insert into `add` select id, int_col from functional.alltypestiny;
|
|
select * from `add`
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,0
|
|
3,1
|
|
4,0
|
|
5,1
|
|
6,0
|
|
7,1
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Test implicit casting/folding of partition values.
|
|
create table tab (a int not null primary key)
|
|
partition by range (a) (partition value = false)
|
|
stored as kudu
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Invalid hostname
|
|
create table tdata_bogus_host (id int primary key, name string, valf float, vali bigint)
|
|
PARTITION BY RANGE (PARTITION 10 <= VALUES <= 30) STORED AS KUDU
|
|
TBLPROPERTIES('kudu.master_addresses' = 'bogus host name')
|
|
---- CATCH
|
|
Couldn't resolve this master's address bogus host name:7051
|
|
====
|
|
---- QUERY
|
|
# Non-existing host
|
|
create table tdata_non_existing_host
|
|
(id int primary key, name string, valf float, vali bigint)
|
|
PARTITION BY RANGE (PARTITION 10 <= VALUES <= 30) STORED AS KUDU
|
|
TBLPROPERTIES('kudu.master_addresses' = 'bogus.host.name')
|
|
---- CATCH
|
|
Couldn't resolve this master's address bogus.host.name:7051
|
|
====
|
|
---- QUERY
|
|
# Test that string case is ignored
|
|
create table ignore_column_case (Id int, NAME string, vAlf float, vali bigint,
|
|
primary key (Id, NAME)) PARTITION BY RANGE (PARTITION VALUE = (1, 'Martin'))
|
|
STORED AS KUDU
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
insert into ignore_column_case values (1, 'Martin', 1.0, 10);
|
|
---- RESULTS
|
|
: 1
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
====
|
|
---- QUERY
|
|
select ID, nAmE, VALF, VALI from ignore_column_case where NaMe = 'Martin';
|
|
---- RESULTS
|
|
1,'Martin',1.0,10
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT
|
|
====
|
|
---- QUERY
|
|
# Using NULL as default values
|
|
create table tbl_with_null_defaults (x int primary key, i1 tinyint default null,
|
|
i2 smallint default null, i3 int default null, i4 bigint default null,
|
|
vals string default null, valf float default null, vald double default null,
|
|
valb boolean default null) partition by hash (x) partitions 3 stored as kudu
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
insert into tbl_with_null_defaults (x) values (1);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
X, I1, I2, I3, I4, VALS, VALF, VALD, VALB
|
|
---- DML_RESULTS: tbl_with_null_defaults
|
|
1,NULL,NULL,NULL,NULL,'NULL',NULL,NULL,NULL
|
|
---- TYPES
|
|
INT,TINYINT,SMALLINT,INT,BIGINT,STRING,FLOAT,DOUBLE,BOOLEAN
|
|
====
|
|
---- QUERY
|
|
# Overlapping ranges are rejected by the Kudu client
|
|
create table ts_ranges (ts timestamp primary key, i int)
|
|
partition by range (
|
|
partition cast('2009-01-02 00:00:00' as timestamp) <= VALUES <
|
|
cast('2009-01-03 00:00:00' as timestamp),
|
|
partition cast('2009-01-02 10:00:00' as timestamp) <= VALUES
|
|
) stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: overlapping range partitions: first range partition: 2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z, second range partition: VALUES >= 2009-01-02T10:00:00.000000Z
|
|
====
|
|
---- QUERY
|
|
# Creates a range partitioned Kudu table with a timestamp PK. Note that nanoseconds
|
|
# are rounded (same behavior as when writing timestamp values to Kudu).
|
|
create table ts_ranges (ts timestamp primary key, i int)
|
|
partition by range (
|
|
partition nanoseconds_add(cast('2009-01-01 00:00:00' as timestamp), 999) <= VALUES <
|
|
nanoseconds_add(cast('2009-01-02 00:00:00' as timestamp), 1),
|
|
partition cast('2009-01-02 00:00:00' as timestamp) <= VALUES <
|
|
cast('2009-01-03 00:00:00' as timestamp),
|
|
partition '2009-01-03 00:00:00' <= VALUES
|
|
) stored as kudu
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
show range partitions ts_ranges
|
|
---- RESULTS
|
|
'2009-01-01T00:00:00.000001Z <= VALUES < 2009-01-02T00:00:00.000000Z'
|
|
'2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z'
|
|
'VALUES >= 2009-01-03T00:00:00.000000Z'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
create table ts_ranges_ctas
|
|
primary key (ts)
|
|
partition by range (
|
|
partition VALUES < '2009-01-02 00:00:00',
|
|
partition '2009-01-02 00:00:00' <= VALUES <
|
|
'2009-01-03 00:00:00',
|
|
partition '2009-01-03 00:00:00' < VALUES,
|
|
partition VALUE = ('2009-01-03 00:00:00')
|
|
)
|
|
stored as kudu
|
|
as select timestamp_col ts, id from functional.alltypestiny;
|
|
---- RESULTS
|
|
'Inserted 8 row(s)'
|
|
====
|
|
---- QUERY
|
|
show range partitions ts_ranges_ctas
|
|
---- RESULTS
|
|
'VALUES < 2009-01-02T00:00:00.000000Z'
|
|
'2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z'
|
|
'VALUE = 2009-01-03T00:00:00.000000Z'
|
|
'VALUES >= 2009-01-03T00:00:00.000001Z'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select * from ts_ranges_ctas order by id
|
|
---- RESULTS
|
|
2009-01-01 00:00:00,0
|
|
2009-01-01 00:01:00,1
|
|
2009-02-01 00:00:00,2
|
|
2009-02-01 00:01:00,3
|
|
2009-03-01 00:00:00,4
|
|
2009-03-01 00:01:00,5
|
|
2009-04-01 00:00:00,6
|
|
2009-04-01 00:01:00,7
|
|
---- TYPES
|
|
TIMESTAMP,INT
|
|
====
|
|
---- QUERY
|
|
# Creates a Kudu table with timestamp column default values.
|
|
create table ts_default (i int primary key, ts1 timestamp,
|
|
ts2 timestamp default cast('2009-01-01 00:00:00' as timestamp))
|
|
partition by hash(i) partitions 3 stored as kudu
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
insert into ts_default (i) values (1);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
I, TS1, TS2
|
|
---- DML_RESULTS: ts_default
|
|
1,NULL,2009-01-01 00:00:00
|
|
---- TYPES
|
|
INT,TIMESTAMP,TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
insert into ts_default (i, ts1, ts2) values (2, NULL, NULL);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
I, TS1, TS2
|
|
---- DML_RESULTS: ts_default
|
|
1,NULL,2009-01-01 00:00:00
|
|
2,NULL,NULL
|
|
---- TYPES
|
|
INT,TIMESTAMP,TIMESTAMP
|
|
====
|