mirror of
https://github.com/apache/impala.git
synced 2026-01-22 09:01:58 -05:00
This patch supports reading and writing DATE values to Kudu tables. It does not add min-max filter runtime support, but there is followup JIRA IMPALA-9294. Corresponding Kudu JIRA is KUDU-2632. Change-Id: I91656749a58ac769b54c2a63bdd4f85c89520b32 Reviewed-on: http://gerrit.cloudera.org:8080/14705 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
425 lines
12 KiB
Plaintext
425 lines
12 KiB
Plaintext
====
|
|
---- QUERY
|
|
create table t primary key (id) partition by hash (id) partitions 3
|
|
stored as kudu
|
|
as select id, int_col from functional.alltypestiny;
|
|
select * from t;
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,0
|
|
3,1
|
|
4,0
|
|
5,1
|
|
6,0
|
|
7,1
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Boolean primary key column
|
|
create table tab (x int, y boolean, primary key(x, y))
|
|
partition by hash (x) partitions 3 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: key column may not have type of BOOL, FLOAT, or DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Float primary key column
|
|
create table tab (x int, y float, primary key(x, y))
|
|
partition by hash (x) partitions 3 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: key column may not have type of BOOL, FLOAT, or DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Primary keys should be declared first
|
|
create table tab (x int, y int, primary key(y))
|
|
partition by hash (y) partitions 3 stored as kudu
|
|
---- CATCH
|
|
ImpalaRuntimeException: Kudu PRIMARY KEY columns must be specified as the first columns in the table (expected leading columns ('y') but found ('x'))
|
|
====
|
|
---- QUERY
|
|
# Primary keys should be be listed in the same order
|
|
create table tab (x int, y int, z int, primary key(y,x))
|
|
partition by hash (y) partitions 3 stored as kudu
|
|
---- CATCH
|
|
ImpalaRuntimeException: Kudu PRIMARY KEY columns must be specified as the first columns in the table (expected leading columns ('y', 'x') but found ('x', 'y'))
|
|
====
|
|
---- QUERY
|
|
# Small number of hash partitions
|
|
create table tab (a int, b int, c int, d int, primary key(a, b, c))
|
|
partition by hash(a,b) partitions 8, hash(c) partitions 1 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: must have at least two hash buckets
|
|
====
|
|
---- QUERY
|
|
# Same column in multiple hash based distributions
|
|
create table tab (a int, b int, primary key (a))
|
|
partition by hash (a) partitions 3, hash (a) partitions 2 stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: hash bucket schema components must not contain columns in common
|
|
====
|
|
---- QUERY
|
|
# Same column referenced multiple times in the same hash-based distribution
|
|
create table tab (a int primary key) partition by hash (a, a, a) partitions 3
|
|
stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: hash bucket schema components must not contain columns in common
|
|
====
|
|
---- QUERY
|
|
# Kudu table that uses Impala keywords as table name and column names
|
|
create table `add`(`analytic` int, `function` int, primary key(`analytic`, `function`))
|
|
partition by hash (`analytic`) partitions 4, range (`function`)
|
|
(partition values <= 1, partition 1 < values <= 10, partition 10 < values) stored as kudu;
|
|
insert into `add` select id, int_col from functional.alltypestiny;
|
|
select * from `add`
|
|
---- RESULTS
|
|
0,0
|
|
1,1
|
|
2,0
|
|
3,1
|
|
4,0
|
|
5,1
|
|
6,0
|
|
7,1
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Test implicit casting/folding of partition values.
|
|
create table tab (a int not null primary key)
|
|
partition by range (a) (partition value = false)
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Invalid hostname
|
|
create table tdata_bogus_host (id int primary key, name string, valf float, vali bigint)
|
|
PARTITION BY RANGE (PARTITION 10 <= VALUES <= 30) STORED AS KUDU
|
|
TBLPROPERTIES('kudu.master_addresses' = 'bogus host name')
|
|
---- CATCH
|
|
Couldn't resolve this master's address bogus host name:7051
|
|
====
|
|
---- QUERY
|
|
# Non-existing host
|
|
create table tdata_non_existing_host
|
|
(id int primary key, name string, valf float, vali bigint)
|
|
PARTITION BY RANGE (PARTITION 10 <= VALUES <= 30) STORED AS KUDU
|
|
TBLPROPERTIES('kudu.master_addresses' = 'bogus.host.name')
|
|
---- CATCH
|
|
Couldn't resolve this master's address bogus.host.name:7051
|
|
====
|
|
---- QUERY
|
|
# Valid host with whitespace
|
|
create table tdata_master_addresses_whitespace (id int primary key) stored as kudu
|
|
tblproperties('kudu.master_addresses' = ' $INTERNAL_LISTEN_HOST ')
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into tdata_master_addresses_whitespace values (0), (1)
|
|
---- DML_RESULTS: tdata_master_addresses_whitespace
|
|
0
|
|
1
|
|
====
|
|
---- QUERY
|
|
# Test that string case is ignored
|
|
create table ignore_column_case (Id int, NAME string, vAlf float, vali bigint,
|
|
primary key (Id, NAME)) PARTITION BY RANGE (PARTITION VALUE = (1, 'Martin'))
|
|
STORED AS KUDU
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into ignore_column_case values (1, 'Martin', 1.0, 10);
|
|
---- RESULTS
|
|
: 1
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
====
|
|
---- QUERY
|
|
select ID, nAmE, VALF, VALI from ignore_column_case where NaMe = 'Martin';
|
|
---- RESULTS
|
|
1,'Martin',1.0,10
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT
|
|
====
|
|
---- QUERY
|
|
# Using NULL as default values
|
|
create table tbl_with_null_defaults (x int primary key, i1 tinyint default null,
|
|
i2 smallint default null, i3 int default null, i4 bigint default null,
|
|
vals string default null, valf float default null, vald double default null,
|
|
valb boolean default null, valdec4 decimal(9) default null,
|
|
valdec8 decimal(18) default null, valdec16 decimal(38) default null,
|
|
valdate date default null)
|
|
partition by hash (x) partitions 3 stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into tbl_with_null_defaults (x) values (1);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
X, I1, I2, I3, I4, VALS, VALF, VALD, VALB, VALDEC4, VALDEC8, VALDEC16, VALDATE
|
|
---- DML_RESULTS: tbl_with_null_defaults
|
|
1,NULL,NULL,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
INT,TINYINT,SMALLINT,INT,BIGINT,STRING,FLOAT,DOUBLE,BOOLEAN,DECIMAL,DECIMAL,DECIMAL,DATE
|
|
====
|
|
---- QUERY
|
|
# Overlapping ranges are rejected by the Kudu client
|
|
create table ts_ranges (ts timestamp primary key, i int)
|
|
partition by range (
|
|
partition cast('2009-01-02 00:00:00' as timestamp) <= VALUES <
|
|
cast('2009-01-03 00:00:00' as timestamp),
|
|
partition cast('2009-01-02 10:00:00' as timestamp) <= VALUES
|
|
) stored as kudu
|
|
---- CATCH
|
|
NonRecoverableException: overlapping range partitions: first range partition: 2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z, second range partition: VALUES >= 2009-01-02T10:00:00.000000Z
|
|
====
|
|
---- QUERY
|
|
# Creates a range partitioned Kudu table with a timestamp PK. Note that nanoseconds
|
|
# are rounded (same behavior as when writing timestamp values to Kudu).
|
|
create table ts_ranges (ts timestamp primary key, i int)
|
|
partition by range (
|
|
partition nanoseconds_add(cast('2009-01-01 00:00:00' as timestamp), 999) <= VALUES <
|
|
nanoseconds_add(cast('2009-01-02 00:00:00' as timestamp), 1),
|
|
partition cast('2009-01-02 00:00:00' as timestamp) <= VALUES <
|
|
cast('2009-01-03 00:00:00' as timestamp),
|
|
partition '2009-01-03 00:00:00' <= VALUES
|
|
) stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
show range partitions ts_ranges
|
|
---- RESULTS
|
|
'2009-01-01T00:00:00.000001Z <= VALUES < 2009-01-02T00:00:00.000000Z'
|
|
'2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z'
|
|
'VALUES >= 2009-01-03T00:00:00.000000Z'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
create table ts_ranges_ctas
|
|
primary key (ts)
|
|
partition by range (
|
|
partition VALUES < '2009-01-02 00:00:00',
|
|
partition '2009-01-02 00:00:00' <= VALUES <
|
|
'2009-01-03 00:00:00',
|
|
partition '2009-01-03 00:00:00' < VALUES,
|
|
partition VALUE = ('2009-01-03 00:00:00')
|
|
)
|
|
stored as kudu
|
|
as select timestamp_col ts, id from functional.alltypestiny;
|
|
---- RESULTS
|
|
'Inserted 8 row(s)'
|
|
====
|
|
---- QUERY
|
|
show range partitions ts_ranges_ctas
|
|
---- RESULTS
|
|
'VALUES < 2009-01-02T00:00:00.000000Z'
|
|
'2009-01-02T00:00:00.000000Z <= VALUES < 2009-01-03T00:00:00.000000Z'
|
|
'VALUE = 2009-01-03T00:00:00.000000Z'
|
|
'VALUES >= 2009-01-03T00:00:00.000001Z'
|
|
---- TYPES
|
|
STRING
|
|
====
|
|
---- QUERY
|
|
select * from ts_ranges_ctas order by id
|
|
---- RESULTS
|
|
2009-01-01 00:00:00,0
|
|
2009-01-01 00:01:00,1
|
|
2009-02-01 00:00:00,2
|
|
2009-02-01 00:01:00,3
|
|
2009-03-01 00:00:00,4
|
|
2009-03-01 00:01:00,5
|
|
2009-04-01 00:00:00,6
|
|
2009-04-01 00:01:00,7
|
|
---- TYPES
|
|
TIMESTAMP,INT
|
|
====
|
|
---- QUERY
|
|
# Creates a Kudu table with timestamp column default values.
|
|
create table ts_default (i int primary key, ts1 timestamp,
|
|
ts2 timestamp default cast('2009-01-01 00:00:00' as timestamp))
|
|
partition by hash(i) partitions 3 stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into ts_default (i) values (1);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
I, TS1, TS2
|
|
---- DML_RESULTS: ts_default
|
|
1,NULL,2009-01-01 00:00:00
|
|
---- TYPES
|
|
INT,TIMESTAMP,TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
insert into ts_default (i, ts1, ts2) values (2, NULL, NULL);
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
I, TS1, TS2
|
|
---- DML_RESULTS: ts_default
|
|
1,NULL,2009-01-01 00:00:00
|
|
2,NULL,NULL
|
|
---- TYPES
|
|
INT,TIMESTAMP,TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# create an unpartitioned table
|
|
create table unpartitioned_kudu_table (col0 bigint primary key, col1 string)
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
---- ERRORS
|
|
Unpartitioned Kudu tables are inefficient for large data sizes.
|
|
====
|
|
---- QUERY
|
|
insert into unpartitioned_kudu_table values (0, 'zero'), (1, 'one')
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 2
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
COL0,COL1
|
|
---- DML_RESULTS: unpartitioned_kudu_table
|
|
0,'zero'
|
|
1,'one'
|
|
---- TYPES
|
|
BIGINT,STRING
|
|
====
|
|
---- QUERY
|
|
create table unpartitioned_kudu_table2 primary key(id) stored as kudu
|
|
as select id from functional.alltypestiny where id > 4
|
|
---- RESULTS
|
|
'Inserted 3 row(s)'
|
|
---- ERRORS
|
|
Unpartitioned Kudu tables are inefficient for large data sizes.
|
|
====
|
|
---- QUERY
|
|
select * from unpartitioned_kudu_table2
|
|
---- RESULTS
|
|
5
|
|
6
|
|
7
|
|
---- LABELS
|
|
ID
|
|
---- TYPES
|
|
INT
|
|
====
|
|
---- QUERY
|
|
# Creates a Kudu table with decimal columns and primary key
|
|
create table create_decimal
|
|
(
|
|
decimal_4 decimal(9, 9),
|
|
decimal_8 decimal(18, 2) not null default 100.00,
|
|
decimal_16 decimal(38, 0) null,
|
|
primary key (decimal_4))
|
|
stored as kudu;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Create as select table with decimal columns and primary key
|
|
create table ctas_decimal primary key (d1,d2,d3)
|
|
stored as kudu
|
|
as select * from functional.decimal_tbl;
|
|
select * from ctas_decimal;
|
|
---- RESULTS
|
|
1234,2222,1.2345678900,0.12345678900000000000000000000000000000,12345.78900,1
|
|
2345,111,12.3456789000,0.12345678900000000000000000000000000000,3.14100,1
|
|
12345,333,123.4567890000,0.12345678900000000000000000000000000000,11.22000,1
|
|
12345,333,1234.5678900000,0.12345678900000000000000000000000000000,0.10000,1
|
|
132842,333,12345.6789000000,0.12345678900000000000000000000000000000,0.77889,1
|
|
---- TYPES
|
|
DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL,DECIMAL
|
|
====
|
|
---- QUERY
|
|
# IMPALA-6954: CTAS with an expr rewrite.
|
|
create table ctas_rewrite primary key(id)
|
|
partition by range(id) (partition 0 <= values < 100) stored as kudu
|
|
as select id, tinyint_col from functional.alltypes
|
|
where id between 0 and 1;
|
|
show range partitions ctas_rewrite;
|
|
---- RESULTS
|
|
'0 <= VALUES < 100'
|
|
====
|
|
---- QUERY
|
|
# create table with primary key of DATE type
|
|
create table kudu_date_key (fdatekey date primary key, val string)
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
---- ERRORS
|
|
Unpartitioned Kudu tables are inefficient for large data sizes.
|
|
====
|
|
---- QUERY
|
|
insert into kudu_date_key values (DATE '1970-01-01', 'Unix epoch'), (DATE '2019-12-12', 'today')
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 2
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
FDATEKEY,VAL
|
|
---- DML_RESULTS: kudu_date_key
|
|
1970-01-01,'Unix epoch'
|
|
2019-12-12,'today'
|
|
---- TYPES
|
|
DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# create table with invalid default DATE value
|
|
create table kudu_invalid_default_date (fdate date default DATE '111111-33-33')
|
|
stored as kudu
|
|
---- CATCH
|
|
AnalysisException: Invalid date literal: '111111-33-33'
|
|
====
|
|
---- QUERY
|
|
# create table with DATE primary key partitioned by range
|
|
create table kudu_datepk_range (fdate DATE not null primary key)
|
|
partition by range (fdate)
|
|
(
|
|
partition values < DATE '1900-01-01',
|
|
partition DATE '1900-01-01' <= values < DATE '1970-01-01',
|
|
partition DATE '1970-01-01' <= values < DATE '2000-01-01',
|
|
partition DATE '2000-01-01' <= values
|
|
)
|
|
stored as kudu
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into kudu_datepk_range values
|
|
(DATE '1800-01-01'),
|
|
(DATE '1970-01-01'),
|
|
(DATE '2019-12-12')
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 3
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
FDATE
|
|
---- DML_RESULTS: kudu_datepk_range
|
|
1800-01-01
|
|
1970-01-01
|
|
2019-12-12
|
|
---- TYPES
|
|
DATE
|
|
====
|
|
---- QUERY
|
|
select * from kudu_datepk_range;
|
|
---- RESULTS
|
|
1800-01-01
|
|
1970-01-01
|
|
2019-12-12
|
|
---- TYPES
|
|
DATE
|
|
====
|