mirror of
https://github.com/apache/impala.git
synced 2026-01-04 00:00:56 -05:00
Adds Impala support for TIMESTAMP types stored in Kudu. Impala stores TIMESTAMP values in 96-bits and has nanosecond precision. Kudu's timestamp is a 64-bit microsecond delta from the Unix epoch (called UNIXTIME_MICROS), so a conversion is necessary. When writing to Kudu, TIMESTAMP values in nanoseconds are rounded to the nearest microsecond. When reading from Kudu, the KuduScanner returns UNIXTIME_MICROS with 8 bytes of padding so Impala can convert the value to a TimestampValue in-line and copy the entire row. Testing: Updated the functional_kudu schema to use TIMESTAMPs instead of converting to STRING, so this provides some decent coverage. Some BE tests were added, and some EE tests as well. TODO: Support pushing down TIMESTAMP predicates TODO: Support TIMESTAMPs in range partitioning expressions Change-Id: Iae6ccfffb79118a9036fb2227dba3a55356c896d Reviewed-on: http://gerrit.cloudera.org:8080/6526 Reviewed-by: Matthew Jacobs <mj@cloudera.com> Tested-by: Impala Public Jenkins
433 lines
14 KiB
Plaintext
433 lines
14 KiB
Plaintext
====
|
|
---- QUERY
# Kudu table exercised by the INSERT test cases that follow: an INT primary key, one
# nullable column per scalar type, a TIMESTAMP column, and three range partitions.
CREATE TABLE tdata (
  id INT PRIMARY KEY,
  valf FLOAT NULL,
  vali BIGINT NULL,
  valv STRING NULL,
  valb BOOLEAN NULL,
  valt TINYINT NULL,
  vals SMALLINT NULL,
  vald DOUBLE NULL,
  ts TIMESTAMP
)
PARTITION BY RANGE (
  PARTITION VALUES < 10,
  PARTITION 10 <= VALUES < 30,
  PARTITION 30 <= VALUES
)
STORED AS KUDU
---- RESULTS
|
|
====
|
|
---- QUERY
# VALUES clause: one row that supplies every column; no row errors expected.
INSERT INTO tdata
VALUES (1, 1, 1, 'one', true, 1, 1, 1, CAST('1987-05-19 00:00:00' AS TIMESTAMP))
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP
|
|
====
|
|
---- QUERY
# VALUES clause: one row that supplies every column, with NULL in each non-key column.
INSERT INTO tdata
VALUES (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP
|
|
====
|
|
---- QUERY
# VALUES clause: one row that supplies every column, using boundary values. The
# timestamp is the largest value Impala can represent. It is expected to be truncated
# to microsecond precision, not rounded up to the nearest microsecond, because a
# rounded-up value could not be read back by Impala.
INSERT INTO tdata
VALUES (
  3,
  CAST('nan' AS FLOAT),
  max_bigint(),
  '',
  true,
  min_tinyint(),
  max_smallint(),
  CAST('-inf' AS DOUBLE),
  nanoseconds_add(CAST('9999-12-31 23:59:59' AS TIMESTAMP), 999999999)
)
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity,9999-12-31 23:59:59.999999000
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP
|
|
====
|
|
---- QUERY
# VALUES clause: one row with an explicit column list that names only three columns, in
# a different order than the table definition. The omitted columns are expected to be
# NULL.
INSERT INTO tdata (valb, vald, id)
VALUES (true, 0, 4)
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity,9999-12-31 23:59:59.999999000
4,NULL,NULL,'NULL',true,NULL,NULL,0,NULL
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP
|
|
====
|
|
---- QUERY
# VALUES clause: three rows in one statement, each supplying every column.
INSERT INTO tdata
VALUES
  (5, 5.0, 5, 'five', false, NULL, NULL, NULL, NULL),
  (6, 16, 60, '', true, 0, -1, -6, CAST('2010-12-31 23:59:59' AS TIMESTAMP)),
  (7, NULL, 10, NULL, false, max_tinyint(), -7, 2,
   CAST('1400-01-01 00:00:00' AS TIMESTAMP))
---- RUNTIME_PROFILE
NumModifiedRows: 3
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity,9999-12-31 23:59:59.999999000
4,NULL,NULL,'NULL',true,NULL,NULL,0,NULL
5,5.0,5,'five',false,NULL,NULL,NULL,NULL
6,16,60,'',true,0,-1,-6,2010-12-31 23:59:59
7,NULL,10,'NULL',false,127,-7,2,1400-01-01 00:00:00
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP
|
|
====
|
|
---- QUERY
# VALUES clause: two rows in one statement with an explicit column list that names
# only four columns.
INSERT INTO tdata (valv, valf, vali, id)
VALUES
  ('eight', 88, 888, 8),
  (NULL, -9, -99, 9)
---- RUNTIME_PROFILE
NumModifiedRows: 2
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity,9999-12-31 23:59:59.999999000
4,NULL,NULL,'NULL',true,NULL,NULL,0,NULL
5,5.0,5,'five',false,NULL,NULL,NULL,NULL
6,16,60,'',true,0,-1,-6,2010-12-31 23:59:59
7,NULL,10,'NULL',false,127,-7,2,1400-01-01 00:00:00
8,88,888,'eight',NULL,NULL,NULL,NULL,NULL
9,-9,-99,'NULL',NULL,NULL,NULL,NULL,NULL
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP
|
|
====
|
|
---- QUERY
# INSERT ... SELECT: a single source row (id = 10) that supplies every column.
INSERT INTO tdata
SELECT
  id,
  float_col,
  bigint_col,
  string_col,
  bool_col,
  tinyint_col,
  smallint_col,
  double_col,
  timestamp_col
FROM functional.alltypes
WHERE id = 10
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity,9999-12-31 23:59:59.999999000
4,NULL,NULL,'NULL',true,NULL,NULL,0,NULL
5,5.0,5,'five',false,NULL,NULL,NULL,NULL
6,16,60,'',true,0,-1,-6,2010-12-31 23:59:59
7,NULL,10,'NULL',false,127,-7,2,1400-01-01 00:00:00
8,88,888,'eight',NULL,NULL,NULL,NULL,NULL
9,-9,-99,'NULL',NULL,NULL,NULL,NULL,NULL
10,0,0,'0',true,0,0,0,2009-01-02 00:10:00.450000000
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP
|
|
====
|
|
---- QUERY
# INSERT ... SELECT: a single source row (id = 11) written through an explicit column
# list that names only five columns.
INSERT INTO tdata (id, vald, valb, vali, ts)
SELECT
  id,
  double_col,
  bool_col,
  bigint_col,
  timestamp_col
FROM functional.alltypes
WHERE id = 11
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity,9999-12-31 23:59:59.999999000
4,NULL,NULL,'NULL',true,NULL,NULL,0,NULL
5,5.0,5,'five',false,NULL,NULL,NULL,NULL
6,16,60,'',true,0,-1,-6,2010-12-31 23:59:59
7,NULL,10,'NULL',false,127,-7,2,1400-01-01 00:00:00
8,88,888,'eight',NULL,NULL,NULL,NULL,NULL
9,-9,-99,'NULL',NULL,NULL,NULL,NULL,NULL
10,0,0,'0',true,0,0,0,2009-01-02 00:10:00.450000000
11,NULL,10,'NULL',false,NULL,NULL,10.1,2009-01-02 00:11:00.450000000
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP
|
|
====
|
|
---- QUERY
# Intentionally has no WHERE clause: every row is removed. The empty DML_RESULTS
# section below checks that the table is empty afterwards.
DELETE tdata
---- DML_RESULTS: tdata
|
|
====
|
|
---- QUERY
# INSERT ... SELECT: two source rows (id 0 and 1), each supplying every column.
INSERT INTO tdata
SELECT
  id,
  float_col,
  bigint_col,
  string_col,
  bool_col,
  tinyint_col,
  smallint_col,
  double_col,
  timestamp_col
FROM functional.alltypes
WHERE id < 2
---- RUNTIME_PROFILE
NumModifiedRows: 2
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS
---- DML_RESULTS: tdata
0,0,0,'0',true,0,0,0,2009-01-01 00:00:00
1,1.100000023841858,10,'1',false,1,1,10.1,2009-01-01 00:01:00
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP
|
|
====
|
|
---- QUERY
# INSERT ... SELECT: three source rows (id 3, 4 and 5) written through an explicit
# column list that names only five columns.
INSERT INTO tdata (vals, id, valt, vald, ts)
SELECT
  smallint_col,
  id,
  tinyint_col,
  double_col,
  timestamp_col
FROM functional.alltypes
WHERE id > 2 AND id < 6
---- RUNTIME_PROFILE
NumModifiedRows: 3
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS
---- DML_RESULTS: tdata
0,0,0,'0',true,0,0,0,2009-01-01 00:00:00
1,1.100000023841858,10,'1',false,1,1,10.1,2009-01-01 00:01:00
3,NULL,NULL,'NULL',NULL,3,3,30.3,2009-01-01 00:03:00.300000000
4,NULL,NULL,'NULL',NULL,4,4,40.4,2009-01-01 00:04:00.600000000
5,NULL,NULL,'NULL',NULL,5,5,50.5,2009-01-01 00:05:00.100000000
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP
|
|
====
|
|
---- QUERY
# An empty string must be insertable into a STRING column; the next test case scans
# it back.
INSERT INTO tdata
VALUES (320, 2.0, 932, CAST('' AS STRING), false, 0, 0, 0, NULL)
---- RESULTS
: 1
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
|
|
====
|
|
---- QUERY
# Read back row 320 and check that valv comes back as an empty string.
SELECT id, valv, valb
FROM tdata
WHERE id = 320;
---- RESULTS
320,'',false
---- TYPES
INT,STRING,BOOLEAN
|
|
====
|
|
---- QUERY
# Insert a row with primary key 666, using explicit casts for the FLOAT and STRING
# values.
INSERT INTO tdata
VALUES (666, CAST(1.2 AS FLOAT), 43, CAST('z' AS STRING), true, 0, 0, 0, NULL)
---- RESULTS
: 1
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
|
|
====
|
|
---- QUERY
# Primary key 666 is already present, so the row is expected to be counted as a row
# error and no row is modified.
INSERT INTO tdata
VALUES (666, CAST(1.2 AS FLOAT), 43, CAST('z' AS VARCHAR(20)), true, 0, 0, 0, NULL)
---- RESULTS
: 0
---- RUNTIME_PROFILE
NumModifiedRows: 0
NumRowErrors: 1
|
|
====
|
|
---- QUERY
# CREATE TABLE AS SELECT into a range-partitioned Kudu table, seeded with the 100
# source rows whose id is below 100.
CREATE TABLE kudu_test_tbl
PRIMARY KEY (id)
PARTITION BY RANGE (id) (
  PARTITION VALUES < 100,
  PARTITION 100 <= VALUES <= 10000
)
STORED AS KUDU
AS
SELECT *
FROM functional_kudu.alltypes
WHERE id < 100;
---- RESULTS
'Inserted 100 row(s)'
---- RUNTIME_PROFILE
NumModifiedRows: 100
NumRowErrors: 0
|
|
====
|
|
---- QUERY
# Re-insert the same 100 rows the table was created with; each one is expected to be
# reported as a row error.
INSERT INTO kudu_test_tbl
SELECT *
FROM functional_kudu.alltypes
WHERE id < 100;
---- RESULTS
: 0
---- RUNTIME_PROFILE
NumModifiedRows: 0
NumRowErrors: 100
|
|
====
|
|
---- QUERY
# Large insert of the whole source table. The 100 rows that are already present are
# expected to produce row errors; the rest are written.
INSERT INTO kudu_test_tbl
SELECT *
FROM functional_kudu.alltypes;
---- RESULTS
: 7200
---- RUNTIME_PROFILE
NumModifiedRows: 7200
NumRowErrors: 100
|
|
====
|
|
---- QUERY
# Shift every id up by 10000 so that the rows are no longer covered by the table's
# range partitions. Only the row that lands on id 10000 is expected to be inserted;
# all other rows are expected to be reported as row errors.
INSERT INTO kudu_test_tbl
SELECT
  CAST(id + 10000 AS INT),
  bool_col,
  tinyint_col,
  smallint_col,
  int_col,
  bigint_col,
  float_col,
  double_col,
  date_string_col,
  string_col,
  timestamp_col,
  year,
  month
FROM functional_kudu.alltypes
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 7299
|
|
====
|
|
---- QUERY
# Every supported key type appears in the primary key, which also serves as the set of
# distribution columns: hash partitioning plus three single-value range partitions.
CREATE TABLE allkeytypes (
  i1 TINYINT,
  i2 SMALLINT,
  i3 INT,
  i4 BIGINT,
  name STRING,
  valf FLOAT,
  vald DOUBLE,
  PRIMARY KEY (i1, i2, i3, i4, name)
)
PARTITION BY
  HASH PARTITIONS 3,
  RANGE (
    PARTITION VALUE = (1,1,1,1,'1'),
    PARTITION VALUE = (2,2,2,2,'2'),
    PARTITION VALUE = (3,3,3,3,'3')
  )
STORED AS KUDU
---- RESULTS
|
|
====
|
|
---- QUERY
# Nine source rows (0 < id < 10) are offered; three are expected to be written and six
# to be reported as row errors -- presumably the rows whose key matches none of the
# single-value range partitions.
INSERT INTO allkeytypes
SELECT
  CAST(id AS TINYINT),
  smallint_col,
  int_col,
  CAST(bigint_col / 10 AS BIGINT),
  string_col,
  float_col,
  double_col
FROM functional.alltypes
WHERE id > 0 AND id < 10
---- RESULTS
: 3
---- RUNTIME_PROFILE
NumModifiedRows: 3
NumRowErrors: 6
|
|
====
|
|
---- QUERY
# Kudu table mixing columns with and without DEFAULT values and with different
# nullability, hash-partitioned on the primary key.
CREATE TABLE tbl_with_defaults (
  a INT PRIMARY KEY,
  b INT NULL DEFAULT 10,
  c INT NOT NULL DEFAULT 100,
  d INT DEFAULT 1000,
  e INT NULL,
  f INT NOT NULL,
  g STRING DEFAULT 'test',
  h BOOLEAN DEFAULT true
)
PARTITION BY HASH (a) PARTITIONS 3
STORED AS KUDU
---- RESULTS
|
|
====
|
|
---- QUERY
# Only a and f are supplied. The remaining columns are expected to take their declared
# defaults, or NULL for e, which has none.
INSERT INTO tbl_with_defaults (a, f)
VALUES
  (1, 1),
  (2, 2),
  (3, 3),
  (4, 4)
---- RUNTIME_PROFILE
NumModifiedRows: 4
NumRowErrors: 0
---- LABELS
A, B, C, D, E, F, G, H
---- DML_RESULTS: tbl_with_defaults
1,10,100,1000,NULL,1,'test',true
2,10,100,1000,NULL,2,'test',true
3,10,100,1000,NULL,3,'test',true
4,10,100,1000,NULL,4,'test',true
---- TYPES
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN
|
|
====
|
|
---- QUERY
# Every column is supplied explicitly, so none of the defaults should be applied.
INSERT INTO tbl_with_defaults
VALUES (5, 5, 5, 5, 5, 5, 'row', false)
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
A, B, C, D, E, F, G, H
---- DML_RESULTS: tbl_with_defaults
1,10,100,1000,NULL,1,'test',true
2,10,100,1000,NULL,2,'test',true
3,10,100,1000,NULL,3,'test',true
4,10,100,1000,NULL,4,'test',true
5,5,5,5,5,5,'row',false
---- TYPES
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN
|
|
====
|
|
---- QUERY
# Add a nullable column without a default and a NOT NULL column with a default.
ALTER TABLE tbl_with_defaults
ADD COLUMNS (i INT NULL, j INT NOT NULL DEFAULT 10000)
---- RESULTS
|
|
====
|
|
---- QUERY
# Rows written before the ALTER TABLE are expected to show NULL for i and the default
# 10000 for j.
SELECT *
FROM tbl_with_defaults
---- RESULTS
1,10,100,1000,NULL,1,'test',true,NULL,10000
2,10,100,1000,NULL,2,'test',true,NULL,10000
3,10,100,1000,NULL,3,'test',true,NULL,10000
4,10,100,1000,NULL,4,'test',true,NULL,10000
5,5,5,5,5,5,'row',false,NULL,10000
---- TYPES
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,INT,INT
|
|
====
|
|
---- QUERY
# Insert a full row, including values for the two columns added by ALTER TABLE.
INSERT INTO tbl_with_defaults
VALUES (6, 6, 6, 6, 6, 6, 'another row', false, 6, 6)
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
A, B, C, D, E, F, G, H, I, J
---- DML_RESULTS: tbl_with_defaults
1,10,100,1000,NULL,1,'test',true,NULL,10000
2,10,100,1000,NULL,2,'test',true,NULL,10000
3,10,100,1000,NULL,3,'test',true,NULL,10000
4,10,100,1000,NULL,4,'test',true,NULL,10000
5,5,5,5,5,5,'row',false,NULL,10000
6,6,6,6,6,6,'another row',false,6,6
---- TYPES
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,INT,INT
|
|
====
|
|
---- QUERY
# IMPALA-5217: the target column list leaves out some columns, and NULL is supplied for
# c, which is declared NOT NULL. The row is expected to be reported as a row error.
INSERT INTO tbl_with_defaults (a, c, f)
VALUES (0, NULL, 1)
---- RUNTIME_PROFILE
NumModifiedRows: 0
NumRowErrors: 1
|
|
====
|
|
---- QUERY
# IMPALA-5217: NULL is supplied for the nullable column d while the non-nullable column
# c is left out of the target column list. The insert is expected to succeed, with c
# taking its default.
INSERT INTO tbl_with_defaults (a, b, d, f)
VALUES (0, 0, NULL, 0)
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
A, B, C, D, E, F, G, H, I, J
---- DML_RESULTS: tbl_with_defaults
0,0,100,NULL,NULL,0,'test',true,NULL,10000
1,10,100,1000,NULL,1,'test',true,NULL,10000
2,10,100,1000,NULL,2,'test',true,NULL,10000
3,10,100,1000,NULL,3,'test',true,NULL,10000
4,10,100,1000,NULL,4,'test',true,NULL,10000
5,5,5,5,5,5,'row',false,NULL,10000
6,6,6,6,6,6,'another row',false,6,6
---- TYPES
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,INT,INT
|
|
====
|
|
---- QUERY
# Kudu table with a two-column primary key that is hash-partitioned on both key columns.
CREATE TABLE multiple_partition_cols (
  x BIGINT,
  y BIGINT,
  z STRING,
  PRIMARY KEY (x, y)
)
PARTITION BY HASH (x, y) PARTITIONS 8
STORED AS KUDU
---- RESULTS
|
|
====
|
|
---- QUERY
# INSERT ... SELECT where the first key column comes from a constant in the select list.
INSERT INTO multiple_partition_cols
SELECT 0, bigint_col, string_col
FROM functional.alltypes
WHERE id = 0
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
X,Y,Z
---- DML_RESULTS: multiple_partition_cols
0,0,'0'
---- TYPES
BIGINT,BIGINT,STRING
|
|
====
|
|
---- QUERY
# INSERT ... SELECT where a constant NULL is supplied for y, which is part of the
# primary key. The row is expected to be reported as a row error.
INSERT INTO multiple_partition_cols
SELECT bigint_col, NULL, string_col
FROM functional.alltypes
WHERE id = 1
---- RESULTS
: 0
---- RUNTIME_PROFILE
NumModifiedRows: 0
NumRowErrors: 1
|
|
====
|