mirror of
https://github.com/apache/impala.git
synced 2026-01-07 00:02:28 -05:00
KuduTableSink uses the referenced_columns map to translate between the index into the output exprs 'j' and the index into columns in the Kudu table 'col', but we incorrectly use 'j' when calling into the Kudu table schema to check the nullability of columns.

Testing:
- Added e2e tests to kudu_insert.test

Change-Id: I8ed458278f135288a821570939de8ee294183df2
Reviewed-on: http://gerrit.cloudera.org:8080/6670
Reviewed-by: Thomas Tauber-Marshall <tmarshall@cloudera.com>
Tested-by: Impala Public Jenkins
433 lines
12 KiB
Plaintext
433 lines
12 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Target table for the tests below: an int primary key plus one nullable column per
# value type, split into three range partitions (< 10, [10, 30) and >= 30).
create table tdata
|
|
(id int primary key, valf float null, vali bigint null, valv string null,
|
|
valb boolean null, valt tinyint null, vals smallint null, vald double null)
|
|
PARTITION BY RANGE (PARTITION VALUES < 10, PARTITION 10 <= VALUES < 30,
|
|
PARTITION 30 <= VALUES) STORED AS KUDU
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, all target cols, no errors
|
|
insert into tdata values (1, 1, 1, 'one', true, 1, 1, 1)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD
|
|
---- DML_RESULTS: tdata
|
|
1,1,1,'one',true,1,1,1
|
|
---- TYPES
|
|
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, all target cols, NULL
|
|
insert into tdata values (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD
|
|
---- DML_RESULTS: tdata
|
|
1,1,1,'one',true,1,1,1
|
|
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, all target cols, boundary values
|
|
insert into tdata values
|
|
(3, cast('nan' as float), max_bigint(), '', true, min_tinyint(), max_smallint(),
|
|
cast('-inf' as double))
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD
|
|
---- DML_RESULTS: tdata
|
|
1,1,1,'one',true,1,1,1
|
|
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL
|
|
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity
|
|
---- TYPES
|
|
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, subset of target cols
|
|
insert into tdata (valb, vald, id) values (true, 0, 4)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD
|
|
---- DML_RESULTS: tdata
|
|
1,1,1,'one',true,1,1,1
|
|
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL
|
|
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity
|
|
4,NULL,NULL,'NULL',true,NULL,NULL,0
|
|
---- TYPES
|
|
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# VALUES, multiple rows, all target cols
|
|
insert into tdata values
|
|
(5, 5.0, 5, 'five', false, NULL, NULL, NULL),
|
|
(6, 16, 60, '', true, 0, -1, -6),
|
|
(7, NULL, 10, NULL, false, max_tinyint(), -7, 2)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 3
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD
|
|
---- DML_RESULTS: tdata
|
|
1,1,1,'one',true,1,1,1
|
|
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL
|
|
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity
|
|
4,NULL,NULL,'NULL',true,NULL,NULL,0
|
|
5,5.0,5,'five',false,NULL,NULL,NULL
|
|
6,16,60,'',true,0,-1,-6
|
|
7,NULL,10,'NULL',false,127,-7,2
|
|
---- TYPES
|
|
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# VALUES, multiple rows, subset of cols
|
|
insert into tdata (valv, valf, vali, id) values
|
|
('eight', 88, 888, 8),
|
|
(NULL, -9, -99, 9)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 2
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD
|
|
---- DML_RESULTS: tdata
|
|
1,1,1,'one',true,1,1,1
|
|
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL
|
|
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity
|
|
4,NULL,NULL,'NULL',true,NULL,NULL,0
|
|
5,5.0,5,'five',false,NULL,NULL,NULL
|
|
6,16,60,'',true,0,-1,-6
|
|
7,NULL,10,'NULL',false,127,-7,2
|
|
8,88,888,'eight',NULL,NULL,NULL,NULL
|
|
9,-9,-99,'NULL',NULL,NULL,NULL,NULL
|
|
---- TYPES
|
|
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# SELECT, single row, all target cols
|
|
insert into tdata
|
|
select id, float_col, bigint_col, string_col, bool_col, tinyint_col, smallint_col, double_col
|
|
from functional.alltypes where id = 10
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD
|
|
---- DML_RESULTS: tdata
|
|
1,1,1,'one',true,1,1,1
|
|
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL
|
|
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity
|
|
4,NULL,NULL,'NULL',true,NULL,NULL,0
|
|
5,5.0,5,'five',false,NULL,NULL,NULL
|
|
6,16,60,'',true,0,-1,-6
|
|
7,NULL,10,'NULL',false,127,-7,2
|
|
8,88,888,'eight',NULL,NULL,NULL,NULL
|
|
9,-9,-99,'NULL',NULL,NULL,NULL,NULL
|
|
10,0,0,'0',true,0,0,0
|
|
---- TYPES
|
|
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# SELECT, single row, subset of cols
|
|
insert into tdata (id, vald, valb, vali)
|
|
select id, double_col, bool_col, bigint_col
|
|
from functional.alltypes where id = 11
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD
|
|
---- DML_RESULTS: tdata
|
|
1,1,1,'one',true,1,1,1
|
|
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL
|
|
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity
|
|
4,NULL,NULL,'NULL',true,NULL,NULL,0
|
|
5,5.0,5,'five',false,NULL,NULL,NULL
|
|
6,16,60,'',true,0,-1,-6
|
|
7,NULL,10,'NULL',false,127,-7,2
|
|
8,88,888,'eight',NULL,NULL,NULL,NULL
|
|
9,-9,-99,'NULL',NULL,NULL,NULL,NULL
|
|
10,0,0,'0',true,0,0,0
|
|
11,NULL,10,'NULL',false,NULL,NULL,10.1
|
|
---- TYPES
|
|
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Intentionally unfiltered: empty the table so the multi-row SELECT tests that follow
# start from no rows.
delete tdata
|
|
---- DML_RESULTS: tdata
|
|
====
|
|
---- QUERY
|
|
# SELECT, multiple rows, all target cols
|
|
insert into tdata
|
|
select id, float_col, bigint_col, string_col, bool_col, tinyint_col, smallint_col, double_col
|
|
from functional.alltypes where id < 2
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 2
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD
|
|
---- DML_RESULTS: tdata
|
|
0,0,0,'0',true,0,0,0
|
|
1,1.100000023841858,10,'1',false,1,1,10.1
|
|
---- TYPES
|
|
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# SELECT, multiple rows, subset of cols
|
|
insert into tdata (vals, id, valt, vald)
|
|
select smallint_col, id, tinyint_col, double_col
|
|
from functional.alltypes where id > 2 and id < 6
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 3
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD
|
|
---- DML_RESULTS: tdata
|
|
0,0,0,'0',true,0,0,0
|
|
1,1.100000023841858,10,'1',false,1,1,10.1
|
|
3,NULL,NULL,'NULL',NULL,3,3,30.3
|
|
4,NULL,NULL,'NULL',NULL,4,4,40.4
|
|
5,NULL,NULL,'NULL',NULL,5,5,50.5
|
|
---- TYPES
|
|
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE
|
|
====
|
|
---- QUERY
|
|
# Make sure we can insert empty strings into string columns and that we can scan them
|
|
# back.
|
|
insert into tdata values (320, 2.0, 932, cast('' as string), false, 0, 0, 0)
|
|
---- RESULTS
|
|
: 1
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
====
|
|
---- QUERY
|
|
# Scan back the row inserted above and check that the empty string round-trips.
select id, valv, valb from tdata where id = 320;
|
|
---- RESULTS
|
|
320,'',false
|
|
---- TYPES
|
|
INT,STRING,BOOLEAN
|
|
====
|
|
---- QUERY
|
|
# Insert a new row with key 666; the following test inserts the same key again.
insert into tdata values
|
|
(666, cast(1.2 as float), 43, cast('z' as string), true, 0, 0, 0)
|
|
---- RESULTS
|
|
: 1
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
====
|
|
---- QUERY
|
|
# insert row with primary key that already exists
|
|
insert into tdata values
|
|
(666, cast(1.2 as float), 43, cast('z' as VARCHAR(20)), true, 0, 0, 0)
|
|
---- RESULTS
|
|
: 0
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 0
|
|
NumRowErrors: 1
|
|
====
|
|
---- QUERY
|
|
# CTAS into a Kudu table range-partitioned on id (ids up to and including 10000),
# seeded with the rows of functional_kudu.alltypes that have id < 100.
create table kudu_test_tbl primary key(id)
|
|
partition by range(id) (partition values < 100, partition 100 <= values <= 10000)
|
|
stored as kudu as
|
|
select * from functional_kudu.alltypes where id < 100;
|
|
---- RESULTS
|
|
'Inserted 100 row(s)'
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 100
|
|
NumRowErrors: 0
|
|
====
|
|
---- QUERY
|
|
# Re-insert the same 100 rows; expect no modified rows and one row error per row.
insert into kudu_test_tbl
|
|
select * from functional_kudu.alltypes where id < 100;
|
|
---- RESULTS
|
|
: 0
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 0
|
|
NumRowErrors: 100
|
|
====
|
|
---- QUERY
|
|
# large insert - 100 rows were already inserted above and result in errors
|
|
insert into kudu_test_tbl
|
|
select * from functional_kudu.alltypes;
|
|
---- RESULTS
|
|
: 7200
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 7200
|
|
NumRowErrors: 100
|
|
====
|
|
---- QUERY
|
|
# Insert rows that are not covered by any of the existing range partitions
|
|
# Only the row at 10000 is inserted.
|
|
insert into kudu_test_tbl SELECT cast(id + 10000 as int), bool_col, tinyint_col,
|
|
smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col,
|
|
timestamp_col, year, month
|
|
from functional_kudu.alltypes
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 7299
|
|
====
|
|
---- QUERY
|
|
# IMPALA-2521: clustered insert into table.
|
|
create table impala_2521
|
|
(id bigint primary key, name string, zip int)
|
|
partition by hash partitions 3 stored as kudu
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# The subquery yields (id, maxzip, name); the outer select reorders these to line up
# with the table's (id, name, zip) column order.
insert into impala_2521 /*+ clustered */
|
|
select id, name, maxzip as zip
|
|
from (
|
|
select tinyint_col as id, cast(max(int_col) + 1 as int) as maxzip, string_col as name
|
|
from functional_kudu.alltypessmall group by id, name
|
|
) as sub;
|
|
---- RESULTS
|
|
: 10
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 10
|
|
NumRowErrors: 0
|
|
====
|
|
---- QUERY
|
|
# Read back the rows written by the clustered insert above.
select * from impala_2521
|
|
---- RESULTS
|
|
0,'0',1
|
|
1,'1',2
|
|
2,'2',3
|
|
3,'3',4
|
|
4,'4',5
|
|
5,'5',6
|
|
6,'6',7
|
|
7,'7',8
|
|
8,'8',9
|
|
9,'9',10
|
|
---- TYPES
|
|
BIGINT,STRING,INT
|
|
====
|
|
---- QUERY
|
|
# Table with all supported types as primary key and distribution columns
|
|
create table allkeytypes (i1 tinyint, i2 smallint, i3 int, i4 bigint, name string,
|
|
valf float, vald double, primary key (i1, i2, i3, i4, name)) partition by
|
|
hash partitions 3, range (partition value = (1,1,1,1,'1'),
|
|
partition value = (2,2,2,2,'2'), partition value = (3,3,3,3,'3')) stored as kudu
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Offers the source rows with 0 < id < 10; 3 rows are expected to be written and 6 to
# be counted as row errors. NOTE(review): presumably only keys matching the three
# single-value range partitions of allkeytypes are accepted -- confirm.
insert into allkeytypes select cast(id as tinyint), smallint_col, int_col,
|
|
cast (bigint_col/10 as bigint), string_col, float_col, double_col
|
|
from functional.alltypes where id > 0 and id < 10
|
|
---- RESULTS
|
|
: 3
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 3
|
|
NumRowErrors: 6
|
|
====
|
|
---- QUERY
|
|
# Table with default values
|
|
create table tbl_with_defaults (a int primary key, b int null default 10,
|
|
c int not null default 100, d int default 1000, e int null, f int not null,
|
|
g string default 'test', h boolean default true) partition by hash (a)
|
|
partitions 3 stored as kudu
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Set only the key 'a' and 'f' (NOT NULL, no default); the omitted columns are
# expected to read back as their declared defaults, or NULL where none is declared.
insert into tbl_with_defaults (a, f) values (1, 1), (2, 2), (3, 3), (4, 4)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 4
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
A, B, C, D, E, F, G, H
|
|
---- DML_RESULTS: tbl_with_defaults
|
|
1,10,100,1000,NULL,1,'test',true
|
|
2,10,100,1000,NULL,2,'test',true
|
|
3,10,100,1000,NULL,3,'test',true
|
|
4,10,100,1000,NULL,4,'test',true
|
|
---- TYPES
|
|
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN
|
|
====
|
|
---- QUERY
|
|
# Supply a value for every column, so none of the declared defaults is used.
insert into tbl_with_defaults values (5, 5, 5, 5, 5, 5, 'row', false)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
A, B, C, D, E, F, G, H
|
|
---- DML_RESULTS: tbl_with_defaults
|
|
1,10,100,1000,NULL,1,'test',true
|
|
2,10,100,1000,NULL,2,'test',true
|
|
3,10,100,1000,NULL,3,'test',true
|
|
4,10,100,1000,NULL,4,'test',true
|
|
5,5,5,5,5,5,'row',false
|
|
---- TYPES
|
|
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN
|
|
====
|
|
---- QUERY
|
|
# Add a nullable column without a default and a NOT NULL column with a default.
alter table tbl_with_defaults add columns (i int null, j int not null default 10000)
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Pre-existing rows should read NULL for 'i' and the default 10000 for 'j'.
select * from tbl_with_defaults
|
|
---- RESULTS
|
|
1,10,100,1000,NULL,1,'test',true,NULL,10000
|
|
2,10,100,1000,NULL,2,'test',true,NULL,10000
|
|
3,10,100,1000,NULL,3,'test',true,NULL,10000
|
|
4,10,100,1000,NULL,4,'test',true,NULL,10000
|
|
5,5,5,5,5,5,'row',false,NULL,10000
|
|
---- TYPES
|
|
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,INT,INT
|
|
====
|
|
---- QUERY
|
|
# Insert a full row that also sets the two newly added columns 'i' and 'j'.
insert into tbl_with_defaults values (6,6,6,6,6,6,'another row',false,6,6)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
A, B, C, D, E, F, G, H, I, J
|
|
---- DML_RESULTS: tbl_with_defaults
|
|
1,10,100,1000,NULL,1,'test',true,NULL,10000
|
|
2,10,100,1000,NULL,2,'test',true,NULL,10000
|
|
3,10,100,1000,NULL,3,'test',true,NULL,10000
|
|
4,10,100,1000,NULL,4,'test',true,NULL,10000
|
|
5,5,5,5,5,5,'row',false,NULL,10000
|
|
6,6,6,6,6,6,'another row',false,6,6
|
|
---- TYPES
|
|
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,INT,INT
|
|
====
|
|
---- QUERY
|
|
# IMPALA-5217: Try to insert NULL to a 'NOT NULL' col with a target col list that leaves
|
|
# out some cols.
|
|
insert into tbl_with_defaults (a, c, f) values (0, null, 1)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 0
|
|
NumRowErrors: 1
|
|
====
|
|
---- QUERY
|
|
# IMPALA-5217: Insert NULL into a nullable col when a non-nullable col has been left out
|
|
# of the target col list.
|
|
insert into tbl_with_defaults (a, b, d, f) values (0, 0, null, 0)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
A, B, C, D, E, F, G, H, I, J
|
|
---- DML_RESULTS: tbl_with_defaults
|
|
0,0,100,NULL,NULL,0,'test',true,NULL,10000
|
|
1,10,100,1000,NULL,1,'test',true,NULL,10000
|
|
2,10,100,1000,NULL,2,'test',true,NULL,10000
|
|
3,10,100,1000,NULL,3,'test',true,NULL,10000
|
|
4,10,100,1000,NULL,4,'test',true,NULL,10000
|
|
5,5,5,5,5,5,'row',false,NULL,10000
|
|
6,6,6,6,6,6,'another row',false,6,6
|
|
---- TYPES
|
|
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,INT,INT
|
|
====
|