mirror of
https://github.com/apache/impala.git
synced 2026-02-01 21:00:29 -05:00
The Kudu engine recently enabled the auto-incrementing column feature (KUDU-1945). The feature works by appending a system-generated auto-incrementing column to the primary key columns to guarantee uniqueness of the primary key when the primary key columns themselves can be non-unique. The non-unique primary key columns and the auto-incrementing column together form the effective unique composite primary key. This auto-incrementing column is named 'auto_incrementing_id' and has type BIGINT. Its value is assigned automatically during insertion, so INSERT statements must not specify values for the auto-incrementing column. In the current Kudu implementation, there is no central key provider for auto-incrementing columns. It uses a per tablet-server global counter to assign values for auto-incrementing columns. So the values of auto-incrementing columns are not unique across a Kudu table, but are unique within a continuous region of the table served by a tablet-server. This patch also upgrades the Kudu version to 345fd44ca3 to pick up the Kudu changes needed to support non-unique primary keys. It adds syntactic support for creating Kudu tables with a non-unique primary key. When creating a Kudu table, specifying PRIMARY KEY is optional. If no primary key attribute is specified, the partition key columns are promoted to a non-unique primary key, provided those columns are the leading columns of the table. A new column "key_unique" is added to the output of the 'describe' table command for Kudu tables. 
Examples of CREATE TABLE statements with a non-unique primary key: CREATE TABLE tbl (i INT NON UNIQUE PRIMARY KEY, s STRING) PARTITION BY HASH (i) PARTITIONS 3 STORED as KUDU; CREATE TABLE tbl (i INT, s STRING, NON UNIQUE PRIMARY KEY(i)) PARTITION BY HASH (i) PARTITIONS 3 STORED as KUDU; CREATE TABLE tbl NON UNIQUE PRIMARY KEY(id) PARTITION BY HASH (id) PARTITIONS 3 STORED as KUDU AS SELECT id, string_col FROM functional.alltypes WHERE id = 10; CREATE TABLE tbl NON UNIQUE PRIMARY KEY(id) PARTITION BY RANGE (id) (PARTITION VALUES <= 1000, PARTITION 1000 < VALUES <= 2000, PARTITION 2000 < VALUES <= 3000, PARTITION 3000 < VALUES) STORED as KUDU AS SELECT id, int_col FROM functional.alltypestiny ORDER BY id ASC LIMIT 4000; CREATE TABLE tbl (id INT, name STRING, NON UNIQUE PRIMARY KEY(id)) STORED as KUDU; CREATE TABLE tbl (a INT, b STRING, c FLOAT) PARTITION BY HASH (a, b) PARTITIONS 3 STORED as KUDU; A SELECT statement does not return the system-generated auto-incrementing column unless the column is explicitly specified in the select list. The auto-incrementing column cannot be added, removed or renamed with ALTER TABLE statements. The UPSERT operation is currently not supported for Kudu tables with an auto-incrementing column due to a limitation in the Kudu engine. Testing: - Ran manual tests in impala-shell with queries that create Kudu tables with a non-unique primary key, and tested insert/update/delete operations on these tables. - Added front-end tests and end-to-end tests for Kudu tables with a non-unique primary key. - Passed exhaustive tests. Change-Id: I4d7882bf3d01a3492cc9827c072d1f3200d9eebd Reviewed-on: http://gerrit.cloudera.org:8080/19383 Reviewed-by: Riza Suminto <riza.suminto@cloudera.com> Reviewed-by: Wenzhe Zhou <wzhou@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
682 lines
25 KiB
Plaintext
682 lines
25 KiB
Plaintext
====
---- QUERY
# Target table for the INSERT tests below. It is range-partitioned on the primary key
# 'id' and has one value column per type exercised by these tests.
create table tdata
  (id int primary key, valf float null, vali bigint null, valv string null,
   valb boolean null, valt tinyint null, vals smallint null, vald double null,
   ts timestamp, decimal4 decimal(9,9) null, decimal8 decimal(18,2) null,
   decimal16 decimal(38, 0) null, valdate date null, valvc varchar(10) null)
  PARTITION BY RANGE (PARTITION VALUES < 10, PARTITION 10 <= VALUES < 30,
                      PARTITION 30 <= VALUES) STORED AS KUDU
---- RESULTS
'Table has been created.'
====
---- QUERY
# VALUES, single row, all target cols, no errors
insert into tdata values (1, 1, 1, 'one', true, 1, 1, 1,
  cast('1987-05-19 00:00:00' as timestamp), 0.000000001, 1.00, 1, DATE '1970-01-01', cast('one' as varchar(10)))
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS, DECIMAL4, DECIMAL8, DECIMAL16, VALDATE, VALVC
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00,0.000000001,1.00,1,1970-01-01,'one'
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP,DECIMAL,DECIMAL,DECIMAL,DATE,STRING
====
---- QUERY
# VALUES, single row, all target cols, NULL
insert into tdata values (2, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS, DECIMAL4, DECIMAL8, DECIMAL16, VALDATE, VALVC
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00,0.000000001,1.00,1,1970-01-01,'one'
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP,DECIMAL,DECIMAL,DECIMAL,DATE,STRING
====
---- QUERY
# VALUES, single row, all target cols, boundary values. The timestamp value is the max
# possible value that Impala can represent; it gets truncated rather than rounded up to
# the nearest microsecond. If it were rounded up, it wouldn't be readable by Impala.
insert into tdata values
  (3, cast('nan' as float), max_bigint(), '', true, min_tinyint(), max_smallint(),
  cast('-inf' as double),
  nanoseconds_add(cast('9999-12-31 23:59:59' as timestamp), 999999999),
  0.999999999, 9999999999999999.99, 99999999999999999999999999999999999999, DATE '9999-12-31', cast('' as varchar(10)))
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS, DECIMAL4, DECIMAL8, DECIMAL16, VALDATE, VALVC
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00,0.000000001,1.00,1,1970-01-01,'one'
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity,9999-12-31 23:59:59.999999000,0.999999999,9999999999999999.99,99999999999999999999999999999999999999,9999-12-31,''
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP,DECIMAL,DECIMAL,DECIMAL,DATE,STRING
====
---- QUERY
# VALUES, single row, subset of target cols
insert into tdata (valb, vald, id) values (true, 0, 4)
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS, DECIMAL4, DECIMAL8, DECIMAL16, VALDATE, VALVC
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00,0.000000001,1.00,1,1970-01-01,'one'
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity,9999-12-31 23:59:59.999999000,0.999999999,9999999999999999.99,99999999999999999999999999999999999999,9999-12-31,''
4,NULL,NULL,'NULL',true,NULL,NULL,0,NULL,NULL,NULL,NULL,NULL,'NULL'
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP,DECIMAL,DECIMAL,DECIMAL,DATE,STRING
====
---- QUERY
# VALUES, multiple rows, all target cols
insert into tdata values
  (5, 5.0, 5, 'five', false, NULL, NULL, NULL, NULL, NULL, NULL, NULL,NULL, cast('five' as varchar(10))),
  (6, 16, 60, '', true, 0, -1, -6, cast('2010-12-31 23:59:59' as timestamp), -0.000000001, -1.00, -1, DATE '1970-01-01', cast ('' as varchar(10))),
  (7, NULL, 10, NULL, false, max_tinyint(), -7, 2, cast('1400-01-01 00:00:00' as timestamp), 0.000000000, 0.00, 0, DATE '1970-01-01', NULL)
---- RUNTIME_PROFILE
NumModifiedRows: 3
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS, DECIMAL4, DECIMAL8, DECIMAL16, VALDATE, VALVC
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00,0.000000001,1.00,1,1970-01-01,'one'
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity,9999-12-31 23:59:59.999999000,0.999999999,9999999999999999.99,99999999999999999999999999999999999999,9999-12-31,''
4,NULL,NULL,'NULL',true,NULL,NULL,0,NULL,NULL,NULL,NULL,NULL,'NULL'
5,5.0,5,'five',false,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'five'
6,16,60,'',true,0,-1,-6,2010-12-31 23:59:59,-0.000000001,-1.00,-1,1970-01-01,''
7,NULL,10,'NULL',false,127,-7,2,1400-01-01 00:00:00,0.000000000,0.00,0,1970-01-01,'NULL'
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP,DECIMAL,DECIMAL,DECIMAL,DATE,STRING
====
---- QUERY
# VALUES, multiple rows, subset of cols
insert into tdata (valv, valf, vali, id) values
  ('eight', 88, 888, 8),
  (NULL, -9, -99, 9)
---- RUNTIME_PROFILE
NumModifiedRows: 2
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS, DECIMAL4, DECIMAL8, DECIMAL16, VALDATE, VALVC
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00,0.000000001,1.00,1,1970-01-01,'one'
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity,9999-12-31 23:59:59.999999000,0.999999999,9999999999999999.99,99999999999999999999999999999999999999,9999-12-31,''
4,NULL,NULL,'NULL',true,NULL,NULL,0,NULL,NULL,NULL,NULL,NULL,'NULL'
5,5.0,5,'five',false,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'five'
6,16,60,'',true,0,-1,-6,2010-12-31 23:59:59,-0.000000001,-1.00,-1,1970-01-01,''
7,NULL,10,'NULL',false,127,-7,2,1400-01-01 00:00:00,0.000000000,0.00,0,1970-01-01,'NULL'
8,88,888,'eight',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
9,-9,-99,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP,DECIMAL,DECIMAL,DECIMAL,DATE,STRING
====
---- QUERY
# SELECT, single row, all target cols
insert into tdata
  select id, float_col, bigint_col, string_col, bool_col, tinyint_col, smallint_col,
  double_col, timestamp_col, NULL, NULL, NULL, NULL, NULL
  from functional.alltypes where id = 10
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS, DECIMAL4, DECIMAL8, DECIMAL16, VALDATE, VALVC
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00,0.000000001,1.00,1,1970-01-01,'one'
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity,9999-12-31 23:59:59.999999000,0.999999999,9999999999999999.99,99999999999999999999999999999999999999,9999-12-31,''
4,NULL,NULL,'NULL',true,NULL,NULL,0,NULL,NULL,NULL,NULL,NULL,'NULL'
5,5.0,5,'five',false,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'five'
6,16,60,'',true,0,-1,-6,2010-12-31 23:59:59,-0.000000001,-1.00,-1,1970-01-01,''
7,NULL,10,'NULL',false,127,-7,2,1400-01-01 00:00:00,0.000000000,0.00,0,1970-01-01,'NULL'
8,88,888,'eight',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
9,-9,-99,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
10,0,0,'0',true,0,0,0,2009-01-02 00:10:00.450000000,NULL,NULL,NULL,NULL,'NULL'
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP,DECIMAL,DECIMAL,DECIMAL,DATE,STRING
====
---- QUERY
# SELECT, single row, subset of cols
insert into tdata (id, vald, valb, vali, ts)
  select id, double_col, bool_col, bigint_col, timestamp_col
  from functional.alltypes where id = 11
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS, DECIMAL4, DECIMAL8, DECIMAL16, VALDATE, VALVC
---- DML_RESULTS: tdata
1,1,1,'one',true,1,1,1,1987-05-19 00:00:00,0.000000001,1.00,1,1970-01-01,'one'
2,NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
3,NaN,9223372036854775807,'',true,-128,32767,-Infinity,9999-12-31 23:59:59.999999000,0.999999999,9999999999999999.99,99999999999999999999999999999999999999,9999-12-31,''
4,NULL,NULL,'NULL',true,NULL,NULL,0,NULL,NULL,NULL,NULL,NULL,'NULL'
5,5.0,5,'five',false,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'five'
6,16,60,'',true,0,-1,-6,2010-12-31 23:59:59,-0.000000001,-1.00,-1,1970-01-01,''
7,NULL,10,'NULL',false,127,-7,2,1400-01-01 00:00:00,0.000000000,0.00,0,1970-01-01,'NULL'
8,88,888,'eight',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
9,-9,-99,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
10,0,0,'0',true,0,0,0,2009-01-02 00:10:00.450000000,NULL,NULL,NULL,NULL,'NULL'
11,NULL,10,'NULL',false,NULL,NULL,10.1,2009-01-02 00:11:00.450000000,NULL,NULL,NULL,NULL,'NULL'
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP,DECIMAL,DECIMAL,DECIMAL,DATE,STRING
====
---- QUERY
# Intentionally unfiltered: remove every row so that the SELECT-based insert tests
# below start from an empty table (the expected DML_RESULTS here are empty).
delete tdata
---- DML_RESULTS: tdata
====
---- QUERY
# SELECT, multiple rows, all target cols
insert into tdata
  select id, float_col, bigint_col, string_col, bool_col, tinyint_col, smallint_col,
  double_col, timestamp_col, NULL, NULL, NULL, NULL, cast(string_col as varchar(10))
  from functional.alltypes where id < 2
---- RUNTIME_PROFILE
NumModifiedRows: 2
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS, DECIMAL4, DECIMAL8, DECIMAL16, VALDATE, VALVC
---- DML_RESULTS: tdata
0,0,0,'0',true,0,0,0,2009-01-01 00:00:00,NULL,NULL,NULL,NULL,'0'
1,1.100000023841858,10,'1',false,1,1,10.1,2009-01-01 00:01:00,NULL,NULL,NULL,NULL,'1'
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP,DECIMAL,DECIMAL,DECIMAL,DATE,STRING
====
---- QUERY
# SELECT, multiple rows, subset of cols
insert into tdata (vals, id, valt, vald, ts)
  select smallint_col, id, tinyint_col, double_col, timestamp_col
  from functional.alltypes where id > 2 and id < 6
---- RUNTIME_PROFILE
NumModifiedRows: 3
NumRowErrors: 0
---- LABELS
ID, VALF, VALI, VALV, VALB, VALT, VALS, VALD, TS, DECIMAL4, DECIMAL8, DECIMAL16, VALDATE, VALVC
---- DML_RESULTS: tdata
0,0,0,'0',true,0,0,0,2009-01-01 00:00:00,NULL,NULL,NULL,NULL,'0'
1,1.100000023841858,10,'1',false,1,1,10.1,2009-01-01 00:01:00,NULL,NULL,NULL,NULL,'1'
3,NULL,NULL,'NULL',NULL,3,3,30.3,2009-01-01 00:03:00.300000000,NULL,NULL,NULL,NULL,'NULL'
4,NULL,NULL,'NULL',NULL,4,4,40.4,2009-01-01 00:04:00.600000000,NULL,NULL,NULL,NULL,'NULL'
5,NULL,NULL,'NULL',NULL,5,5,50.5,2009-01-01 00:05:00.100000000,NULL,NULL,NULL,NULL,'NULL'
---- TYPES
INT,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,TIMESTAMP,DECIMAL,DECIMAL,DECIMAL,DATE,STRING
====
---- QUERY
# Make sure we can insert empty strings into string columns and that we can scan them
# back.
insert into tdata values (320, 2.0, 932, cast('' as string), false, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, cast('' as varchar(10)))
---- RESULTS
: 1
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
====
---- QUERY
# Scan back the row inserted by the previous test and check that the empty STRING and
# VARCHAR values are returned as empty strings.
select id, valv, valb, valvc from tdata where id = 320;
---- RESULTS
320,'',false,''
---- TYPES
INT,STRING,BOOLEAN,STRING
====
---- QUERY
# Insert a row with a new primary key (666). The next test re-inserts the same key.
insert into tdata values
  (666, cast(1.2 as float), 43, cast('z' as string), true, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL)
---- RESULTS
: 1
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
====
---- QUERY
# insert row with primary key that already exists
insert into tdata values
  (666, cast(1.2 as float), 43, cast('z' as VARCHAR(20)), true, 0, 0, 0, NULL, NULL, NULL, NULL, NULL, NULL)
---- RESULTS
: 0
---- RUNTIME_PROFILE
NumModifiedRows: 0
NumRowErrors: 1
====
---- QUERY
# CTAS into a Kudu table whose two range partitions cover ids below 100 and ids from
# 100 to 10000 inclusive. The source query selects only the rows with id < 100.
create table kudu_test_tbl primary key(id)
  partition by range(id) (partition values < 100, partition 100 <= values <= 10000)
  stored as kudu as
  select * from functional_kudu.alltypes where id < 100;
---- RESULTS
'Inserted 100 row(s)'
---- RUNTIME_PROFILE
NumModifiedRows: 100
NumRowErrors: 0
====
---- QUERY
# Re-insert the same 100 rows. Every primary key already exists, so no row is modified
# and all 100 rows are expected to be reported as row errors.
insert into kudu_test_tbl
  select * from functional_kudu.alltypes where id < 100;
---- RESULTS
: 0
---- RUNTIME_PROFILE
NumModifiedRows: 0
NumRowErrors: 100
====
---- QUERY
# large insert - 100 rows were already inserted above and result in errors
insert into kudu_test_tbl
  select * from functional_kudu.alltypes;
---- RESULTS
: 7200
---- RUNTIME_PROFILE
NumModifiedRows: 7200
NumRowErrors: 100
====
---- QUERY
# Insert rows that are not covered by any of the existing range partitions
# Only the row at 10000 is inserted.
insert into kudu_test_tbl SELECT cast(id + 10000 as int), bool_col, tinyint_col,
  smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, string_col,
  timestamp_col, year, month
  from functional_kudu.alltypes
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 7299
====
---- QUERY
# Table with all supported types as primary key and distribution columns
create table allkeytypes (i1 tinyint, i2 smallint, i3 int, i4 bigint, name string,
  valt timestamp, valvc varchar(10), valf float, vald double,
  primary key (i1, i2, i3, i4, name, valt, valvc))
  partition by hash partitions 3, range
  (partition value = (1,1,1,1,'1','2009-01-01 00:01:00',cast('1' as varchar(10))),
  partition value = (2,2,2,2,'2','2009-01-01 00:02:00.100000000',cast('2' as varchar(10))),
  partition value = (3,3,3,3,'3','2009-01-01 00:03:00.300000000',cast('3' as varchar(10)))) stored as kudu
---- RESULTS
'Table has been created.'
====
---- QUERY
# Of the selected source rows, the test expects 2 to be written and 6 to be reported as
# row errors (presumably the rows whose keys match none of the three single-value range
# partitions defined above - confirm against the functional.alltypes data).
insert into allkeytypes select cast(id as tinyint), smallint_col, int_col,
  cast (bigint_col/10 as bigint), string_col, timestamp_col, cast(string_col as varchar(10)), float_col, double_col
  from functional.alltypes where id > 1 and id < 10
---- RESULTS
: 2
---- RUNTIME_PROFILE
NumModifiedRows: 2
NumRowErrors: 6
====
---- QUERY
# IMPALA-5871 - test that a cast is correctly added when inserting a string into a Kudu
# timestamp partition column with distributed exec.
insert into allkeytypes values (1,1,1,1,'1','2009-01-01 00:01:00',cast('1' as varchar(10)),null,null)
---- RESULTS
: 1
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
row_regex: .*EXEC_SINGLE_NODE_ROWS_THRESHOLD=0.*
====
---- QUERY
# Table whose only column is a TIMESTAMP primary key, hash-partitioned on that key.
create table timestampkey (t timestamp primary key)
  partition by hash(t) PARTITIONS 2 stored as kudu;
====
---- QUERY
# Regression test for IMPALA-8909 (used to hit a DCHECK in debug builds).
insert into timestampkey select concat(string_col, " ") from functional.alltypessmall;
---- RESULTS
: 0
---- RUNTIME_PROFILE
NumModifiedRows: 0
NumRowErrors: 100
====
---- QUERY
# Table with default values
create table tbl_with_defaults (a int primary key, b int null default 10,
  c int not null default 100, d int default 1000, e int null, f int not null,
  g string default 'test', h boolean default true,
  i decimal(9, 2) default 1111.11, j varchar(10) default cast('foo' as varchar(10)))
  partition by hash (a) partitions 3 stored as kudu
---- RESULTS
'Table has been created.'
====
---- QUERY
# Only 'a' and 'f' are supplied. Every other column is expected to take its declared
# default, and 'e' (nullable, no default) is expected to be NULL.
insert into tbl_with_defaults (a, f) values (1, 1), (2, 2), (3, 3), (4, 4)
---- RUNTIME_PROFILE
NumModifiedRows: 4
NumRowErrors: 0
---- LABELS
A, B, C, D, E, F, G, H, I, J
---- DML_RESULTS: tbl_with_defaults
1,10,100,1000,NULL,1,'test',true,1111.11,'foo'
2,10,100,1000,NULL,2,'test',true,1111.11,'foo'
3,10,100,1000,NULL,3,'test',true,1111.11,'foo'
4,10,100,1000,NULL,4,'test',true,1111.11,'foo'
---- TYPES
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,DECIMAL,STRING
====
---- QUERY
# Every column is supplied explicitly, so none of the defaults apply to this row.
insert into tbl_with_defaults values (5, 5, 5, 5, 5, 5, 'row', false, 55555.55, cast('row' as varchar(10)))
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
A, B, C, D, E, F, G, H, I, J
---- DML_RESULTS: tbl_with_defaults
1,10,100,1000,NULL,1,'test',true,1111.11,'foo'
2,10,100,1000,NULL,2,'test',true,1111.11,'foo'
3,10,100,1000,NULL,3,'test',true,1111.11,'foo'
4,10,100,1000,NULL,4,'test',true,1111.11,'foo'
5,5,5,5,5,5,'row',false,55555.55,'row'
---- TYPES
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,DECIMAL,STRING
====
---- QUERY
# Add a nullable column without a default and a non-nullable column with a default.
alter table tbl_with_defaults add columns (k int null, l int not null default 10000)
---- RESULTS
'Column(s) have been added.'
====
---- QUERY
# Rows written before the ALTER are expected to show NULL for 'k' and the default
# value 10000 for 'l'.
select * from tbl_with_defaults
---- RESULTS
1,10,100,1000,NULL,1,'test',true,1111.11,'foo',NULL,10000
2,10,100,1000,NULL,2,'test',true,1111.11,'foo',NULL,10000
3,10,100,1000,NULL,3,'test',true,1111.11,'foo',NULL,10000
4,10,100,1000,NULL,4,'test',true,1111.11,'foo',NULL,10000
5,5,5,5,5,5,'row',false,55555.55,'row',NULL,10000
---- TYPES
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,DECIMAL,STRING,INT,INT
====
---- QUERY
# Insert a fully specified row, including values for the two newly added columns.
insert into tbl_with_defaults values (6,6,6,6,6,6,'another row',false,66666.66,cast('another' as varchar(10)),6,6)
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
A, B, C, D, E, F, G, H, I, J, K, L
---- DML_RESULTS: tbl_with_defaults
1,10,100,1000,NULL,1,'test',true,1111.11,'foo',NULL,10000
2,10,100,1000,NULL,2,'test',true,1111.11,'foo',NULL,10000
3,10,100,1000,NULL,3,'test',true,1111.11,'foo',NULL,10000
4,10,100,1000,NULL,4,'test',true,1111.11,'foo',NULL,10000
5,5,5,5,5,5,'row',false,55555.55,'row',NULL,10000
6,6,6,6,6,6,'another row',false,66666.66,'another',6,6
---- TYPES
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,DECIMAL,STRING,INT,INT
====
---- QUERY
# IMPALA-5217: Try to insert NULL to a 'NOT NULL' col with a target col list that leaves
# out some cols.
insert into tbl_with_defaults (a, c, f) values (0, null, 1)
---- RUNTIME_PROFILE
NumModifiedRows: 0
NumRowErrors: 1
====
---- QUERY
# IMPALA-5217: Insert NULL into a nullable col when a non-nullable col has been left out
# of the target col list.
insert into tbl_with_defaults (a, b, d, f) values (0, 0, null, 0)
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
A, B, C, D, E, F, G, H, I, J, K, L
---- DML_RESULTS: tbl_with_defaults
0,0,100,NULL,NULL,0,'test',true,1111.11,'foo',NULL,10000
1,10,100,1000,NULL,1,'test',true,1111.11,'foo',NULL,10000
2,10,100,1000,NULL,2,'test',true,1111.11,'foo',NULL,10000
3,10,100,1000,NULL,3,'test',true,1111.11,'foo',NULL,10000
4,10,100,1000,NULL,4,'test',true,1111.11,'foo',NULL,10000
5,5,5,5,5,5,'row',false,55555.55,'row',NULL,10000
6,6,6,6,6,6,'another row',false,66666.66,'another',6,6
---- TYPES
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,DECIMAL,STRING,INT,INT
====
---- QUERY
# Test VARCHAR truncation
insert into tbl_with_defaults values (7,7,7,7,7,7,'another row',false,77777.77,cast('sevensevensevenseven' as varchar(10)),7,7)
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
A, B, C, D, E, F, G, H, I, J, K, L
---- DML_RESULTS: tbl_with_defaults
0,0,100,NULL,NULL,0,'test',true,1111.11,'foo',NULL,10000
1,10,100,1000,NULL,1,'test',true,1111.11,'foo',NULL,10000
2,10,100,1000,NULL,2,'test',true,1111.11,'foo',NULL,10000
3,10,100,1000,NULL,3,'test',true,1111.11,'foo',NULL,10000
4,10,100,1000,NULL,4,'test',true,1111.11,'foo',NULL,10000
5,5,5,5,5,5,'row',false,55555.55,'row',NULL,10000
6,6,6,6,6,6,'another row',false,66666.66,'another',6,6
7,7,7,7,7,7,'another row',false,77777.77,'sevenseven',7,7
---- TYPES
INT,INT,INT,INT,INT,INT,STRING,BOOLEAN,DECIMAL,STRING,INT,INT
====
---- QUERY
# Table whose composite primary key (x, y) is also its hash-partitioning key.
create table multiple_partition_cols (x bigint, y bigint, z string, primary key(x, y))
  partition by hash(x, y) partitions 8 stored as kudu
---- RESULTS
'Table has been created.'
====
---- QUERY
# SELECT with constant
insert into multiple_partition_cols select 0, bigint_col, string_col
  from functional.alltypes where id = 0
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
X,Y,Z
---- DML_RESULTS: multiple_partition_cols
0,0,'0'
---- TYPES
BIGINT,BIGINT,STRING
====
---- QUERY
# SELECT with constant NULL
# The NULL targets the primary key column 'y'. The row is expected to be rejected and
# counted as a row error.
insert into multiple_partition_cols select bigint_col, null, string_col
  from functional.alltypes where id = 1
---- RESULTS
: 0
---- RUNTIME_PROFILE
NumModifiedRows: 0
NumRowErrors: 1
====
---- QUERY
# DATE primary key and no PARTITION BY clause. The ERRORS section below checks the
# message emitted for unpartitioned Kudu tables.
create table dates (valdate date primary key) stored as kudu
---- RESULTS
'Table has been created.'
---- ERRORS
Unpartitioned Kudu tables are inefficient for large data sizes.
====
---- QUERY
# Insert the epoch date together with a very early and a very late date.
insert into dates values
  (date '1970-01-01'),
  (date '0001-01-01'),
  (date '9999-12-31')
---- RUNTIME_PROFILE
NumModifiedRows: 3
NumRowErrors: 0
---- LABELS
VALDATE
---- DML_RESULTS: dates
1970-01-01
0001-01-01
9999-12-31
---- TYPES
DATE
====
---- QUERY
# Key-only table split into six range partitions with boundaries every 1,000,000 keys.
# The first and the last partition are open-ended.
create table orders_key_only (o_orderkey bigint primary key)
  partition by range(o_orderkey) (
    partition values <= 1000000,
    partition 1000000 < values <= 2000000,
    partition 2000000 < values <= 3000000,
    partition 3000000 < values <= 4000000,
    partition 4000000 < values <= 5000000,
    partition 5000000 < values )
  stored as kudu;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Very large insert to test concurrency and partitioning. Regression test for IMPALA-9782
# when run with mt_dop > 1. Note that we need to run against the kudu version of the table
# so that there's enough parallelism to reproduce the bug.
insert into orders_key_only
  select o_orderkey from tpch_kudu.orders;
---- RESULTS
: 1500000
---- RUNTIME_PROFILE
NumModifiedRows: 1500000
NumRowErrors: 0
====
---- QUERY
# Create Kudu table with non unique primary key
create table insert_non_unique_key_test_tbl1
  (id int non unique primary key, vali bigint null, valv string null)
  PARTITION BY RANGE (PARTITION VALUES < 10, PARTITION 10 <= VALUES < 30,
                      PARTITION 30 <= VALUES) STORED AS KUDU
---- RESULTS
'Table has been created.'
====
---- QUERY
# Insert VALUES with single row
insert into insert_non_unique_key_test_tbl1 values (1, 1, 'one')
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
---- LABELS
ID, VALI, VALV
---- DML_RESULTS: insert_non_unique_key_test_tbl1
1,1,'one'
---- TYPES
INT,BIGINT,STRING
====
---- QUERY
# Insert VALUES with multiple rows
insert into insert_non_unique_key_test_tbl1
  (id, vali, valv) values (2, 2, 'two'), (3, 3, 'three')
---- RUNTIME_PROFILE
NumModifiedRows: 2
NumRowErrors: 0
====
---- QUERY
# Try to insert a row with value for auto_incrementing_id column, listing every column
insert into insert_non_unique_key_test_tbl1
  (id, auto_incrementing_id, vali, valv) values (4, 4, 4, 'four')
---- CATCH
auto-incrementing column is incorrectly set
====
---- QUERY
# Try to insert a row with value for auto_incrementing_id column, listing only the key
# columns
insert into insert_non_unique_key_test_tbl1
  (id, auto_incrementing_id) values (5, 50000)
---- CATCH
auto-incrementing column is incorrectly set
====
---- QUERY
# Try to insert a row without value for key column
insert into insert_non_unique_key_test_tbl1
  (vali, valv) values (6, 'six')
---- CATCH
AnalysisException: All primary key columns must be specified for INSERTing into Kudu tables. Missing columns are: id
====
---- QUERY
# Insert a row with SELECT from other table
insert into insert_non_unique_key_test_tbl1
  select id, bigint_col, string_col from functional.alltypes where id = 10
---- RUNTIME_PROFILE
NumModifiedRows: 1
NumRowErrors: 0
====
---- QUERY
# 'select *' is expected to return only the three declared columns, not the system
# generated auto_incrementing_id column. The failed inserts above must have added no rows.
select * from insert_non_unique_key_test_tbl1 order by id;
---- RESULTS
1,1,'one'
2,2,'two'
3,3,'three'
10,0,'0'
---- TYPES
INT,BIGINT,STRING
====
---- QUERY
# Create Kudu table in CTAS statement with non unique primary key
# This section holds two statements. The expected RESULTS below are the rows returned
# by the trailing SELECT.
create table insert_non_unique_key_test_tbl2 non unique primary key (id)
  partition by hash (id) partitions 3
  stored as kudu
  as select * from insert_non_unique_key_test_tbl1;
select * from insert_non_unique_key_test_tbl2 order by id;
---- RESULTS
1,1,'one'
2,2,'two'
3,3,'three'
10,0,'0'
---- TYPES
INT,BIGINT,STRING
====
---- QUERY
# Create Kudu table with non unique primary key
create table insert_non_unique_key_test_tbl3
  (id int non unique primary key, vali bigint null, valv string null)
  partition by hash (id) partitions 3 stored as kudu
---- RESULTS
'Table has been created.'
====
---- QUERY
# Insert rows with SELECT from other table that also has a non unique primary key
insert into insert_non_unique_key_test_tbl3
  select * from insert_non_unique_key_test_tbl2
---- RUNTIME_PROFILE
NumModifiedRows: 4
NumRowErrors: 0
====
---- QUERY
# All four rows copied by the previous insert are expected to be present.
select * from insert_non_unique_key_test_tbl3 order by id;
---- RESULTS
1,1,'one'
2,2,'two'
3,3,'three'
10,0,'0'
---- TYPES
INT,BIGINT,STRING
====
---- QUERY
# Create Kudu table with non unique primary key
create table insert_non_unique_key_test_tbl4
  (id int non unique primary key, vali bigint null, valv string null)
  partition by range (id)
  (partition value = 0, partition value = 1,
   partition value = 2, partition value = 3)
  stored as kudu
---- RESULTS
'Table has been created.'
====
---- QUERY
# Insert two rows
insert into insert_non_unique_key_test_tbl4 values (1, 1, 'one'), (2, 2, 'two');
---- RUNTIME_PROFILE
NumModifiedRows: 2
NumRowErrors: 0
====
---- QUERY
# Insert two rows with duplicated values for non unique primary key
insert into insert_non_unique_key_test_tbl4 values (1, 10, 'ten'), (1, 11, 'eleven');
---- RUNTIME_PROFILE
NumModifiedRows: 2
NumRowErrors: 0
====
---- QUERY
# Select the system generated auto_incrementing_id column explicitly. The three rows
# with id = 1 are expected to carry the values 1, 2 and 3, and the value restarts at 1
# for id = 2, which is stored in a different range partition.
select id, vali, valv, auto_incrementing_id from insert_non_unique_key_test_tbl4
  order by id, auto_incrementing_id;
---- RESULTS
1,1,'one',1
1,10,'ten',2
1,11,'eleven',3
2,2,'two',1
---- TYPES
INT,BIGINT,STRING,BIGINT
====