IMPALA-3725 Support Kudu UPSERT in Impala

This patch introduces a new query statement, UPSERT, for Kudu
tables. It operates like an INSERT and uses the same analysis,
planning, and execution machinery as INSERT, except that when
there is a primary key collision an update is performed instead
of returning an error.

New syntax:
[with_clause] UPSERT INTO [TABLE] table_name [(column list)]
{
  query_stmt
 | VALUES (value [, value...]) [, (value [, value...]) ...]
}

where the column list, if specified, must contain all of the key
columns in table_name, and table_name must be a Kudu table.

This patch also improves the behavior of INSERTing into Kudu
tables without specifying all of the key columns - this now
results in an analysis exception, rather than attempting the
INSERT and receiving an error back from Kudu.

Change-Id: I8df5cea36b642e267f85ff6b163f3dd96b8386e9
Reviewed-on: http://gerrit.cloudera.org:8080/4047
Reviewed-by: Matthew Jacobs <mj@cloudera.com>
Tested-by: Internal Jenkins
This commit is contained in:
Thomas Tauber-Marshall
2016-08-18 09:57:13 -07:00
committed by Internal Jenkins
parent e9a4077b35
commit 832fb53763
17 changed files with 665 additions and 84 deletions

View File

@@ -261,6 +261,75 @@ delete ignore a from tdata a, tdata b where a.id = 666
row_regex: .*NumModifiedRows: 1.*
====
---- QUERY
# Baseline snapshot of tdata taken before the UPSERT statements that follow, so the
# effect of each later upsert can be read off by comparing against these rows.
select * from tdata
---- RESULTS
40,'he',0,43,'e',false
120,'she',0,99,'f',true
320,'',2,932,'',false
1,'unknown',1,43,'aaaaaaaaaaaaaaaaaaaa',false
2,'david',1,43,'b',false
3,'todd',1,43,'c',true
---- TYPES
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN
====
---- QUERY
# UPSERT with VALUES and no column list, supplying every column of tdata.
# Both ids (40 and 1) already appear in the preceding select of tdata, so this
# exercises the update-on-collision path rather than an insert. The second row also
# overwrites the second (STRING) column with NULL.
upsert into table tdata values (40, 'they', 1, 43, cast('e' as VARCHAR(20)), false),
(1, NULL, 1, 0, cast('a' as VARCHAR(20)), true)
---- RESULTS
====
---- QUERY
# Verifies the preceding upsert: the rows with id 40 and id 1 carry the new values and
# the row count is unchanged (no rows were added). The NULL written to the second
# column of id 1 is expected to appear as 'NULL' in the results.
select * from tdata
---- RESULTS
40,'they',1,43,'e',false
120,'she',0,99,'f',true
320,'',2,932,'',false
1,'NULL',1,0,'a',true
2,'david',1,43,'b',false
3,'todd',1,43,'c',true
---- TYPES
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN
====
---- QUERY
# UPSERT with an explicit column list naming only id and valf.
# ids 2 and 120 already exist in tdata, so only valf should change for them (to NULL
# and 20 respectively); id 0 is not present yet, so a new row is expected for it.
upsert into table tdata (id, valf) values (2, NULL), (120, 20), (0, 0)
---- RESULTS
====
---- QUERY
# Verifies the preceding upsert: rows 120 and 2 show the new valf (20 and NULL) with
# their other columns untouched, and a new row with id 0 exists whose columns not
# listed in the upsert are all NULL.
select * from tdata
---- RESULTS
40,'they',1,43,'e',false
120,'she',20,99,'f',true
320,'',2,932,'',false
1,'NULL',1,0,'a',true
2,'david',NULL,43,'b',false
3,'todd',1,43,'c',true
0,'NULL',0,NULL,'NULL',NULL
---- TYPES
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN
====
---- QUERY
# UPSERT fed by a SELECT instead of VALUES, with the column list (valb, name, id) in a
# different order from the column order returned by "select * from tdata".
# The source query reads tdata itself; per the preceding select, the rows matching
# id < 2 are ids 1 and 0, so both targets already exist.
upsert into table tdata (valb, name, id)
select false as valb, 'he' as name, id from tdata where id < 2
---- RESULTS
====
---- QUERY
# Verifies the preceding upsert: rows 1 and 0 now have 'he' in the second column and
# false in the last column, while the columns not listed in the upsert keep their
# previous values. All other rows are unchanged.
select * from tdata
---- RESULTS
40,'they',1,43,'e',false
120,'she',20,99,'f',true
320,'',2,932,'',false
1,'he',1,0,'a',false
2,'david',NULL,43,'b',false
3,'todd',1,43,'c',true
0,'he',0,NULL,'NULL',false
---- TYPES
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN
====
---- QUERY
# Negative case: supplying NULL for id is expected to fail. The CATCH section holds the
# expected error text, which reports id as a NOT NULL column.
upsert into table tdata (id, name) values (null, '')
---- CATCH
Could not add Kudu WriteOp.: Invalid argument: column not nullable: id[int32 NOT NULL]
====
---- QUERY
# IMPALA-3454: A delete that requires a rewrite may not get the Kudu column order correct
# if the Kudu columns are of different types.
create table impala_3454 (key_1 tinyint, key_2 bigint, PRIMARY KEY (key_1, key_2))