mirror of
https://github.com/apache/impala.git
synced 2026-02-01 21:00:29 -05:00
KUDU-1938 added VARCHAR column type support to Kudu. This commit adds support for Kudu's VARCHAR type to Impala. The length of a Kudu varchar is applied as a character length as opposed to a byte length like Impala currently uses. When writing data to Kudu, the VARCHAR length is not an issue because Impala only officially supports ASCII characters and those characters are the same size in bytes and characters. Additionally, extra bytes would be truncated by the Kudu client if somehow a value was too long. When reading data from Kudu, it is possible that the value written by some other application is wider in bytes than Impala expects and can handle. This can happen due to multi-byte UTF-8 characters. In that case, we adjust the length in Impala to truncate the extra bytes of the value. This isn’t a great solution, but it is one that other integrations have taken as well, given that Impala doesn’t support UTF-8 values. IMPALA-5675 tracks adding UTF-8 character length support to VARCHAR columns, and the truncation code is marked with a TODO that references that Jira. Testing: * Performed manual testing of standard DDL and DML interaction * Manually reproduced a check failure due to multi-byte characters and tested that length truncation resolves that issue. 
* Added/adjusted the following automated tests: ** AnalyzeDDLTest: CTAS into Kudu with varchar type ** AnalyzeKuduDDLTest: CREATE TABLE in Kudu with VARCHAR type ** kudu_create.test: Create table with VARCHAR column, key, hash partition, and range partition ** kudu_describe.test: Describe table with VARCHAR column and key ** kudu_insert.test: Insert with VARCHAR columns including null and non-null defaults ** kudu_update.test: Updates with VARCHAR column ** kudu_upsert.test: Upserts with VARCHAR column ** kudu_delete.test: Deletes with VARCHAR columns ** kudu-scan-node.test: Tests basic predicates with VARCHAR columns Follow on work: - IMPALA-9580: Add min-max runtime filter support/tests - IMPALA-9581: Pushdown string predicates - IMPALA-9583: Automated multibyte truncation tests Change-Id: I0d4959410fdd882bfa980cb55e8a7837c7823da8 Reviewed-on: http://gerrit.cloudera.org:8080/14197 Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Reviewed-by: Thomas Tauber-Marshall <tmarshall@cloudera.com>
531 lines
22 KiB
Plaintext
531 lines
22 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Setup: Kudu table with an int primary key (id) and nullable columns of the other
# types exercised below (including VARCHAR), range-partitioned into three partitions:
# < 10, [10, 30) and >= 30.
create table tdata
|
|
(id int primary key, name string null, valf float null, vali bigint null,
|
|
valv string null, valb boolean null, valt tinyint null, vals smallint null,
|
|
vald double null, valdec decimal(9, 0) null, valdate date null, valvc varchar(10) null)
|
|
PARTITION BY RANGE (PARTITION VALUES < 10, PARTITION 10 <= VALUES < 30,
|
|
PARTITION 30 <= VALUES) STORED AS KUDU
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
# Seed tdata with four fully populated rows: ids 1-3 fall in the '< 10' range
# partition and id 40 in the '>= 30' partition. Later upserts build on this state.
insert into table tdata values
|
|
(40,'he',0,43,'e',false,35,36,1.2,37,DATE '1970-01-05',cast('he' as varchar(10))),
|
|
(1,'unknown',1,43,'aaaaaaaaaaaaaaaaaaaa',false,-1,-2,0,-3,DATE '1970-01-01',cast('unknown' as varchar(10))),
|
|
(2,'david',1,43,'b',false,0,0,0,0,DATE '1970-01-02',cast('david' as varchar(10))),
|
|
(3,'todd',1,43,'c',true,3,3,3,3,DATE '1970-01-03',cast('todd' as varchar(10)))
|
|
---- RESULTS
|
|
: 4
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, all cols, results in insert
|
|
upsert into table tdata values (4, 'a', 0, 1, 'b', false, 1, 2, 1.5, 4,DATE '1970-01-04', cast('a' as varchar(10)))
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'he',0,43,'e',false,35,36,1.2,37,1970-01-05,'he'
|
|
1,'unknown',1,43,'aaaaaaaaaaaaaaaaaaaa',false,-1,-2,0,-3,1970-01-01,'unknown'
|
|
2,'david',1,43,'b',false,0,0,0,0,1970-01-02,'david'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'a',0,1,'b',false,1,2,1.5,4,1970-01-04,'a'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, all cols, results in update
|
|
upsert into table tdata values (4, 'b', -1, 1, 'a', true, 2, 3, 2.5, 5, DATE '1970-01-04', cast('b' as varchar(10)))
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'he',0,43,'e',false,35,36,1.2,37,1970-01-05,'he'
|
|
1,'unknown',1,43,'aaaaaaaaaaaaaaaaaaaa',false,-1,-2,0,-3,1970-01-01,'unknown'
|
|
2,'david',1,43,'b',false,0,0,0,0,1970-01-02,'david'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'b',-1,1,'a',true,2,3,2.5,5,1970-01-04,'b'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, all cols, insert NULL all types
|
|
upsert into table tdata values (10, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'he',0,43,'e',false,35,36,1.2,37,1970-01-05,'he'
|
|
1,'unknown',1,43,'aaaaaaaaaaaaaaaaaaaa',false,-1,-2,0,-3,1970-01-01,'unknown'
|
|
2,'david',1,43,'b',false,0,0,0,0,1970-01-02,'david'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'b',-1,1,'a',true,2,3,2.5,5,1970-01-04,'b'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, all cols, update NULL all types
|
|
upsert into table tdata values (4, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'he',0,43,'e',false,35,36,1.2,37,1970-01-05,'he'
|
|
1,'unknown',1,43,'aaaaaaaaaaaaaaaaaaaa',false,-1,-2,0,-3,1970-01-01,'unknown'
|
|
2,'david',1,43,'b',false,0,0,0,0,1970-01-02,'david'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, all cols, update from NULL all types
|
|
upsert into table tdata values (4, 'four', 5, 6, 'f', true, 7, 8, 7.5, 9, DATE '1970-01-04', cast('four' as varchar(10)))
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'he',0,43,'e',false,35,36,1.2,37,1970-01-05,'he'
|
|
1,'unknown',1,43,'aaaaaaaaaaaaaaaaaaaa',false,-1,-2,0,-3,1970-01-01,'unknown'
|
|
2,'david',1,43,'b',false,0,0,0,0,1970-01-02,'david'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'four',5,6,'f',true,7,8,7.5,9,1970-01-04,'four'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, all cols, insert boundary values
|
|
upsert into table tdata values
|
|
(max_int(), '', cast('nan' as float), min_bigint(), '', true, max_tinyint(),
|
|
min_smallint(), cast('inf' as double), cast(999999999 as decimal(9, 0)), DATE '9999-12-31', cast('' as varchar(10)))
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'he',0,43,'e',false,35,36,1.2,37,1970-01-05,'he'
|
|
1,'unknown',1,43,'aaaaaaaaaaaaaaaaaaaa',false,-1,-2,0,-3,1970-01-01,'unknown'
|
|
2,'david',1,43,'b',false,0,0,0,0,1970-01-02,'david'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'four',5,6,'f',true,7,8,7.5,9,1970-01-04,'four'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
2147483647,'',NaN,-9223372036854775808,'',true,127,-32768,Infinity,999999999,9999-12-31,''
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, all cols, update boundary values
|
|
upsert into table tdata values
|
|
(max_int(), '', cast('-inf' as float), max_bigint(), '', true, min_tinyint(),
|
|
max_smallint(), cast('nan' as double), cast(-999999999 as decimal(9, 0)), DATE '0001-01-01', cast('' as varchar(10)))
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'he',0,43,'e',false,35,36,1.2,37,1970-01-05,'he'
|
|
1,'unknown',1,43,'aaaaaaaaaaaaaaaaaaaa',false,-1,-2,0,-3,1970-01-01,'unknown'
|
|
2,'david',1,43,'b',false,0,0,0,0,1970-01-02,'david'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'four',5,6,'f',true,7,8,7.5,9,1970-01-04,'four'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
2147483647,'',-Infinity,9223372036854775807,'',true,-128,32767,NaN,-999999999,0001-01-01,''
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, subset of cols, results in insert
|
|
upsert into table tdata (id, name, vali, valb, vald, valvc) values (5, 'five', -5, NULL, 0.5, cast('five' as varchar(10)))
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'he',0,43,'e',false,35,36,1.2,37,1970-01-05,'he'
|
|
1,'unknown',1,43,'aaaaaaaaaaaaaaaaaaaa',false,-1,-2,0,-3,1970-01-01,'unknown'
|
|
2,'david',1,43,'b',false,0,0,0,0,1970-01-02,'david'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'four',5,6,'f',true,7,8,7.5,9,1970-01-04,'four'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
2147483647,'',-Infinity,9223372036854775807,'',true,-128,32767,NaN,-999999999,0001-01-01,''
|
|
5,'five',NULL,-5,'NULL',NULL,NULL,NULL,0.5,NULL,NULL,'five'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, subset of cols, results in update
|
|
upsert into table tdata (id, name, valf, valv, valb, valvc) values (5, NULL, 0, 'six', false, NULL)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'he',0,43,'e',false,35,36,1.2,37,1970-01-05,'he'
|
|
1,'unknown',1,43,'aaaaaaaaaaaaaaaaaaaa',false,-1,-2,0,-3,1970-01-01,'unknown'
|
|
2,'david',1,43,'b',false,0,0,0,0,1970-01-02,'david'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'four',5,6,'f',true,7,8,7.5,9,1970-01-04,'four'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
2147483647,'',-Infinity,9223372036854775807,'',true,-128,32767,NaN,-999999999,0001-01-01,''
|
|
5,'NULL',0,-5,'six',false,NULL,NULL,0.5,NULL,NULL,'NULL'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# VALUES, multiple rows, all cols, no errors
|
|
upsert into table tdata values
|
|
(1, 'one', NULL, 44, 'a', true, -1, -2, 0, 0, DATE '1970-01-01', cast('one' as varchar(10))),
|
|
(6, '', -6, 40, 'b', NULL, 0, 0, 10, 11, DATE '1970-01-06', cast('' as varchar(10))),
|
|
(7, 'seven', 0, min_bigint(), NULL, true, 7, 1, 2, 3, DATE '1970-01-07', cast('seven' as varchar(10))),
|
|
(2, 'you', cast('inf' as float), 0, 't', false, NULL, min_smallint(), 0, 0, DATE '1970-01-02', cast('you' as varchar(10)))
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 4
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'he',0,43,'e',false,35,36,1.2,37,1970-01-05,'he'
|
|
1,'one',NULL,44,'a',true,-1,-2,0,0,1970-01-01,'one'
|
|
2,'you',Infinity,0,'t',false,NULL,-32768,0,0,1970-01-02,'you'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'four',5,6,'f',true,7,8,7.5,9,1970-01-04,'four'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
2147483647,'',-Infinity,9223372036854775807,'',true,-128,32767,NaN,-999999999,0001-01-01,''
|
|
5,'NULL',0,-5,'six',false,NULL,NULL,0.5,NULL,NULL,'NULL'
|
|
6,'',-6,40,'b',NULL,0,0,10,11,1970-01-06,''
|
|
7,'seven',0,-9223372036854775808,'NULL',true,7,1,2,3,1970-01-07,'seven'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# VALUES, multiple rows, subset of cols, no errors
|
|
upsert into table tdata (id, valb, name, vali, valvc) values
|
|
(1, true, NULL, 1, NULL),
|
|
(8, false, 'hello', 2, cast('hello' as varchar(10))),
|
|
(5, NULL, 'five', 10, cast('five' as varchar(10))),
|
|
(9, true, 'nine', 9, cast('nine' as varchar(10)))
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 4
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'he',0,43,'e',false,35,36,1.2,37,1970-01-05,'he'
|
|
1,'NULL',NULL,1,'a',true,-1,-2,0,0,1970-01-01,'NULL'
|
|
2,'you',Infinity,0,'t',false,NULL,-32768,0,0,1970-01-02,'you'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'four',5,6,'f',true,7,8,7.5,9,1970-01-04,'four'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
2147483647,'',-Infinity,9223372036854775807,'',true,-128,32767,NaN,-999999999,0001-01-01,''
|
|
5,'five',0,10,'six',NULL,NULL,NULL,0.5,NULL,NULL,'five'
|
|
6,'',-6,40,'b',NULL,0,0,10,11,1970-01-06,''
|
|
7,'seven',0,-9223372036854775808,'NULL',true,7,1,2,3,1970-01-07,'seven'
|
|
8,'hello',NULL,2,'NULL',false,NULL,NULL,NULL,NULL,NULL,'hello'
|
|
9,'nine',NULL,9,'NULL',true,NULL,NULL,NULL,NULL,NULL,'nine'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# SELECT, all cols, single row, no errors
|
|
upsert into table tdata
|
|
select id, 'a', valf, vali, valv, NULL, valt, vals, 3, valdec, valdate, cast('a' as varchar(10)) from tdata where id = 1
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'he',0,43,'e',false,35,36,1.2,37,1970-01-05,'he'
|
|
1,'a',NULL,1,'a',NULL,-1,-2,3,0,1970-01-01,'a'
|
|
2,'you',Infinity,0,'t',false,NULL,-32768,0,0,1970-01-02,'you'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'four',5,6,'f',true,7,8,7.5,9,1970-01-04,'four'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
2147483647,'',-Infinity,9223372036854775807,'',true,-128,32767,NaN,-999999999,0001-01-01,''
|
|
5,'five',0,10,'six',NULL,NULL,NULL,0.5,NULL,NULL,'five'
|
|
6,'',-6,40,'b',NULL,0,0,10,11,1970-01-06,''
|
|
7,'seven',0,-9223372036854775808,'NULL',true,7,1,2,3,1970-01-07,'seven'
|
|
8,'hello',NULL,2,'NULL',false,NULL,NULL,NULL,NULL,NULL,'hello'
|
|
9,'nine',NULL,9,'NULL',true,NULL,NULL,NULL,NULL,NULL,'nine'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# SELECT, all cols, multiple rows, no errors
|
|
upsert into table tdata
|
|
select id, valv, valf, vali, name, valb, valt, vals, vald, valdec, valdate, cast(valv as varchar(10)) from tdata where id % 2 = 0
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 6
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'e',0,43,'he',false,35,36,1.2,37,1970-01-05,'e'
|
|
1,'a',NULL,1,'a',NULL,-1,-2,3,0,1970-01-01,'a'
|
|
2,'t',Infinity,0,'you',false,NULL,-32768,0,0,1970-01-02,'t'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'f',5,6,'four',true,7,8,7.5,9,1970-01-04,'f'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
2147483647,'',-Infinity,9223372036854775807,'',true,-128,32767,NaN,-999999999,0001-01-01,''
|
|
5,'five',0,10,'six',NULL,NULL,NULL,0.5,NULL,NULL,'five'
|
|
6,'b',-6,40,'',NULL,0,0,10,11,1970-01-06,'b'
|
|
7,'seven',0,-9223372036854775808,'NULL',true,7,1,2,3,1970-01-07,'seven'
|
|
8,'NULL',NULL,2,'hello',false,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
9,'nine',NULL,9,'NULL',true,NULL,NULL,NULL,NULL,NULL,'nine'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# SELECT, subset of cols, single row, no errors
|
|
upsert into table tdata (id, valv, vali)
|
|
select int_col, string_col, bigint_col from functional.alltypes where id = 0
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'e',0,43,'he',false,35,36,1.2,37,1970-01-05,'e'
|
|
1,'a',NULL,1,'a',NULL,-1,-2,3,0,1970-01-01,'a'
|
|
2,'t',Infinity,0,'you',false,NULL,-32768,0,0,1970-01-02,'t'
|
|
3,'todd',1,43,'c',true,3,3,3,3,1970-01-03,'todd'
|
|
4,'f',5,6,'four',true,7,8,7.5,9,1970-01-04,'f'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
2147483647,'',-Infinity,9223372036854775807,'',true,-128,32767,NaN,-999999999,0001-01-01,''
|
|
5,'five',0,10,'six',NULL,NULL,NULL,0.5,NULL,NULL,'five'
|
|
6,'b',-6,40,'',NULL,0,0,10,11,1970-01-06,'b'
|
|
7,'seven',0,-9223372036854775808,'NULL',true,7,1,2,3,1970-01-07,'seven'
|
|
8,'NULL',NULL,2,'hello',false,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
9,'nine',NULL,9,'NULL',true,NULL,NULL,NULL,NULL,NULL,'nine'
|
|
0,'NULL',NULL,0,'0',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# SELECT, subset of cols, multiple rows, no errors
|
|
upsert into table tdata (id, valb, name, valt, valvc)
|
|
select int_col, bool_col, string_col, tinyint_col, cast(string_col as varchar(10)) from functional.alltypes where id < 4
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 4
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'e',0,43,'he',false,35,36,1.2,37,1970-01-05,'e'
|
|
1,'1',NULL,1,'a',false,1,-2,3,0,1970-01-01,'1'
|
|
2,'2',Infinity,0,'you',true,2,-32768,0,0,1970-01-02,'2'
|
|
3,'3',1,43,'c',false,3,3,3,3,1970-01-03,'3'
|
|
4,'f',5,6,'four',true,7,8,7.5,9,1970-01-04,'f'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
2147483647,'',-Infinity,9223372036854775807,'',true,-128,32767,NaN,-999999999,0001-01-01,''
|
|
5,'five',0,10,'six',NULL,NULL,NULL,0.5,NULL,NULL,'five'
|
|
6,'b',-6,40,'',NULL,0,0,10,11,1970-01-06,'b'
|
|
7,'seven',0,-9223372036854775808,'NULL',true,7,1,2,3,1970-01-07,'seven'
|
|
8,'NULL',NULL,2,'hello',false,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
9,'nine',NULL,9,'NULL',true,NULL,NULL,NULL,NULL,NULL,'nine'
|
|
0,'0',NULL,0,'0',true,0,NULL,NULL,NULL,NULL,'0'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# SELECT, decimal column, multiple rows, no errors
|
|
upsert into table tdata (id, valdec)
|
|
select id, cast((valdec * 2) as decimal(9,0)) from tdata where valdec > 0
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 5
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
ID, NAME, VALF, VALI, VALV, VALB, VALT, VALS, VALD, VALDEC, VALDATE, VALVC
|
|
---- DML_RESULTS: tdata
|
|
40,'e',0,43,'he',false,35,36,1.2,74,1970-01-05,'e'
|
|
1,'1',NULL,1,'a',false,1,-2,3,0,1970-01-01,'1'
|
|
2,'2',Infinity,0,'you',true,2,-32768,0,0,1970-01-02,'2'
|
|
3,'3',1,43,'c',false,3,3,3,6,1970-01-03,'3'
|
|
4,'f',5,6,'four',true,7,8,7.5,18,1970-01-04,'f'
|
|
10,'NULL',NULL,NULL,'NULL',NULL,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
2147483647,'',-Infinity,9223372036854775807,'',true,-128,32767,NaN,-999999999,0001-01-01,''
|
|
5,'five',0,10,'six',NULL,NULL,NULL,0.5,NULL,NULL,'five'
|
|
6,'b',-6,40,'',NULL,0,0,10,22,1970-01-06,'b'
|
|
7,'seven',0,-9223372036854775808,'NULL',true,7,1,2,6,1970-01-07,'seven'
|
|
8,'NULL',NULL,2,'hello',false,NULL,NULL,NULL,NULL,NULL,'NULL'
|
|
9,'nine',NULL,9,'NULL',true,NULL,NULL,NULL,NULL,NULL,'nine'
|
|
0,'0',NULL,0,'0',true,0,NULL,NULL,NULL,NULL,'0'
|
|
---- TYPES
|
|
INT,STRING,FLOAT,BIGINT,STRING,BOOLEAN,TINYINT,SMALLINT,DOUBLE,DECIMAL,DATE,STRING
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, all cols, null for non-nullable column
|
|
upsert into table tdata values (null, '', 0, 0, cast('' as VARCHAR(20)), false, 0, 0, 0, null, null, cast('' as varchar(10)))
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 0
|
|
NumRowErrors: 1
|
|
====
|
|
---- QUERY
|
|
# VALUES, single row, subset of cols, null for non-nullable column
|
|
upsert into table tdata (id, name) values (null, '')
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 0
|
|
NumRowErrors: 1
|
|
====
|
|
---- QUERY
|
|
# VALUES, multiple rows, all cols, null for non-nullable column
|
|
upsert into table tdata values
|
|
(3,'todd',1,43,'c',true,3,3,3,null,null,cast('todd' as varchar(10))),
|
|
(4,'four',5,6,'f',true,7,8,7.5,null,null,cast('four' as varchar(10))),
|
|
(6,'',-6,40,'b',NULL,0,0,10,null,null,cast('' as varchar(10))),
|
|
(NULL,'seven',0,0,'NULL',true,7,1,2,null,null,cast('seven' as varchar(10)))
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 3
|
|
NumRowErrors: 1
|
|
====
|
|
---- QUERY
|
|
# VALUES, multiple rows, subset of cols, null for non-nullable column
|
|
upsert into table tdata (id, valv, valt, vals) values
|
|
(0, 'bbb', 1, 2),
|
|
(NULL, 'aaa', 2, 1),
|
|
(10, 'ccc', 11, 12)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 2
|
|
NumRowErrors: 1
|
|
====
|
|
---- QUERY
|
|
# high cardinality: upserts 100 rows selected from functional.alltypes in a single
# statement; only the row counters are verified (the RESULTS section is empty).
|
|
upsert into table tdata (id, valb, name, valt, valvc)
|
|
select int_col, bool_col, string_col, tinyint_col, cast(string_col as varchar(10))
|
|
from functional.alltypes limit 100
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 100
|
|
NumRowErrors: 0
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# IMPALA-6280: check that TupleIsNullPredicate is materialized correctly
|
|
upsert into table tdata (id, vali)
|
|
select t1.id, v.id from functional.alltypestiny t1
|
|
left outer join (select ifnull(id, 10) id from functional.alltypessmall) v
|
|
on t1.id = v.id limit 1
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 1
|
|
NumRowErrors: 0
|
|
====
|
|
---- QUERY
|
|
# Setup: Kudu table with a four-column composite primary key
# (string_col, bigint_col, tinyint_col, smallint_col) and four nullable value columns,
# hash-partitioned on string_col into 16 partitions.
create table multiple_key_cols
|
|
(string_col string, bigint_col bigint, tinyint_col tinyint, smallint_col smallint,
|
|
bool_col boolean null, int_col int null, double_col double null,
|
|
float_col float null, primary key (string_col, bigint_col, tinyint_col, smallint_col))
|
|
PARTITION BY HASH (string_col) PARTITIONS 16 STORED AS KUDU
|
|
====
|
|
---- QUERY
|
|
# Seed multiple_key_cols with two rows whose composite keys differ only in string_col
# ('a' vs 'b'); each row leaves one nullable value column NULL.
insert into multiple_key_cols values
|
|
('a', 1, 2, 3, true, 4, 5, NULL),
|
|
('b', 1, 2, 3, false, 7, NULL, 9)
|
|
---- RESULTS
|
|
: 2
|
|
====
|
|
---- QUERY
|
|
# VALUES, multiple key columns, all cols
|
|
upsert into table multiple_key_cols values
|
|
('a', 1, 2, 3, true, NULL, 5, 6),
|
|
('a', -1, -2, -3, true, 0, NULL, NULL),
|
|
('c', 0, 0, 0, NULL, 10, 20, 30)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 3
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
STRING_COL, BIGINT_COL, TINYINT_COL, SMALLINT_COL, BOOL_COL, INT_COL, DOUBLE_COL, FLOAT_COL
|
|
---- DML_RESULTS: multiple_key_cols
|
|
'a',1,2,3,true,NULL,5,6
|
|
'a',-1,-2,-3,true,0,NULL,NULL
|
|
'b',1,2,3,false,7,NULL,9
|
|
'c',0,0,0,NULL,10,20,30
|
|
---- TYPES
|
|
STRING,BIGINT,TINYINT,SMALLINT,BOOLEAN,INT,DOUBLE,FLOAT
|
|
====
|
|
---- QUERY
|
|
# VALUES, multiple key columns, subset of cols
|
|
upsert into table multiple_key_cols
|
|
(string_col, bool_col, bigint_col, smallint_col, tinyint_col) values
|
|
('a', false, 1, 3, 2),
|
|
('d', NULL, 0, 1, 2),
|
|
('b', true, 1, 3, 3)
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 3
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
STRING_COL, BIGINT_COL, TINYINT_COL, SMALLINT_COL, BOOL_COL, INT_COL, DOUBLE_COL, FLOAT_COL
|
|
---- DML_RESULTS: multiple_key_cols
|
|
'a',1,2,3,false,NULL,5,6
|
|
'a',-1,-2,-3,true,0,NULL,NULL
|
|
'b',1,2,3,false,7,NULL,9
|
|
'c',0,0,0,NULL,10,20,30
|
|
'd',0,2,1,NULL,NULL,NULL,NULL
|
|
'b',1,3,3,true,NULL,NULL,NULL
|
|
---- TYPES
|
|
STRING,BIGINT,TINYINT,SMALLINT,BOOLEAN,INT,DOUBLE,FLOAT
|
|
====
|
|
---- QUERY
|
|
# SELECT, multiple key columns, all cols
|
|
upsert into table multiple_key_cols
|
|
select string_col, bigint_col, tinyint_col, smallint_col, false, -1, -2, -3
|
|
from multiple_key_cols where string_col = 'a'
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 2
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
STRING_COL, BIGINT_COL, TINYINT_COL, SMALLINT_COL, BOOL_COL, INT_COL, DOUBLE_COL, FLOAT_COL
|
|
---- DML_RESULTS: multiple_key_cols
|
|
'a',1,2,3,false,-1,-2,-3
|
|
'a',-1,-2,-3,false,-1,-2,-3
|
|
'b',1,2,3,false,7,NULL,9
|
|
'c',0,0,0,NULL,10,20,30
|
|
'd',0,2,1,NULL,NULL,NULL,NULL
|
|
'b',1,3,3,true,NULL,NULL,NULL
|
|
---- TYPES
|
|
STRING,BIGINT,TINYINT,SMALLINT,BOOLEAN,INT,DOUBLE,FLOAT
|
|
====
|
|
---- QUERY
|
|
# SELECT, multiple key columns, subset of cols
|
|
upsert into table multiple_key_cols
|
|
(string_col, float_col, bigint_col, tinyint_col, double_col, smallint_col)
|
|
select 'b', float_col, 1, tinyint_col, double_col, 3
|
|
from functional.alltypes where id = 2 or id = 3
|
|
---- RUNTIME_PROFILE
|
|
NumModifiedRows: 2
|
|
NumRowErrors: 0
|
|
---- LABELS
|
|
STRING_COL, BIGINT_COL, TINYINT_COL, SMALLINT_COL, BOOL_COL, INT_COL, DOUBLE_COL, FLOAT_COL
|
|
---- DML_RESULTS: multiple_key_cols
|
|
'a',1,2,3,false,-1,-2,-3
|
|
'a',-1,-2,-3,false,-1,-2,-3
|
|
'b',1,2,3,false,7,20.2,2.200000047683716
|
|
'c',0,0,0,NULL,10,20,30
|
|
'd',0,2,1,NULL,NULL,NULL,NULL
|
|
'b',1,3,3,true,NULL,30.3,3.299999952316284
|
|
---- TYPES
|
|
STRING,BIGINT,TINYINT,SMALLINT,BOOLEAN,INT,DOUBLE,FLOAT
|
|
====
|
|
---- QUERY
|
|
# missing key column: bigint_col is part of the composite primary key but is omitted
# from the column list, so the statement is expected to be rejected with an error.
|
|
upsert into table multiple_key_cols
|
|
(string_col, tinyint_col, smallint_col) values ('a', 1, 1)
|
|
---- CATCH
|
|
All primary key columns must be specified for UPSERTing into Kudu tables. Missing columns are: bigint_col
|
|
====
|