mirror of
https://github.com/apache/impala.git
synced 2026-01-28 00:04:45 -05:00
This patch adds support for INSERT OVERWRITE statements for Iceberg tables. We use Iceberg's ReplacePartitions interface for this. This interface provides consistent behavior with INSERT OVERWRITEs against regular tables. It's also consistent with other engines dynamic overwrites, e.g. Spark. INSERT OVERWRITE for partitioned tables replaces the partitions affected by the INSERT, while keeping the other partitions untouched. INSERT OVERWRITE is prohibited for tables that use the BUCKET partition transform because it would randomly overwrite table data. Testing * added e2e test Change-Id: Idf4acfb54cf62a3f3b2e8db9d04044580151299c Reviewed-on: http://gerrit.cloudera.org:8080/17012 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
169 lines
3.4 KiB
Plaintext
169 lines
3.4 KiB
Plaintext
====
|
|
---- QUERY
|
|
# Create unpartitioned table for INSERT OVERWRITE
|
|
create table ice_nopart (i int, j int)
|
|
stored as iceberg;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
INSERT INTO ice_nopart VALUES (1, 2);
|
|
SELECT * FROM ice_nopart;
|
|
---- RESULTS
|
|
1,2
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
INSERT OVERWRITE ice_nopart VALUES (10, 20);
|
|
SELECT * FROM ice_nopart;
|
|
---- RESULTS
|
|
10,20
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
INSERT OVERWRITE ice_nopart select cast(i+1 as int), cast(j+1 as int) from ice_nopart;
|
|
SELECT * FROM ice_nopart;
|
|
---- RESULTS
|
|
11,21
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# INSERT empty result set clears table.
|
|
INSERT OVERWRITE ice_nopart select * from ice_nopart where false;
|
|
select * from ice_nopart;
|
|
---- RESULTS
|
|
====
|
|
---- QUERY
|
|
# Create identity-partitioned table for INSERT OVERWRITE
|
|
create table ice_ident (i int)
|
|
partitioned by (j int)
|
|
stored as iceberg;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
INSERT INTO ice_ident VALUES (1, 2);
|
|
SELECT * FROM ice_ident;
|
|
---- RESULTS
|
|
1,2
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Add values to a new partition keeping the old ones.
|
|
INSERT OVERWRITE ice_ident VALUES (10, 20);
|
|
SELECT * FROM ice_ident;
|
|
---- RESULTS
|
|
1,2
|
|
10,20
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# INSERT only updates the affected partition.
|
|
INSERT OVERWRITE ice_ident select cast(i+1 as int), j from ice_ident where j = 2;
|
|
SELECT * FROM ice_ident;
|
|
---- RESULTS
|
|
2,2
|
|
10,20
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# INSERT empty result set has no effect on partitioned table.
|
|
INSERT OVERWRITE ice_ident select * from ice_ident where false;
|
|
select * from ice_ident;
|
|
---- RESULTS
|
|
2,2
|
|
10,20
|
|
---- TYPES
|
|
INT,INT
|
|
====
|
|
---- QUERY
|
|
# Create DAY-partitioned table for INSERT OVERWRITE
|
|
create table ice_day (ts timestamp)
|
|
partition by spec (ts DAY)
|
|
stored as iceberg;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into ice_day values ('2021-02-01 16:59:36.630928000');
|
|
insert into ice_day values ('2021-02-02 16:59:36.630928000');
|
|
insert into ice_day values ('2021-02-02 16:59:39.630928000');
|
|
insert into ice_day values ('2021-02-03 16:59:36.630928000');
|
|
====
|
|
---- QUERY
|
|
select * from ice_day;
|
|
---- RESULTS
|
|
2021-02-01 16:59:36.630928000
|
|
2021-02-02 16:59:36.630928000
|
|
2021-02-02 16:59:39.630928000
|
|
2021-02-03 16:59:36.630928000
|
|
---- TYPES
|
|
TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# Update data for partition '2021-02-02'.
|
|
insert overwrite ice_day values ('2021-02-02 00:00:00');
|
|
select * from ice_day;
|
|
---- RESULTS
|
|
2021-02-01 16:59:36.630928000
|
|
2021-02-02 00:00:00
|
|
2021-02-03 16:59:36.630928000
|
|
---- TYPES
|
|
TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# INSERT empty result set has no effect on partitioned table.
|
|
INSERT OVERWRITE ice_day select * from ice_day where false;
|
|
select * from ice_day;
|
|
---- RESULTS
|
|
2021-02-01 16:59:36.630928000
|
|
2021-02-02 00:00:00
|
|
2021-02-03 16:59:36.630928000
|
|
---- TYPES
|
|
TIMESTAMP
|
|
====
|
|
---- QUERY
|
|
# Create TRUNCATE-partitioned table for INSERT OVERWRITE
|
|
create table ice_trunc (d decimal(10, 2))
|
|
partition by spec (d TRUNCATE 100)
|
|
stored as iceberg;
|
|
---- RESULTS
|
|
'Table has been created.'
|
|
====
|
|
---- QUERY
|
|
insert into ice_trunc values (1.11);
|
|
insert into ice_trunc values (1.12);
|
|
insert into ice_trunc values (2.22);
|
|
insert into ice_trunc values (3.33);
|
|
====
|
|
---- QUERY
|
|
select * from ice_trunc;
|
|
---- RESULTS
|
|
1.11
|
|
1.12
|
|
2.22
|
|
3.33
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|
|
---- QUERY
|
|
insert overwrite ice_trunc values(1.88), (1.9), (3.99), (4.44), (4.45), (5);
|
|
select * from ice_trunc
|
|
---- RESULTS
|
|
1.88
|
|
1.90
|
|
2.22
|
|
3.99
|
|
4.44
|
|
4.45
|
|
5.00
|
|
---- TYPES
|
|
DECIMAL
|
|
====
|