mirror of
https://github.com/apache/impala.git
synced 2026-02-01 12:00:22 -05:00
This change has been considered only for Iceberg tables, mainly for table maintenance reasons. Iceberg table writes create new snapshots and these can accumulate over time. This commit allows a simple form of compaction of these snapshots. INSERT OVERWRITEs have been blocked in case partition evolution is in place, because it would be possible to overwrite a data file with a newer schema that has fewer columns. This could cause unexpected data loss. For bucketed tables, the following syntax is allowed to be executed: INSERT OVERWRITE ice_tbl SELECT * FROM ice_tbl; The source and target tables have to be the same and specified; only SELECT '*' queries are allowed. These requirements are also in place to avoid unexpected data loss. - Values are not allowed, because inserting a single record could overwrite a whole file in a bucket. - Only the source table is allowed, because at the time of the insert it is unknown which files will be modified, similar to values. Testing: - Added e2e tests. Change-Id: Ibd1bc19d839297246eadeb754cdeeec1e306098a Reviewed-on: http://gerrit.cloudera.org:8080/18649 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
199 lines
4.4 KiB
Plaintext
199 lines
4.4 KiB
Plaintext
====
---- QUERY
# Create unpartitioned table for INSERT OVERWRITE
create table ice_nopart (i int, j int)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Seed the unpartitioned table with a single row.
INSERT INTO ice_nopart VALUES (1, 2);
SELECT * FROM ice_nopart;
---- RESULTS
1,2
---- TYPES
INT,INT
====
---- QUERY
# INSERT OVERWRITE with VALUES replaces the previous content of the unpartitioned table.
INSERT OVERWRITE ice_nopart VALUES (10, 20);
SELECT * FROM ice_nopart;
---- RESULTS
10,20
---- TYPES
INT,INT
====
---- QUERY
# INSERT OVERWRITE may select from the very table it overwrites.
INSERT OVERWRITE ice_nopart select cast(i+1 as int), cast(j+1 as int) from ice_nopart;
SELECT * FROM ice_nopart;
---- RESULTS
11,21
---- TYPES
INT,INT
====
---- QUERY
# INSERT empty result set clears table.
INSERT OVERWRITE ice_nopart select * from ice_nopart where false;
select * from ice_nopart;
---- RESULTS
====
---- QUERY
# Create identity-partitioned table for INSERT OVERWRITE
create table ice_ident (i int)
partitioned by (j int)
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Seed the table with one row (i=1) in partition j=2.
INSERT INTO ice_ident VALUES (1, 2);
SELECT * FROM ice_ident;
---- RESULTS
1,2
---- TYPES
INT,INT
====
---- QUERY
# Add values to a new partition keeping the old ones.
INSERT OVERWRITE ice_ident VALUES (10, 20);
SELECT * FROM ice_ident;
---- RESULTS
1,2
10,20
---- TYPES
INT,INT
====
---- QUERY
# INSERT only updates the affected partition.
INSERT OVERWRITE ice_ident select cast(i+1 as int), j from ice_ident where j = 2;
SELECT * FROM ice_ident;
---- RESULTS
2,2
10,20
---- TYPES
INT,INT
====
---- QUERY
# INSERT empty result set has no effect on partitioned table.
INSERT OVERWRITE ice_ident select * from ice_ident where false;
select * from ice_ident;
---- RESULTS
2,2
10,20
---- TYPES
INT,INT
====
---- QUERY
# Create DAY-partitioned table for INSERT OVERWRITE
create table ice_day (ts timestamp)
partitioned by spec (DAY(ts))
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Seed three distinct days; 2021-02-02 receives two rows.
insert into ice_day values ('2021-02-01 16:59:36.630928000');
insert into ice_day values ('2021-02-02 16:59:36.630928000');
insert into ice_day values ('2021-02-02 16:59:39.630928000');
insert into ice_day values ('2021-02-03 16:59:36.630928000');
====
---- QUERY
# Check the seeded rows before any overwrite happens.
select * from ice_day;
---- RESULTS
2021-02-01 16:59:36.630928000
2021-02-02 16:59:36.630928000
2021-02-02 16:59:39.630928000
2021-02-03 16:59:36.630928000
---- TYPES
TIMESTAMP
====
---- QUERY
# Update data for partition '2021-02-02'.
insert overwrite ice_day values ('2021-02-02 00:00:00');
select * from ice_day;
---- RESULTS
2021-02-01 16:59:36.630928000
2021-02-02 00:00:00
2021-02-03 16:59:36.630928000
---- TYPES
TIMESTAMP
====
---- QUERY
# INSERT empty result set has no effect on partitioned table.
INSERT OVERWRITE ice_day select * from ice_day where false;
select * from ice_day;
---- RESULTS
2021-02-01 16:59:36.630928000
2021-02-02 00:00:00
2021-02-03 16:59:36.630928000
---- TYPES
TIMESTAMP
====
---- QUERY
# Create TRUNCATE-partitioned table for INSERT OVERWRITE
create table ice_trunc (d decimal(10, 2))
partitioned by spec (TRUNCATE(100, d))
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Seed four rows, each with its own INSERT statement.
insert into ice_trunc values (1.11);
insert into ice_trunc values (1.12);
insert into ice_trunc values (2.22);
insert into ice_trunc values (3.33);
====
---- QUERY
# Check the seeded rows before any overwrite happens.
select * from ice_trunc;
---- RESULTS
1.11
1.12
2.22
3.33
---- TYPES
DECIMAL
====
---- QUERY
# Overwrite with values that hit some existing partitions and some new ones.
# Expected: 1.11, 1.12 and 3.33 are replaced, 2.22 is kept, 4.44, 4.45 and 5.00 are added.
# NOTE(review): presumably TRUNCATE(100, d) groups these decimals by their integer
# part -- confirm against the Iceberg truncate transform definition.
insert overwrite ice_trunc values(1.88), (1.9), (3.99), (4.44), (4.45), (5);
select * from ice_trunc;
---- RESULTS
1.88
1.90
2.22
3.99
4.44
4.45
5.00
---- TYPES
DECIMAL
====
---- QUERY
# Create BUCKET-partitioned table for INSERT OVERWRITE
create table iceberg_overwrite_bucket (i int)
partitioned by spec (bucket(3, i))
stored as iceberg;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Insert the same value three times with separate statements.
# Expected: three data files, each holding exactly one row.
insert into iceberg_overwrite_bucket values (1);
insert into iceberg_overwrite_bucket values (1);
insert into iceberg_overwrite_bucket values (1);
select INPUT__FILE__NAME, count(*)
from iceberg_overwrite_bucket
group by INPUT__FILE__NAME;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/iceberg_overwrite_bucket/data/.*.0.parq',1
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/iceberg_overwrite_bucket/data/.*.0.parq',1
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/iceberg_overwrite_bucket/data/.*.0.parq',1
---- TYPES
STRING, BIGINT
====
---- QUERY
# Rewrite the bucketed table from itself with SELECT *.
# Expected: the three rows end up in a single data file.
insert overwrite iceberg_overwrite_bucket select * from iceberg_overwrite_bucket;
select INPUT__FILE__NAME, count(*)
from iceberg_overwrite_bucket
group by INPUT__FILE__NAME;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/iceberg_overwrite_bucket/data/.*.0.parq',3
---- TYPES
STRING, BIGINT
====