Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-overwrite.test
Tamas Mate c0b0875bda IMPALA-11378: Allow INSERT OVERWRITE for bucket transforms in some cases
This change has been considered only for Iceberg tables mainly for table
maintenance reasons. Iceberg table writes create new snapshots and these
can accumulate over time. This commit allows a simple form of compaction
of these snapshots.

INSERT OVERWRITE statements are blocked when partition evolution is in
place, because it would be possible to overwrite a data file with a
newer schema that has fewer columns. This could cause unexpected data
loss.

For bucketed tables, the following syntax is allowed to be executed:
  INSERT OVERWRITE ice_tbl SELECT * FROM ice_tbl;
The source and target tables have to be the same and explicitly
specified, and only SELECT '*' queries are allowed. These requirements
are also in place to avoid unexpected data loss.
 - Values are not allowed, because inserting a single record could
   overwrite a whole file in a bucket.
 - Only the target table itself is allowed as the source, because at the
   time of the insert it is unknown which files will be modified,
   similar to values.

Testing:
 - Added e2e tests.

Change-Id: Ibd1bc19d839297246eadeb754cdeeec1e306098a
Reviewed-on: http://gerrit.cloudera.org:8080/18649
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2022-08-01 13:36:51 +00:00

199 lines
4.4 KiB
Plaintext

====
---- QUERY
# Unpartitioned Iceberg table: the baseline target for INSERT OVERWRITE.
CREATE TABLE ice_nopart (
  i INT,
  j INT
)
STORED AS ICEBERG;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Seed the unpartitioned table with a single row and read it back.
INSERT INTO ice_nopart
VALUES (1, 2);
SELECT *
FROM ice_nopart;
---- RESULTS
1,2
---- TYPES
INT,INT
====
---- QUERY
# Overwrite with a literal row; only the newly written row is expected back.
INSERT OVERWRITE ice_nopart
VALUES (10, 20);
SELECT *
FROM ice_nopart;
---- RESULTS
10,20
---- TYPES
INT,INT
====
---- QUERY
# Overwrite the table from a SELECT over itself, incrementing both columns by one.
INSERT OVERWRITE ice_nopart
SELECT
  CAST(i + 1 AS INT),
  CAST(j + 1 AS INT)
FROM ice_nopart;
SELECT *
FROM ice_nopart;
---- RESULTS
11,21
---- TYPES
INT,INT
====
---- QUERY
# Overwriting the unpartitioned table with an empty result set leaves it empty.
INSERT OVERWRITE ice_nopart
SELECT *
FROM ice_nopart
WHERE FALSE;
SELECT *
FROM ice_nopart;
---- RESULTS
====
---- QUERY
# Iceberg table identity-partitioned on j, the target for partition-level INSERT OVERWRITE.
CREATE TABLE ice_ident (i INT)
PARTITIONED BY (j INT)
STORED AS ICEBERG;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Seed the identity-partitioned table with one row (i = 1, j = 2).
INSERT INTO ice_ident
VALUES (1, 2);
SELECT *
FROM ice_ident;
---- RESULTS
1,2
---- TYPES
INT,INT
====
---- QUERY
# The overwritten row lands in a new partition (j = 20), so the existing
# row with j = 2 is expected to remain.
INSERT OVERWRITE ice_ident
VALUES (10, 20);
SELECT *
FROM ice_ident;
---- RESULTS
1,2
10,20
---- TYPES
INT,INT
====
---- QUERY
# Only the partition written by the SELECT (j = 2) is replaced; the row
# with j = 20 is expected to stay unchanged.
INSERT OVERWRITE ice_ident
SELECT
  CAST(i + 1 AS INT),
  j
FROM ice_ident
WHERE j = 2;
SELECT *
FROM ice_ident;
---- RESULTS
2,2
10,20
---- TYPES
INT,INT
====
---- QUERY
# An empty result set touches no partition, so the partitioned table keeps its rows.
INSERT OVERWRITE ice_ident
SELECT *
FROM ice_ident
WHERE FALSE;
SELECT *
FROM ice_ident;
---- RESULTS
2,2
10,20
---- TYPES
INT,INT
====
---- QUERY
# Iceberg table partitioned by the DAY transform of ts, the target for INSERT OVERWRITE.
CREATE TABLE ice_day (ts TIMESTAMP)
PARTITIONED BY SPEC (DAY(ts))
STORED AS ICEBERG;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Load four timestamps with separate INSERT statements; two of them fall on 2021-02-02.
INSERT INTO ice_day VALUES ('2021-02-01 16:59:36.630928000');
INSERT INTO ice_day VALUES ('2021-02-02 16:59:36.630928000');
INSERT INTO ice_day VALUES ('2021-02-02 16:59:39.630928000');
INSERT INTO ice_day VALUES ('2021-02-03 16:59:36.630928000');
====
---- QUERY
# Read back all four seeded timestamps before any overwrite.
SELECT *
FROM ice_day;
---- RESULTS
2021-02-01 16:59:36.630928000
2021-02-02 16:59:36.630928000
2021-02-02 16:59:39.630928000
2021-02-03 16:59:36.630928000
---- TYPES
TIMESTAMP
====
---- QUERY
# Replace the '2021-02-02' day partition: both of its rows are expected to be
# swapped for the single new timestamp, while the other days are kept.
INSERT OVERWRITE ice_day
VALUES ('2021-02-02 00:00:00');
SELECT *
FROM ice_day;
---- RESULTS
2021-02-01 16:59:36.630928000
2021-02-02 00:00:00
2021-02-03 16:59:36.630928000
---- TYPES
TIMESTAMP
====
---- QUERY
# An empty result set touches no day partition, so the table keeps its rows.
INSERT OVERWRITE ice_day
SELECT *
FROM ice_day
WHERE FALSE;
SELECT *
FROM ice_day;
---- RESULTS
2021-02-01 16:59:36.630928000
2021-02-02 00:00:00
2021-02-03 16:59:36.630928000
---- TYPES
TIMESTAMP
====
---- QUERY
# Iceberg table partitioned by the TRUNCATE(100, d) transform, the target for INSERT OVERWRITE.
CREATE TABLE ice_trunc (d DECIMAL(10, 2))
PARTITIONED BY SPEC (TRUNCATE(100, d))
STORED AS ICEBERG;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Load four decimals with separate INSERT statements.
INSERT INTO ice_trunc VALUES (1.11);
INSERT INTO ice_trunc VALUES (1.12);
INSERT INTO ice_trunc VALUES (2.22);
INSERT INTO ice_trunc VALUES (3.33);
====
---- QUERY
# Read back all four seeded decimals before any overwrite.
SELECT *
FROM ice_trunc;
---- RESULTS
1.11
1.12
2.22
3.33
---- TYPES
DECIMAL
====
---- QUERY
# Overwrite several TRUNCATE partitions at once. Per the expected rows, the old
# values 1.11, 1.12 and 3.33 are replaced, 2.22 is kept because no new value
# shares its partition, and 4.44, 4.45 and 5.00 are added.
# NOTE(review): presumably TRUNCATE(100, d) on DECIMAL(10, 2) truncates the
# unscaled value, so every value with the same integer part shares a partition;
# confirm against the Iceberg partition transform spec.
insert overwrite ice_trunc values(1.88), (1.9), (3.99), (4.44), (4.45), (5);
select * from ice_trunc;
---- RESULTS
1.88
1.90
2.22
3.99
4.44
4.45
5.00
---- TYPES
DECIMAL
====
---- QUERY
# Iceberg table partitioned by a 3-way BUCKET transform on i, used for the
# self-overwrite (compaction) case.
CREATE TABLE iceberg_overwrite_bucket (i INT)
PARTITIONED BY SPEC (BUCKET(3, i))
STORED AS ICEBERG;
---- RESULTS
'Table has been created.'
====
---- QUERY
# Insert the same value three times with separate statements, then count rows
# per data file; the expected output is three files holding one row each.
INSERT INTO iceberg_overwrite_bucket VALUES (1);
INSERT INTO iceberg_overwrite_bucket VALUES (1);
INSERT INTO iceberg_overwrite_bucket VALUES (1);
SELECT
  INPUT__FILE__NAME,
  COUNT(*)
FROM iceberg_overwrite_bucket
GROUP BY INPUT__FILE__NAME;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/iceberg_overwrite_bucket/data/.*.0.parq',1
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/iceberg_overwrite_bucket/data/.*.0.parq',1
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/iceberg_overwrite_bucket/data/.*.0.parq',1
---- TYPES
STRING, BIGINT
====
---- QUERY
# Rewrite the bucket-partitioned table from itself. The expected output is a
# single data file holding all three rows, i.e. the per-insert files are
# compacted into one.
# NOTE(review): per the change description, INSERT OVERWRITE on bucketed tables
# is accepted only in this exact "SELECT * FROM <same table>" form, so keep the
# statement shape as is.
insert overwrite iceberg_overwrite_bucket select * from iceberg_overwrite_bucket;
select INPUT__FILE__NAME, count(*)
from iceberg_overwrite_bucket
group by INPUT__FILE__NAME;
---- RESULTS
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/iceberg_overwrite_bucket/data/.*.0.parq',3
---- TYPES
STRING, BIGINT
====