IMPALA-13046: Update Iceberg mixed format deletes test

Updates iceberg-mixed-format-position-deletes.test for HIVE-28069. To
reduce the number of small files produced, newer versions of Hive now
remove a data file when a delete would negate all of its rows. The test
now ensures that every data file it expects to produce still has a row
after the delete (or circumvents the merge logic by using different
formats).

Change-Id: I87c23cc541983223c6b766372f4e582c33ae6836
Reviewed-on: http://gerrit.cloudera.org:8080/21373
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Michael Smith
2024-04-29 13:46:54 -07:00
committed by Impala Public Jenkins
parent b35aa81965
commit 20f908b1ab

View File

@@ -5,13 +5,11 @@ CREATE TABLE ice_mixed_formats(i int, j int)
STORED BY ICEBERG
STORED AS PARQUET
TBLPROPERTIES ('format-version'='2');
INSERT INTO ice_mixed_formats VALUES (1, 1);
INSERT INTO ice_mixed_formats VALUES (1, 1), (10, 10);
DELETE FROM ice_mixed_formats WHERE i = 1;
ALTER TABLE ice_mixed_formats SET TBLPROPERTIES ('write.format.default'='orc');
INSERT INTO ice_mixed_formats VALUES (2, 2);
INSERT INTO ice_mixed_formats VALUES (2, 2), (3, 3);
DELETE FROM ice_mixed_formats WHERE i = 2;
INSERT INTO ice_mixed_formats VALUES (3, 3);
INSERT INTO ice_mixed_formats VALUES (10, 10);
ALTER TABLE ice_mixed_formats SET TBLPROPERTIES ('write.format.default'='avro');
DELETE FROM ice_mixed_formats WHERE i = 3;
====
@@ -71,13 +69,11 @@ PARTITIONED BY SPEC (truncate(2, j))
STORED BY ICEBERG
STORED AS PARQUET
TBLPROPERTIES ('format-version'='2');
INSERT INTO ice_mixed_formats_partitioned VALUES (1, 1);
INSERT INTO ice_mixed_formats_partitioned VALUES (1, 1), (10, 1);
DELETE FROM ice_mixed_formats_partitioned WHERE i = 1;
ALTER TABLE ice_mixed_formats_partitioned SET TBLPROPERTIES ('write.format.default'='orc');
INSERT INTO ice_mixed_formats_partitioned VALUES (2, 2);
INSERT INTO ice_mixed_formats_partitioned VALUES (2, 2), (3, 2);
DELETE FROM ice_mixed_formats_partitioned WHERE i = 2;
INSERT INTO ice_mixed_formats_partitioned VALUES (3, 3);
INSERT INTO ice_mixed_formats_partitioned VALUES (10, 10);
ALTER TABLE ice_mixed_formats_partitioned SET TBLPROPERTIES ('write.format.default'='avro');
DELETE FROM ice_mixed_formats_partitioned WHERE i = 3;
====
@@ -91,7 +87,6 @@ row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_mixed_formats_partitioned/d
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_mixed_formats_partitioned/data/j_trunc=0/.*-delete-.*parquet','.*B','','.*'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_mixed_formats_partitioned/data/j_trunc=2/.*-data-.*.orc','.*B','','.*'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_mixed_formats_partitioned/data/j_trunc=2/.*-delete-.*orc','.*B','','.*'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_mixed_formats_partitioned/data/j_trunc=10/.*-data-.*.orc','.*B','','.*'
row_regex:'$NAMENODE/test-warehouse/$DATABASE.db/ice_mixed_formats_partitioned/data/j_trunc=2/.*-delete-.*.avro','.*B','','.*'
---- TYPES
STRING, STRING, STRING, STRING
@@ -99,7 +94,7 @@ STRING, STRING, STRING, STRING
---- QUERY
select * from ice_mixed_formats_partitioned;
---- RESULTS
10,10
10,1
---- TYPES
INT,INT
====
@@ -113,7 +108,7 @@ BIGINT
---- QUERY
select * from ice_mixed_formats_partitioned where i > 1;
---- RESULTS
10,10
10,1
---- TYPES
INT,INT
====