Files
impala/testdata/workloads/functional-query/queries/QueryTest/iceberg-multiple-delete-per-partition.test
Zoltan Borok-Nagy bfae4d0b32 IMPALA-14496: Impala crashes when it writes multiple delete files per partition in a single DELETE operation
Impala crashes when it needs to write multiple delete files per
partition in a single DELETE operation. This happens because
IcebergBufferedDeleteSink has its own DmlExecState object, but
the methods in TableSinkBase sometimes use the RuntimeState's
DmlExecState object instead. As a result, we can add a partition
to the IcebergBufferedDeleteSink's DmlExecState, but later
expect to find it in the RuntimeState's DmlExecState.

This patch adds new methods to TableSinkBase that are specific
to writing delete files, and they always take a DmlExecState
object as a parameter. IcebergBufferedDeleteSink now uses them.

Testing
 * added e2e tests

Change-Id: I46266007a6356e9ff3b63369dd855aff1396bb72
Reviewed-on: http://gerrit.cloudera.org:8080/23537
Reviewed-by: Mihaly Szjatinya <mszjat@pm.me>
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2025-10-15 19:58:37 +00:00

50 lines
1.5 KiB
Plaintext

====
---- QUERY
# Regression test for IMPALA-14496 where a DELETE operation needs to write
# multiple delete files per partition.
#
# Setup for the regression: creates an Iceberg format-version 2 table that is
# partitioned by 'year' and seeds it from functional_parquet.alltypes.
# The seed INSERT is followed by ten self-inserts. Each self-insert doubles the
# number of rows, so the table ends up with 2^10 = 1024 times the seed row count.
# The final SELECT checks the resulting row count (7475200 = 7300 * 1024, which
# implies 7300 seed rows).
# NOTE(review): the large row count is presumably needed so that the position
# delete data written by the later DELETE exceeds a single delete file per
# partition. TODO confirm.
CREATE TABLE multiple_deletes(
str STRING NULL,
year INT NULL,
last_modified TIMESTAMP NULL)
PARTITIONED BY SPEC (year)
STORED AS ICEBERG
TBLPROPERTIES ('format-version'='2');
INSERT INTO multiple_deletes SELECT string_col, year, timestamp_col FROM functional_parquet.alltypes;
INSERT INTO multiple_deletes SELECT * FROM multiple_deletes;
INSERT INTO multiple_deletes SELECT * FROM multiple_deletes;
INSERT INTO multiple_deletes SELECT * FROM multiple_deletes;
INSERT INTO multiple_deletes SELECT * FROM multiple_deletes;
INSERT INTO multiple_deletes SELECT * FROM multiple_deletes;
INSERT INTO multiple_deletes SELECT * FROM multiple_deletes;
INSERT INTO multiple_deletes SELECT * FROM multiple_deletes;
INSERT INTO multiple_deletes SELECT * FROM multiple_deletes;
INSERT INTO multiple_deletes SELECT * FROM multiple_deletes;
INSERT INTO multiple_deletes SELECT * FROM multiple_deletes;
SELECT count(*) FROM multiple_deletes;
---- RESULTS
7475200
---- TYPES
BIGINT
====
---- QUERY
# The DELETE under test. It removes every row whose 'last_modified' is on or
# after 2008-12-30, and the expected count of 0 shows that this predicate
# matches all rows of the table.
# parquet_file_size is lowered to 8m before the DELETE runs.
# NOTE(review): the 8m limit is presumably what makes the delete sink roll over
# to a second delete file within each partition, which is the scenario that
# crashed in IMPALA-14496. TODO confirm.
SET parquet_file_size=8m;
DELETE FROM multiple_deletes WHERE last_modified >= '2008-12-30';
SELECT count(*) FROM multiple_deletes;
---- RESULTS
0
---- TYPES
BIGINT
====
---- QUERY
# Verify that the above DELETE statement wrote 4 delete files (2 per partition) in total.
# The count is taken from the table's `files` metadata table, restricted to
# entries whose 'content' value is 1.
# NOTE(review): content = 1 presumably denotes position delete files in the
# Iceberg spec's encoding (0 would be data files). TODO confirm.
# The expected value depends on the data volume created by the setup section and
# on the parquet_file_size used by the DELETE section of this file.
SELECT count(*) FROM $DATABASE.multiple_deletes.`files`
WHERE content = 1;
---- RESULTS
4
---- TYPES
BIGINT
====