mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
Impala crashes when it needs to write multiple delete files per partition in a single DELETE operation. This happens because IcebergBufferedDeleteSink has its own DmlExecState object, but some of the methods in TableSinkBase use the RuntimeState's DmlExecState object instead. That is, we may add a partition to the IcebergBufferedDeleteSink's DmlExecState, but later expect to find it in the RuntimeState's DmlExecState. This patch adds new methods to TableSinkBase that are specific to writing delete files, and they always take a DmlExecState object as a parameter. They are now used by IcebergBufferedDeleteSink. Testing: * added e2e tests Change-Id: I46266007a6356e9ff3b63369dd855aff1396bb72 Reviewed-on: http://gerrit.cloudera.org:8080/23537 Reviewed-by: Mihaly Szjatinya <mszjat@pm.me> Reviewed-by: Michael Smith <michael.smith@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
50 lines
1.5 KiB
Plaintext
50 lines
1.5 KiB
Plaintext
====
|
|
---- QUERY
# Regression test for IMPALA-14496: a single DELETE operation that has to write
# more than one delete file per partition.
# Setup: create a format-version 2 Iceberg table partitioned by 'year', load it from
# functional_parquet.alltypes, then double its contents ten times by inserting the
# table into itself. The final query checks the resulting row count.
CREATE TABLE multiple_deletes (
  str STRING NULL,
  year INT NULL,
  last_modified TIMESTAMP NULL
)
PARTITIONED BY SPEC (year)
STORED AS ICEBERG
TBLPROPERTIES ('format-version'='2');

INSERT INTO multiple_deletes
SELECT string_col, year, timestamp_col
FROM functional_parquet.alltypes;
INSERT INTO multiple_deletes
SELECT str, year, last_modified FROM multiple_deletes;
INSERT INTO multiple_deletes
SELECT str, year, last_modified FROM multiple_deletes;
INSERT INTO multiple_deletes
SELECT str, year, last_modified FROM multiple_deletes;
INSERT INTO multiple_deletes
SELECT str, year, last_modified FROM multiple_deletes;
INSERT INTO multiple_deletes
SELECT str, year, last_modified FROM multiple_deletes;
INSERT INTO multiple_deletes
SELECT str, year, last_modified FROM multiple_deletes;
INSERT INTO multiple_deletes
SELECT str, year, last_modified FROM multiple_deletes;
INSERT INTO multiple_deletes
SELECT str, year, last_modified FROM multiple_deletes;
INSERT INTO multiple_deletes
SELECT str, year, last_modified FROM multiple_deletes;
INSERT INTO multiple_deletes
SELECT str, year, last_modified FROM multiple_deletes;

SELECT count(*)
FROM multiple_deletes;
---- RESULTS
7475200
---- TYPES
BIGINT
|
|
====
|
|
---- QUERY
# Run one DELETE with parquet_file_size lowered to 8m, then check that no rows are
# left in the table.
# NOTE(review): the small parquet_file_size is presumably what forces the sink to
# write more than one delete file per partition - confirm.
SET parquet_file_size=8m;
DELETE FROM multiple_deletes
WHERE last_modified >= '2008-12-30';

SELECT count(*)
FROM multiple_deletes;
---- RESULTS
0
---- TYPES
BIGINT
|
|
====
|
|
---- QUERY
# Count the matching entries in the table's `files` metadata table. The DELETE
# statement of the preceding test case is expected to have written 4 delete files
# in total, i.e. 2 per partition.
# NOTE(review): 'content = 1' should select position delete files per the Iceberg
# spec - confirm.
SELECT count(*)
FROM $DATABASE.multiple_deletes.`files`
WHERE content = 1;
---- RESULTS
4
---- TYPES
BIGINT
|
|
====
|