mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-14224: Cleanup subdirectories in TRUNCATE
If an external table contains data files in subdirectories, and
recursive listing is enabled, Impala considers the files in the
subdirectories as part of the table. However, currently INSERT
OVERWRITE and TRUNCATE do not always delete these files, leading to
data corruption.

This change takes care of TRUNCATE.

Currently TRUNCATE can be run in two different ways:
- if the table is being replicated, the HMS API is used
- otherwise catalogd deletes the files itself.

Two differences between these methods are:
- calling HMS leads to an ALTER_TABLE event
- calling HMS leads to a recursive delete, while catalogd only deletes
  files directly in the partition/table directory.

This commit introduces the '--truncate_external_tables_with_hms'
startup flag, with default value 'true'. If this flag is set to true,
Impala always uses the HMS API for TRUNCATE operations.

Note that HMS always deletes stats on TRUNCATE, so setting the
DELETE_STATS_IN_TRUNCATE query option to false is not supported if
'--truncate_external_tables_with_hms' is set to true: an exception is
thrown.

Testing:
- extended the tests in
  test_recursive_listing.py::TestRecursiveListing to include TRUNCATE
- Moved tests with DELETE_STATS_IN_TRUNCATE=0 from truncate-table.test
  to truncate-table-no-delete-stats.test, which is run in a new custom
  cluster test (custom_cluster/test_no_delete_stats_in_truncate.py).

Change-Id: Ic0fcc6cf1eca8a0bcf2f93dbb61240da05e35519
Reviewed-on: http://gerrit.cloudera.org:8080/23166
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
c446291ccf
commit
9f12714d1c
@@ -302,6 +302,10 @@ DEFINE_bool(keeps_warmup_tables_loaded, false,
|
||||
"--invalidate_tables_on_memory_pressure is turned on. Otherwise, these tables will "
|
||||
"keep being loaded and invalidated.");
|
||||
|
||||
// Startup flag selecting how TRUNCATE is executed on external tables.
// Adjacent string literals are concatenated verbatim, so every fragment of the help
// text below ends with a space; without it the words at the line breaks run together
// in the --help output.
DEFINE_bool(truncate_external_tables_with_hms, true,
    "Always use HMS to truncate external tables. When false, HMS api is only used for "
    "tables being replicated. Using HMS has the effect of deleting files recursively "
    "and triggering an HMS event.");
|
||||
|
||||
DECLARE_string(state_store_host);
|
||||
DECLARE_int32(state_store_port);
|
||||
DECLARE_string(state_store_2_host);
|
||||
|
||||
@@ -147,6 +147,7 @@ DECLARE_int32(reset_metadata_lock_duration_ms);
|
||||
DECLARE_int32(catalog_reset_max_threads);
|
||||
DECLARE_string(warmup_tables_config_file);
|
||||
DECLARE_bool(keeps_warmup_tables_loaded);
|
||||
DECLARE_bool(truncate_external_tables_with_hms);
|
||||
|
||||
// HS2 SAML2.0 configuration
|
||||
// Defined here because TAG_FLAG caused issues in global-flags.cc
|
||||
@@ -552,6 +553,7 @@ Status PopulateThriftBackendGflags(TBackendGflags& cfg) {
|
||||
cfg.__set_catalog_reset_max_threads(FLAGS_catalog_reset_max_threads);
|
||||
cfg.__set_warmup_tables_config_file(FLAGS_warmup_tables_config_file);
|
||||
cfg.__set_keeps_warmup_tables_loaded(FLAGS_keeps_warmup_tables_loaded);
|
||||
cfg.__set_truncate_external_tables_with_hms(FLAGS_truncate_external_tables_with_hms);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@@ -349,4 +349,6 @@ struct TBackendGflags {
|
||||
158: required string warmup_tables_config_file
|
||||
|
||||
159: required bool keeps_warmup_tables_loaded
|
||||
|
||||
160: required bool truncate_external_tables_with_hms
|
||||
}
|
||||
|
||||
@@ -597,4 +597,8 @@ public class BackendConfig {
|
||||
public boolean keepsWarmupTablesLoaded() {
|
||||
return backendCfg_.keeps_warmup_tables_loaded;
|
||||
}
|
||||
|
||||
public boolean truncateExternalTablesWithHms() {
|
||||
return backendCfg_.truncate_external_tables_with_hms;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3655,15 +3655,25 @@ public class CatalogOpExecutor {
|
||||
catalog_.getLock().writeLock().unlock();
|
||||
modification.addCatalogServiceIdentifiersToTable();
|
||||
HdfsTable hdfsTable = (HdfsTable) table;
|
||||
boolean isTableBeingReplicated = false;
|
||||
boolean truncateWithHms = BackendConfig.INSTANCE.truncateExternalTablesWithHms();
|
||||
Stopwatch sw = Stopwatch.createStarted();
|
||||
|
||||
if (truncateWithHms && !params.isDelete_stats()) {
|
||||
throw new ImpalaRuntimeException("Setting the query option "
|
||||
+ "'DELETE_STATS_IN_TRUNCATE' to false is not supported when the flag "
|
||||
+ "'--truncate_external_tables_with_hms' is set to true.");
|
||||
}
|
||||
|
||||
try {
|
||||
// if the table is being replicated we issue the HMS API to truncate the table
|
||||
// since it generates additional events which are used by Hive Replication.
|
||||
try (MetaStoreClient client = catalog_.getMetaStoreClient(catalogTimeline)) {
|
||||
if (isTableBeingReplicated(client.getHiveClient(), hdfsTable)) {
|
||||
isTableBeingReplicated = true;
|
||||
// We will issue HMS API in these cases. Register in-flight event before we do.
|
||||
if (!truncateWithHms) {
|
||||
// if the table is being replicated we issue the HMS API to truncate the table
|
||||
// since it generates additional events which are used by Hive Replication.
|
||||
truncateWithHms = isTableBeingReplicated(client.getHiveClient(), hdfsTable);
|
||||
}
|
||||
|
||||
if (truncateWithHms) {
|
||||
// We will issue an HMS API call. Register in-flight event before we do.
|
||||
modification.registerInflightEvent();
|
||||
String dbName = Preconditions.checkNotNull(hdfsTable.getDb()).getName();
|
||||
client.getHiveClient()
|
||||
@@ -3673,7 +3683,7 @@ public class CatalogOpExecutor {
|
||||
hdfsTable.getFullName(), sw.elapsed(TimeUnit.MILLISECONDS));
|
||||
}
|
||||
}
|
||||
if (!isTableBeingReplicated) {
|
||||
if (!truncateWithHms) {
|
||||
// when table is replicated we let the HMS API handle the file deletion logic
|
||||
// otherwise we delete the files.
|
||||
Collection<? extends FeFsPartition> parts = hdfsTable.loadAllPartitions();
|
||||
|
||||
107
testdata/workloads/functional-query/queries/QueryTest/truncate-table-no-delete-stats.test
vendored
Normal file
107
testdata/workloads/functional-query/queries/QueryTest/truncate-table-no-delete-stats.test
vendored
Normal file
@@ -0,0 +1,107 @@
|
||||
====
|
||||
---- QUERY
|
||||
# First create a partitioned table
|
||||
create table t4 like functional.alltypes
|
||||
location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/t4';
|
||||
insert into t4 partition(year, month) select * from functional.alltypes;
|
||||
compute incremental stats t4;
|
||||
# if DELETE_STATS_IN_TRUNCATE is false then truncate should not
|
||||
# delete the table statistics.
|
||||
truncate table t4;
|
||||
show table stats t4;
|
||||
---- LABELS
|
||||
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
||||
---- RESULTS
|
||||
'2009','1',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','2',280,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','3',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','4',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','5',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','6',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','7',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','8',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','9',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','10',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','11',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','12',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','1',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','2',280,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','3',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','4',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','5',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','6',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','7',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','8',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','9',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','10',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','11',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','12',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'Total','',7300,0,'0B','0B','','','','',''
|
||||
---- TYPES
|
||||
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
show column stats t4;
|
||||
---- LABELS
|
||||
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
||||
---- RESULTS
|
||||
'id','INT',7300,0,4,4,-1,-1
|
||||
'bool_col','BOOLEAN',2,0,1,1,3650,3650
|
||||
'tinyint_col','TINYINT',10,0,1,1,-1,-1
|
||||
'smallint_col','SMALLINT',10,0,2,2,-1,-1
|
||||
'int_col','INT',10,0,4,4,-1,-1
|
||||
'bigint_col','BIGINT',10,0,8,8,-1,-1
|
||||
'float_col','FLOAT',10,0,4,4,-1,-1
|
||||
'double_col','DOUBLE',10,0,8,8,-1,-1
|
||||
'date_string_col','STRING',736,0,8,8,-1,-1
|
||||
'string_col','STRING',10,0,1,1,-1,-1
|
||||
'timestamp_col','TIMESTAMP',7300,0,16,16,-1,-1
|
||||
'year','INT',2,0,4,4,-1,-1
|
||||
'month','INT',12,0,4,4,-1,-1
|
||||
---- TYPES
|
||||
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
# Verify that truncate was successful
|
||||
select count(*) from t4;
|
||||
---- RESULTS
|
||||
0
|
||||
---- TYPES
|
||||
BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
# Unpartitioned table case: Show that if DELETE_STATS_IN_TRUNCATE is
|
||||
# false, truncation removes all files
|
||||
# but does not delete table and column stats.
|
||||
create table t6 like functional.tinytable
|
||||
location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/t6';
|
||||
insert into t6 select * from functional.tinytable;
|
||||
compute incremental stats t6;
|
||||
truncate table t6;
|
||||
show table stats t6;
|
||||
---- LABELS
|
||||
#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
||||
---- RESULTS
|
||||
3,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*,'$ERASURECODE_POLICY'
|
||||
---- TYPES
|
||||
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
||||
====
|
||||
---- QUERY
|
||||
# Show that the truncation did not remove the column stats.
|
||||
show column stats t6;
|
||||
---- LABELS
|
||||
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
||||
---- RESULTS
|
||||
'a','STRING',3,0,8,6.666666507720947,-1,-1
|
||||
'b','STRING',3,0,7,4,-1,-1
|
||||
---- TYPES
|
||||
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
# Verify that truncate was successful
|
||||
select count(*) from t6;
|
||||
---- RESULTS
|
||||
0
|
||||
---- TYPES
|
||||
BIGINT
|
||||
====
|
||||
@@ -199,74 +199,15 @@ create table t4 like functional.alltypes
|
||||
location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/t4';
|
||||
insert into t4 partition(year, month) select * from functional.alltypes;
|
||||
compute incremental stats t4;
|
||||
# if DELETE_STATS_IN_TRUNCATE is unset then truncate should not
|
||||
# delete the table statistics.
|
||||
# DELETE_STATS_IN_TRUNCATE=false is unsupported unless
|
||||
# '--truncate_external_tables_with_hms=false' (for that case, see
|
||||
# custom_cluster/test_no_delete_stats_in_truncate.py).
|
||||
set DELETE_STATS_IN_TRUNCATE=0;
|
||||
truncate table t4;
|
||||
show table stats t4;
|
||||
---- LABELS
|
||||
YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
||||
---- RESULTS
|
||||
'2009','1',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','2',280,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','3',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','4',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','5',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','6',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','7',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','8',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','9',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','10',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','11',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2009','12',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','1',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','2',280,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','3',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','4',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','5',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','6',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','7',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','8',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','9',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','10',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','11',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'2010','12',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY'
|
||||
'Total','',7300,0,'0B','0B','','','','',''
|
||||
---- TYPES
|
||||
STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
||||
---- CATCH
|
||||
ImpalaRuntimeException: Setting the query option 'DELETE_STATS_IN_TRUNCATE' to false is not supported when the flag '--truncate_external_tables_with_hms' is set to true.
|
||||
====
|
||||
---- QUERY
|
||||
show column stats t4;
|
||||
---- LABELS
|
||||
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
||||
---- RESULTS
|
||||
'id','INT',7300,0,4,4,-1,-1
|
||||
'bool_col','BOOLEAN',2,0,1,1,3650,3650
|
||||
'tinyint_col','TINYINT',10,0,1,1,-1,-1
|
||||
'smallint_col','SMALLINT',10,0,2,2,-1,-1
|
||||
'int_col','INT',10,0,4,4,-1,-1
|
||||
'bigint_col','BIGINT',10,0,8,8,-1,-1
|
||||
'float_col','FLOAT',10,0,4,4,-1,-1
|
||||
'double_col','DOUBLE',10,0,8,8,-1,-1
|
||||
'date_string_col','STRING',736,0,8,8,-1,-1
|
||||
'string_col','STRING',10,0,1,1,-1,-1
|
||||
'timestamp_col','TIMESTAMP',7300,0,16,16,-1,-1
|
||||
'year','INT',2,0,4,4,-1,-1
|
||||
'month','INT',12,0,4,4,-1,-1
|
||||
---- TYPES
|
||||
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
#Verify that truncate was successful
|
||||
select count(*) from t4;
|
||||
---- RESULTS
|
||||
0
|
||||
---- TYPES
|
||||
BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
insert into t4 partition(year, month) select * from functional.alltypes;
|
||||
compute incremental stats t4;
|
||||
# if DELETE_STATS_IN_TRUNCATE is set then truncate should
|
||||
# delete the statistics.
|
||||
set DELETE_STATS_IN_TRUNCATE=1;
|
||||
@@ -334,47 +275,20 @@ select count(*) from t4;
|
||||
BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
# Unpartitioned table case: Show that if DELETE_STATS_IN_TRUNCATE is
|
||||
# unset truncation removes all files
|
||||
# but does not delete table and column stats.
|
||||
# Unpartitioned table case:
|
||||
# DELETE_STATS_IN_TRUNCATE=false is unsupported unless
|
||||
# '--truncate_external_tables_with_hms=false' (for that case, see
|
||||
# custom_cluster/test_no_delete_stats_in_truncate.py).
|
||||
create table t6 like functional.tinytable
|
||||
location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/t6';
|
||||
insert into t6 select * from functional.tinytable;
|
||||
compute incremental stats t6;
|
||||
set DELETE_STATS_IN_TRUNCATE=0;
|
||||
truncate table t6;
|
||||
show table stats t6;
|
||||
---- LABELS
|
||||
#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY
|
||||
---- RESULTS
|
||||
3,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*,'$ERASURECODE_POLICY'
|
||||
---- TYPES
|
||||
BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING
|
||||
---- CATCH
|
||||
ImpalaRuntimeException: Setting the query option 'DELETE_STATS_IN_TRUNCATE' to false is not supported when the flag '--truncate_external_tables_with_hms' is set to true.
|
||||
====
|
||||
---- QUERY
|
||||
# Show that the truncation removed the column stats.
|
||||
show column stats t6;
|
||||
---- LABELS
|
||||
COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES
|
||||
---- RESULTS
|
||||
'a','STRING',3,0,8,6.666666507720947,-1,-1
|
||||
'b','STRING',3,0,7,4,-1,-1
|
||||
---- TYPES
|
||||
STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
#Verify that truncate was successful
|
||||
select count(*) from t6;
|
||||
---- RESULTS
|
||||
0
|
||||
---- TYPES
|
||||
BIGINT
|
||||
====
|
||||
---- QUERY
|
||||
# Unpartitioned table case: Show that if DELETE_STATS_IN_TRUNCATE is
|
||||
# set truncation removes all files and deletes stats.
|
||||
insert into t6 select * from functional.tinytable;
|
||||
compute incremental stats t6;
|
||||
# table stats should be deleted
|
||||
set DELETE_STATS_IN_TRUNCATE=1;
|
||||
truncate table t6;
|
||||
|
||||
@@ -149,14 +149,7 @@ class TestEventProcessingCustomConfigsBase(CustomClusterTestSuite):
|
||||
# insert overwrite query from Impala also generates a INSERT self-event
|
||||
"insert overwrite table {0}.{1} partition "
|
||||
"(year, month) select * from functional.alltypessmall where year=2009 "
|
||||
"and month=1".format(db_name, tbl_name),
|
||||
# events processor doesn't process delete column stats events currently,
|
||||
# however, in case of incremental stats, there could be alter table and
|
||||
# alter partition events which should be ignored. Hence we run compute stats
|
||||
# before to make sure that the truncate table command generated alter events
|
||||
# are ignored.
|
||||
"compute incremental stats {0}.{1}".format(db_name, tbl_name),
|
||||
"{0} {1}.{2}".format(TRUNCATE_TBL_STMT, db_name, tbl_name)],
|
||||
"and month=1".format(db_name, tbl_name)],
|
||||
False: [
|
||||
"create table {0}.{1} like functional.alltypessmall "
|
||||
"stored as parquet".format(db_name, tbl_name),
|
||||
@@ -223,7 +216,14 @@ class TestEventProcessingCustomConfigsBase(CustomClusterTestSuite):
|
||||
"insert overwrite {0}.{1} partition(part) select * from {0}.{1}".format(
|
||||
db_name, acid_tbl_name),
|
||||
# recover partitions will generate add_partition events
|
||||
"alter table {0}.{1} recover partitions".format(db_name, recover_tbl_name)
|
||||
"alter table {0}.{1} recover partitions".format(db_name, recover_tbl_name),
|
||||
# events processor doesn't process delete column stats events currently,
|
||||
# however, in case of incremental stats, there could be alter table and
|
||||
# alter partition events which should be ignored. Hence we run compute stats
|
||||
# before to make sure that the truncate table command generated alter events
|
||||
# are ignored.
|
||||
"compute incremental stats {0}.{1}".format(db_name, tbl_name),
|
||||
"{0} {1}.{2}".format(TRUNCATE_TBL_STMT, db_name, tbl_name)
|
||||
]
|
||||
}
|
||||
return self_event_test_queries
|
||||
@@ -337,8 +337,10 @@ class TestEventProcessingCustomConfigsBase(CustomClusterTestSuite):
|
||||
if (TRUNCATE_TBL_STMT not in stmt):
|
||||
assert tbls_refreshed == tbls_refreshed_after, \
|
||||
"Failing query(impala={}): {}".format(use_impala_client, stmt)
|
||||
assert partitions_refreshed == partitions_refreshed_after, \
|
||||
"Failing query(impala={}): {}".format(use_impala_client, stmt)
|
||||
# TRUNCATE refreshes partitions
|
||||
if (TRUNCATE_TBL_STMT not in stmt):
|
||||
assert partitions_refreshed == partitions_refreshed_after, \
|
||||
"Failing query(impala={}): {}".format(use_impala_client, stmt)
|
||||
else:
|
||||
# hive was used to run the stmts, any events generated should not have been deemed
|
||||
# as self events unless there are empty partition add/drop events
|
||||
|
||||
40
tests/custom_cluster/test_no_delete_stats_in_truncate.py
Normal file
40
tests/custom_cluster/test_no_delete_stats_in_truncate.py
Normal file
@@ -0,0 +1,40 @@
|
||||
#!/usr/bin/env impala-python
|
||||
#
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import pytest
|
||||
|
||||
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
|
||||
|
||||
|
||||
@CustomClusterTestSuite.with_args(
    catalogd_args="--truncate_external_tables_with_hms=false",
    cluster_size=1)
class TestNoDeleteStatsInTruncate(CustomClusterTestSuite):
  """Runs the TRUNCATE tests that disable DELETE_STATS_IN_TRUNCATE against a
  single-node cluster whose catalogd is started with
  --truncate_external_tables_with_hms=false."""

  @classmethod
  def setup_class(cls):
    """Skips the whole class unless the exploration strategy is 'exhaustive'."""
    strategy = cls.exploration_strategy()
    if strategy != 'exhaustive':
      pytest.skip('runs only in exhaustive')
    super(TestNoDeleteStatsInTruncate, cls).setup_class()

  def test_stats_remain_after_truncate(self, unique_database, vector):
    """Executes truncate-table-no-delete-stats.test in 'unique_database' with the
    delete_stats_in_truncate exec option set to False."""
    exec_options = vector.get_value('exec_option')
    exec_options['delete_stats_in_truncate'] = False
    self.run_test_case(
        'QueryTest/truncate-table-no-delete-stats', vector, use_db=unique_database)
|
||||
@@ -177,10 +177,10 @@ class TestDdlStatements(TestDdlBase):
|
||||
assert len(self.filesystem_client.ls(
|
||||
"{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 2
|
||||
|
||||
# Truncating the table removes the data files and preserves the table's directory
|
||||
# Truncating the table removes the data files and the staging directory
|
||||
self.client.execute("truncate table {0}.t1".format(unique_database))
|
||||
assert len(self.filesystem_client.ls(
|
||||
"{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 1
|
||||
"{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 0
|
||||
|
||||
self.client.execute(
|
||||
"create table {0}.t2(i int) partitioned by (p int)".format(unique_database))
|
||||
|
||||
@@ -40,9 +40,6 @@ class TestRecursiveListing(ImpalaTestSuite):
|
||||
cls.ImpalaTestMatrix.clear_dimension('exec_option')
|
||||
cls.ImpalaTestMatrix.add_dimension(
|
||||
create_uncompressed_text_dimension(cls.get_workload()))
|
||||
cls.ImpalaTestMatrix.add_constraint(lambda v:
|
||||
(v.get_value('table_format').file_format == 'text'
|
||||
and v.get_value('table_format').compression_codec == 'none'))
|
||||
|
||||
def _show_files(self, table):
|
||||
files = self.client.execute("show files in {0}".format(table))
|
||||
@@ -142,6 +139,18 @@ class TestRecursiveListing(ImpalaTestSuite):
|
||||
assert len(self._show_files(fq_tbl_name)) == 1
|
||||
assert len(self._get_rows(fq_tbl_name)) == 1
|
||||
|
||||
# Verify that TRUNCATE removes data files in subdirectories too.
|
||||
# Regression test for IMPALA-13778.
|
||||
self.filesystem_client.make_dir("{0}/dir1".format(part_path))
|
||||
self.filesystem_client.create_file("{0}/dir1/file1.txt".format(part_path), "file1")
|
||||
self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name))
|
||||
assert len(self._show_files(fq_tbl_name)) == 2
|
||||
assert len(self._get_rows(fq_tbl_name)) == 2
|
||||
|
||||
self.execute_query_expect_success(self.client, "truncate {0}".format(fq_tbl_name))
|
||||
assert len(self._show_files(fq_tbl_name)) == 0
|
||||
assert len(self._get_rows(fq_tbl_name)) == 0
|
||||
|
||||
@SkipIfFS.no_partial_listing
|
||||
@pytest.mark.execute_serially
|
||||
def test_large_staging_dirs(self, unique_database):
|
||||
|
||||
Reference in New Issue
Block a user