diff --git a/be/src/catalog/catalog-server.cc b/be/src/catalog/catalog-server.cc index f14406daa..44352901d 100644 --- a/be/src/catalog/catalog-server.cc +++ b/be/src/catalog/catalog-server.cc @@ -302,6 +302,10 @@ DEFINE_bool(keeps_warmup_tables_loaded, false, "--invalidate_tables_on_memory_pressure is turned on. Otherwise, these tables will " "keep being loaded and invalidated."); +DEFINE_bool(truncate_external_tables_with_hms, true, "Always use HMS to truncate " + "external tables. When false, HMS api is only used for tables being replicated. Using " + "HMS has the effect of deleting files recursively and triggering an HMS event."); + DECLARE_string(state_store_host); DECLARE_int32(state_store_port); DECLARE_string(state_store_2_host); diff --git a/be/src/util/backend-gflag-util.cc b/be/src/util/backend-gflag-util.cc index 1dfe632f3..90385f0f4 100644 --- a/be/src/util/backend-gflag-util.cc +++ b/be/src/util/backend-gflag-util.cc @@ -147,6 +147,7 @@ DECLARE_int32(reset_metadata_lock_duration_ms); DECLARE_int32(catalog_reset_max_threads); DECLARE_string(warmup_tables_config_file); DECLARE_bool(keeps_warmup_tables_loaded); +DECLARE_bool(truncate_external_tables_with_hms); // HS2 SAML2.0 configuration // Defined here because TAG_FLAG caused issues in global-flags.cc @@ -552,6 +553,7 @@ Status PopulateThriftBackendGflags(TBackendGflags& cfg) { cfg.__set_catalog_reset_max_threads(FLAGS_catalog_reset_max_threads); cfg.__set_warmup_tables_config_file(FLAGS_warmup_tables_config_file); cfg.__set_keeps_warmup_tables_loaded(FLAGS_keeps_warmup_tables_loaded); + cfg.__set_truncate_external_tables_with_hms(FLAGS_truncate_external_tables_with_hms); return Status::OK(); } diff --git a/common/thrift/BackendGflags.thrift b/common/thrift/BackendGflags.thrift index ba42add11..445a40dc2 100644 --- a/common/thrift/BackendGflags.thrift +++ b/common/thrift/BackendGflags.thrift @@ -349,4 +349,6 @@ struct TBackendGflags { 158: required string warmup_tables_config_file 159: required bool
keeps_warmup_tables_loaded + + 160: required bool truncate_external_tables_with_hms } diff --git a/fe/src/main/java/org/apache/impala/service/BackendConfig.java b/fe/src/main/java/org/apache/impala/service/BackendConfig.java index af81e4812..a2b44df2b 100644 --- a/fe/src/main/java/org/apache/impala/service/BackendConfig.java +++ b/fe/src/main/java/org/apache/impala/service/BackendConfig.java @@ -597,4 +597,8 @@ public class BackendConfig { public boolean keepsWarmupTablesLoaded() { return backendCfg_.keeps_warmup_tables_loaded; } + + public boolean truncateExternalTablesWithHms() { + return backendCfg_.truncate_external_tables_with_hms; + } } diff --git a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java index f16e7b33f..76040125f 100644 --- a/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java +++ b/fe/src/main/java/org/apache/impala/service/CatalogOpExecutor.java @@ -3655,15 +3655,25 @@ public class CatalogOpExecutor { catalog_.getLock().writeLock().unlock(); modification.addCatalogServiceIdentifiersToTable(); HdfsTable hdfsTable = (HdfsTable) table; - boolean isTableBeingReplicated = false; + boolean truncateWithHms = BackendConfig.INSTANCE.truncateExternalTablesWithHms(); Stopwatch sw = Stopwatch.createStarted(); + + if (truncateWithHms && !params.isDelete_stats()) { + throw new ImpalaRuntimeException("Setting the query option " + + "'DELETE_STATS_IN_TRUNCATE' to false is not supported when the flag " + + "'--truncate_external_tables_with_hms' is set to true."); + } + try { - // if the table is being replicated we issue the HMS API to truncate the table - // since it generates additional events which are used by Hive Replication. try (MetaStoreClient client = catalog_.getMetaStoreClient(catalogTimeline)) { - if (isTableBeingReplicated(client.getHiveClient(), hdfsTable)) { - isTableBeingReplicated = true; - // We will issue HMS API in these cases. 
Register in-flight event before we do. + if (!truncateWithHms) { + // if the table is being replicated we issue the HMS API to truncate the table + // since it generates additional events which are used by Hive Replication. + truncateWithHms = isTableBeingReplicated(client.getHiveClient(), hdfsTable); + } + + if (truncateWithHms) { + // We will issue an HMS API call. Register in-flight event before we do. modification.registerInflightEvent(); String dbName = Preconditions.checkNotNull(hdfsTable.getDb()).getName(); client.getHiveClient() @@ -3673,7 +3683,7 @@ public class CatalogOpExecutor { hdfsTable.getFullName(), sw.elapsed(TimeUnit.MILLISECONDS)); } } - if (!isTableBeingReplicated) { + if (!truncateWithHms) { // when table is replicated we let the HMS API handle the file deletion logic // otherwise we delete the files. Collection parts = hdfsTable.loadAllPartitions(); diff --git a/testdata/workloads/functional-query/queries/QueryTest/truncate-table-no-delete-stats.test b/testdata/workloads/functional-query/queries/QueryTest/truncate-table-no-delete-stats.test new file mode 100644 index 000000000..b69ff3e23 --- /dev/null +++ b/testdata/workloads/functional-query/queries/QueryTest/truncate-table-no-delete-stats.test @@ -0,0 +1,107 @@ +==== +---- QUERY +# First create a partitioned table +create table t4 like functional.alltypes +location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/t4'; +insert into t4 partition(year, month) select * from functional.alltypes; +compute incremental stats t4; +# if DELETE_STATS_IN_TRUNCATE is unset then truncate should not +# delete the table statistics. 
+truncate table t4; +show table stats t4; +---- LABELS +YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY +---- RESULTS +'2009','1',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2009','2',280,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2009','3',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2009','4',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2009','5',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2009','6',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2009','7',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2009','8',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2009','9',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2009','10',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2009','11',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2009','12',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2010','1',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2010','2',280,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2010','3',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2010','4',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2010','5',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2010','6',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2010','7',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' 
+'2010','8',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2010','9',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2010','10',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2010','11',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'2010','12',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' +'Total','',7300,0,'0B','0B','','','','','' +---- TYPES +STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING +==== +---- QUERY +show column stats t4; +---- LABELS +COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES +---- RESULTS +'id','INT',7300,0,4,4,-1,-1 +'bool_col','BOOLEAN',2,0,1,1,3650,3650 +'tinyint_col','TINYINT',10,0,1,1,-1,-1 +'smallint_col','SMALLINT',10,0,2,2,-1,-1 +'int_col','INT',10,0,4,4,-1,-1 +'bigint_col','BIGINT',10,0,8,8,-1,-1 +'float_col','FLOAT',10,0,4,4,-1,-1 +'double_col','DOUBLE',10,0,8,8,-1,-1 +'date_string_col','STRING',736,0,8,8,-1,-1 +'string_col','STRING',10,0,1,1,-1,-1 +'timestamp_col','TIMESTAMP',7300,0,16,16,-1,-1 +'year','INT',2,0,4,4,-1,-1 +'month','INT',12,0,4,4,-1,-1 +---- TYPES +STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT +==== +---- QUERY +# Verify that truncate was successful +select count(*) from t4; +---- RESULTS +0 +---- TYPES +BIGINT +==== +---- QUERY +# Unpartitioned table case: Show that if DELETE_STATS_IN_TRUNCATE is +# unset truncation removes all files +# but does not delete table and column stats. 
+create table t6 like functional.tinytable +location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/t6'; +insert into t6 select * from functional.tinytable; +compute incremental stats t6; +truncate table t6; +show table stats t6; +---- LABELS +#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY +---- RESULTS +3,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*,'$ERASURECODE_POLICY' +---- TYPES +BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING +==== +---- QUERY +# Show that the truncation did not remove the column stats. +show column stats t6; +---- LABELS +COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES +---- RESULTS +'a','STRING',3,0,8,6.666666507720947,-1,-1 +'b','STRING',3,0,7,4,-1,-1 +---- TYPES +STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT +==== +---- QUERY +# Verify that truncate was successful +select count(*) from t6; +---- RESULTS +0 +---- TYPES +BIGINT +==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test b/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test index 084ed75c1..c8976da29 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test +++ b/testdata/workloads/functional-query/queries/QueryTest/truncate-table.test @@ -199,74 +199,15 @@ create table t4 like functional.alltypes location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/t4'; insert into t4 partition(year, month) select * from functional.alltypes; compute incremental stats t4; -# if DELETE_STATS_IN_TRUNCATE is unset then truncate should not -# delete the table statistics. +# DELETE_STATS_IN_TRUNCATE=false is unsupported unless +# '--truncate_external_tables_with_hms=false' (for that case, see +# custom_cluster/test_no_delete_stats_in_truncate.py). 
set DELETE_STATS_IN_TRUNCATE=0; truncate table t4; -show table stats t4; ----- LABELS -YEAR, MONTH, #ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY ----- RESULTS -'2009','1',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2009','2',280,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2009','3',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2009','4',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2009','5',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2009','6',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2009','7',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2009','8',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2009','9',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2009','10',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2009','11',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2009','12',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2010','1',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2010','2',280,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2010','3',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2010','4',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2010','5',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2010','6',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2010','7',310,0,'0B','NOT CACHED','NOT 
CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2010','8',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2010','9',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2010','10',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2010','11',300,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'2010','12',310,0,'0B','NOT CACHED','NOT CACHED','TEXT','true',regex:.*,'$ERASURECODE_POLICY' -'Total','',7300,0,'0B','0B','','','','','' ----- TYPES -STRING, STRING, BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING +---- CATCH +ImpalaRuntimeException: Setting the query option 'DELETE_STATS_IN_TRUNCATE' to false is not supported when the flag '--truncate_external_tables_with_hms' is set to true. ==== ---- QUERY -show column stats t4; ----- LABELS -COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES ----- RESULTS -'id','INT',7300,0,4,4,-1,-1 -'bool_col','BOOLEAN',2,0,1,1,3650,3650 -'tinyint_col','TINYINT',10,0,1,1,-1,-1 -'smallint_col','SMALLINT',10,0,2,2,-1,-1 -'int_col','INT',10,0,4,4,-1,-1 -'bigint_col','BIGINT',10,0,8,8,-1,-1 -'float_col','FLOAT',10,0,4,4,-1,-1 -'double_col','DOUBLE',10,0,8,8,-1,-1 -'date_string_col','STRING',736,0,8,8,-1,-1 -'string_col','STRING',10,0,1,1,-1,-1 -'timestamp_col','TIMESTAMP',7300,0,16,16,-1,-1 -'year','INT',2,0,4,4,-1,-1 -'month','INT',12,0,4,4,-1,-1 ----- TYPES -STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT -==== ----- QUERY -#Verify that truncate was successful -select count(*) from t4; ----- RESULTS -0 ----- TYPES -BIGINT -==== ----- QUERY -insert into t4 partition(year, month) select * from functional.alltypes; -compute incremental stats t4; # if DELETE_STATS_IN_TRUNCATE is set then truncate should # delete the statistics. 
set DELETE_STATS_IN_TRUNCATE=1; @@ -334,47 +275,20 @@ select count(*) from t4; BIGINT ==== ---- QUERY -# Unpartitioned table case: Show that if DELETE_STATS_IN_TRUNCATE is -# unset truncation removes all files -# but does not delete table and column stats. +# Unpartitioned table case: +# DELETE_STATS_IN_TRUNCATE=false is unsupported unless +# '--truncate_external_tables_with_hms=false' (for that case, see +# custom_cluster/test_no_delete_stats_in_truncate.py). create table t6 like functional.tinytable location '$FILESYSTEM_PREFIX/test-warehouse/$DATABASE.db/t6'; insert into t6 select * from functional.tinytable; compute incremental stats t6; set DELETE_STATS_IN_TRUNCATE=0; truncate table t6; -show table stats t6; ----- LABELS -#ROWS, #FILES, SIZE, BYTES CACHED, CACHE REPLICATION, FORMAT, INCREMENTAL STATS, LOCATION, EC POLICY ----- RESULTS -3,0,'0B','NOT CACHED','NOT CACHED','TEXT','false',regex:.*,'$ERASURECODE_POLICY' ----- TYPES -BIGINT, BIGINT, STRING, STRING, STRING, STRING, STRING, STRING, STRING +---- CATCH +ImpalaRuntimeException: Setting the query option 'DELETE_STATS_IN_TRUNCATE' to false is not supported when the flag '--truncate_external_tables_with_hms' is set to true. ==== ---- QUERY -# Show that the truncation removed the column stats. -show column stats t6; ----- LABELS -COLUMN, TYPE, #DISTINCT VALUES, #NULLS, MAX SIZE, AVG SIZE, #TRUES, #FALSES ----- RESULTS -'a','STRING',3,0,8,6.666666507720947,-1,-1 -'b','STRING',3,0,7,4,-1,-1 ----- TYPES -STRING, STRING, BIGINT, BIGINT, BIGINT, DOUBLE, BIGINT, BIGINT -==== ----- QUERY -#Verify that truncate was successful -select count(*) from t6; ----- RESULTS -0 ----- TYPES -BIGINT -==== ----- QUERY -# Unpartitioned table case: Show that if DELETE_STATS_IN_TRUNCATE is -# set truncation removes all files and deletes stats. 
-insert into t6 select * from functional.tinytable; -compute incremental stats t6; # table stats should be deleted set DELETE_STATS_IN_TRUNCATE=1; truncate table t6; diff --git a/tests/custom_cluster/test_events_custom_configs.py b/tests/custom_cluster/test_events_custom_configs.py index bcf86c9fc..a13686700 100644 --- a/tests/custom_cluster/test_events_custom_configs.py +++ b/tests/custom_cluster/test_events_custom_configs.py @@ -149,14 +149,7 @@ class TestEventProcessingCustomConfigsBase(CustomClusterTestSuite): # insert overwrite query from Impala also generates a INSERT self-event "insert overwrite table {0}.{1} partition " "(year, month) select * from functional.alltypessmall where year=2009 " - "and month=1".format(db_name, tbl_name), - # events processor doesn't process delete column stats events currently, - # however, in case of incremental stats, there could be alter table and - # alter partition events which should be ignored. Hence we run compute stats - # before to make sure that the truncate table command generated alter events - # are ignored. - "compute incremental stats {0}.{1}".format(db_name, tbl_name), - "{0} {1}.{2}".format(TRUNCATE_TBL_STMT, db_name, tbl_name)], + "and month=1".format(db_name, tbl_name)], False: [ "create table {0}.{1} like functional.alltypessmall " "stored as parquet".format(db_name, tbl_name), @@ -223,7 +216,14 @@ class TestEventProcessingCustomConfigsBase(CustomClusterTestSuite): "insert overwrite {0}.{1} partition(part) select * from {0}.{1}".format( db_name, acid_tbl_name), # recover partitions will generate add_partition events - "alter table {0}.{1} recover partitions".format(db_name, recover_tbl_name) + "alter table {0}.{1} recover partitions".format(db_name, recover_tbl_name), + # events processor doesn't process delete column stats events currently, + # however, in case of incremental stats, there could be alter table and + # alter partition events which should be ignored. 
Hence we run compute stats + # before to make sure that the truncate table command generated alter events + # are ignored. + "compute incremental stats {0}.{1}".format(db_name, tbl_name), + "{0} {1}.{2}".format(TRUNCATE_TBL_STMT, db_name, tbl_name) ] } return self_event_test_queries @@ -337,8 +337,10 @@ class TestEventProcessingCustomConfigsBase(CustomClusterTestSuite): if (TRUNCATE_TBL_STMT not in stmt): assert tbls_refreshed == tbls_refreshed_after, \ "Failing query(impala={}): {}".format(use_impala_client, stmt) - assert partitions_refreshed == partitions_refreshed_after, \ - "Failing query(impala={}): {}".format(use_impala_client, stmt) + # TRUNCATE refreshes partitions + if (TRUNCATE_TBL_STMT not in stmt): + assert partitions_refreshed == partitions_refreshed_after, \ + "Failing query(impala={}): {}".format(use_impala_client, stmt) else: # hive was used to run the stmts, any events generated should not have been deemed # as self events unless there are empty partition add/drop events diff --git a/tests/custom_cluster/test_no_delete_stats_in_truncate.py b/tests/custom_cluster/test_no_delete_stats_in_truncate.py new file mode 100644 index 000000000..c7b922d4b --- /dev/null +++ b/tests/custom_cluster/test_no_delete_stats_in_truncate.py @@ -0,0 +1,40 @@ +#!/usr/bin/env impala-python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +from __future__ import absolute_import, division, print_function + +import pytest + +from tests.common.custom_cluster_test_suite import CustomClusterTestSuite + + +@CustomClusterTestSuite.with_args( + catalogd_args="--truncate_external_tables_with_hms=false", + cluster_size=1) +class TestNoDeleteStatsInTruncate(CustomClusterTestSuite): + @classmethod + def setup_class(cls): + if cls.exploration_strategy() != 'exhaustive': + pytest.skip('runs only in exhaustive') + super(TestNoDeleteStatsInTruncate, cls).setup_class() + + def test_stats_remain_after_truncate(self, unique_database, vector): + vector.get_value('exec_option')['delete_stats_in_truncate'] = False + self.run_test_case('QueryTest/truncate-table-no-delete-stats', vector, + use_db=unique_database) diff --git a/tests/metadata/test_ddl.py b/tests/metadata/test_ddl.py index 867fe6528..de9753014 100644 --- a/tests/metadata/test_ddl.py +++ b/tests/metadata/test_ddl.py @@ -177,10 +177,10 @@ class TestDdlStatements(TestDdlBase): assert len(self.filesystem_client.ls( "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 2 - # Truncating the table removes the data files and preserves the table's directory + # Truncating the table removes the data files and the staging directory self.client.execute("truncate table {0}.t1".format(unique_database)) assert len(self.filesystem_client.ls( - "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 1 + "{1}/{0}.db/t1/".format(unique_database, WAREHOUSE))) == 0 self.client.execute( "create table {0}.t2(i int) partitioned by (p int)".format(unique_database)) diff --git a/tests/metadata/test_recursive_listing.py b/tests/metadata/test_recursive_listing.py index 8b00cc273..9e064a4ad 100644 --- a/tests/metadata/test_recursive_listing.py +++ b/tests/metadata/test_recursive_listing.py @@ -40,9 +40,6 @@ class TestRecursiveListing(ImpalaTestSuite): 
cls.ImpalaTestMatrix.clear_dimension('exec_option') cls.ImpalaTestMatrix.add_dimension( create_uncompressed_text_dimension(cls.get_workload())) - cls.ImpalaTestMatrix.add_constraint(lambda v: - (v.get_value('table_format').file_format == 'text' - and v.get_value('table_format').compression_codec == 'none')) def _show_files(self, table): files = self.client.execute("show files in {0}".format(table)) @@ -142,6 +139,18 @@ class TestRecursiveListing(ImpalaTestSuite): assert len(self._show_files(fq_tbl_name)) == 1 assert len(self._get_rows(fq_tbl_name)) == 1 + # Verify that TRUNCATE removes data files in subdirectories too. + # Regression test for IMPALA-13778. + self.filesystem_client.make_dir("{0}/dir1".format(part_path)) + self.filesystem_client.create_file("{0}/dir1/file1.txt".format(part_path), "file1") + self.execute_query_expect_success(self.client, "refresh {0}".format(fq_tbl_name)) + assert len(self._show_files(fq_tbl_name)) == 2 + assert len(self._get_rows(fq_tbl_name)) == 2 + + self.execute_query_expect_success(self.client, "truncate {0}".format(fq_tbl_name)) + assert len(self._show_files(fq_tbl_name)) == 0 + assert len(self._get_rows(fq_tbl_name)) == 0 + @SkipIfFS.no_partial_listing @pytest.mark.execute_serially def test_large_staging_dirs(self, unique_database):