IMPALA-14131: Add flag to configure the default value of

'impala.disableHmsSync'

FEATURE: Implement global 'disable_hms_sync_by_default' flag for event
processing. This change introduces a new catalogd startup flag,
`disable_hms_sync_by_default`, to simplify skipping/processing events.

Problem: Disabling event processing globally requires the tedious process
of setting the 'impala.disableHmsSync' property on every database and table,
especially when only a few specific tables need their events synced.

Solution: The new flag provides a global default for the
'impala.disableHmsSync' property.

Behavior:
- If `disable_hms_sync_by_default` is true (i.e. event sync is off by
default), event processing is skipped for all tables/databases unless
the property "impala.disableHmsSync"="false" is explicitly set.
- This allows users to easily keep event processing off by default
and opt-in specific databases or tables to start syncing.
- The check order is: table-property > db-property > global default.
- HMS polling remains independent and unaffected by this flag.

Change-Id: I4ee617aed48575502d9cf5cf2cbea6ec897d6839
Reviewed-on: http://gerrit.cloudera.org:8080/23487
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Sai Hemanth Gantasala
2025-10-02 14:29:27 -07:00
committed by Impala Public Jenkins
parent b581e45286
commit 1684c2d9da
8 changed files with 103 additions and 7 deletions

View File

@@ -318,6 +318,12 @@ DEFINE_bool(truncate_external_tables_with_hms, true, "Always use HMS to truncate
"external tables. When false, HMS api is only used for tables being replicated. Using" "external tables. When false, HMS api is only used for tables being replicated. Using"
"HMS has the effect of deleting files recursively and triggering an HMS event."); "HMS has the effect of deleting files recursively and triggering an HMS event.");
// Global default for the 'impala.disableHmsSync' property. It is consulted only when
// neither the table nor its database sets that property explicitly.
// NOTE: adjacent string literals are concatenated verbatim, so every fragment that is
// followed by another one must end with a space to keep the help text readable.
DEFINE_bool(disable_hms_sync_by_default, false, "Catalogd flag that globally skips "
    "HiveMetastore (HMS) event processing by default. If 'true', events are skipped for "
    "all objects (with the exception to database level events) unless "
    "'impala.disableHmsSync' is explicitly set to 'false' on a database or table. "
    "This simplifies rolling out event processing job-by-job.");
DECLARE_string(state_store_host); DECLARE_string(state_store_host);
DECLARE_int32(state_store_port); DECLARE_int32(state_store_port);
DECLARE_string(state_store_2_host); DECLARE_string(state_store_2_host);

View File

@@ -148,6 +148,7 @@ DECLARE_int32(catalog_reset_max_threads);
DECLARE_string(warmup_tables_config_file); DECLARE_string(warmup_tables_config_file);
DECLARE_bool(keeps_warmup_tables_loaded); DECLARE_bool(keeps_warmup_tables_loaded);
DECLARE_bool(truncate_external_tables_with_hms); DECLARE_bool(truncate_external_tables_with_hms);
DECLARE_bool(disable_hms_sync_by_default);
// HS2 SAML2.0 configuration // HS2 SAML2.0 configuration
// Defined here because TAG_FLAG caused issues in global-flags.cc // Defined here because TAG_FLAG caused issues in global-flags.cc
@@ -596,6 +597,7 @@ Status PopulateThriftBackendGflags(TBackendGflags& cfg) {
FLAGS_tuple_cache_cost_coefficient_read_rows); FLAGS_tuple_cache_cost_coefficient_read_rows);
cfg.__set_min_jdbc_scan_cardinality(FLAGS_min_jdbc_scan_cardinality); cfg.__set_min_jdbc_scan_cardinality(FLAGS_min_jdbc_scan_cardinality);
cfg.__set_max_stmt_metadata_loader_threads(FLAGS_max_stmt_metadata_loader_threads); cfg.__set_max_stmt_metadata_loader_threads(FLAGS_max_stmt_metadata_loader_threads);
cfg.__set_disable_hms_sync_by_default(FLAGS_disable_hms_sync_by_default);
return Status::OK(); return Status::OK();
} }

View File

@@ -361,4 +361,6 @@ struct TBackendGflags {
165: required i32 min_jdbc_scan_cardinality 165: required i32 min_jdbc_scan_cardinality
166: required i32 max_stmt_metadata_loader_threads 166: required i32 max_stmt_metadata_loader_threads
167: required bool disable_hms_sync_by_default
} }

View File

@@ -1218,7 +1218,7 @@ public class MetastoreShim extends Hive3MetastoreShimBase {
writeEventInfoList.get(i).getTableObj(), Table.class); writeEventInfoList.get(i).getTableObj(), Table.class);
if (event.getCatalogOpExecutor().getCatalog().isHmsEventSyncDisabled(tbl)) { if (event.getCatalogOpExecutor().getCatalog().isHmsEventSyncDisabled(tbl)) {
LOG.debug("Not adding write ids to table {}.{} for event {} " + LOG.debug("Not adding write ids to table {}.{} for event {} " +
"since table/db level flag {} is set to true", "since table/db level flag {} or global level flag is set to true",
tbl.getDbName(), tbl.getTableName(), event.getEventId(), tbl.getDbName(), tbl.getTableName(), event.getEventId(),
MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey()); MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey());
continue; continue;

View File

@@ -4831,9 +4831,9 @@ public class CatalogServiceCatalog extends Catalog {
return; return;
} }
if (isHmsEventSyncDisabled(tbl.getMetaStoreTable())) { if (isHmsEventSyncDisabled(tbl.getMetaStoreTable())) {
LOG.debug("Not adding write ids to table {}.{} for event {} " + LOG.debug("Not adding write ids to table {}.{} for event {} since table/db level" +
"since table/db level flag {} is set to true", dbName, tblName, eventId, " flag {} or disable_hms_sync_by_default is set to true", dbName,
MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey()); tblName, eventId, MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey());
return; return;
} }
if (eventId > 0 && eventId <= tbl.getCreateEventId()) { if (eventId > 0 && eventId <= tbl.getCreateEventId()) {
@@ -4902,7 +4902,10 @@ public class CatalogServiceCatalog extends Catalog {
} }
String dbFlagVal = getDbProperty(tbl.getDbName(), String dbFlagVal = getDbProperty(tbl.getDbName(),
MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey()); MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey());
return Boolean.parseBoolean(dbFlagVal); if (dbFlagVal != null) {
return Boolean.parseBoolean(dbFlagVal);
}
return BackendConfig.INSTANCE.isDisableHmsSyncByDefault();
} }
/** /**

View File

@@ -1255,9 +1255,17 @@ public class MetastoreEvents {
+ "database {}", + "database {}",
MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey(), MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey(),
dbFlagVal, dbName_); dbFlagVal, dbName_);
// flag value of null also returns false
return Boolean.valueOf(dbFlagVal);
} }
// flag value of null also returns false boolean globalDisableHmsSync = BackendConfig.INSTANCE.isDisableHmsSyncByDefault();
return Boolean.valueOf(dbFlagVal); if (globalDisableHmsSync) {
debugLog("Table level for table {} or Db level for db {}, flag {} is not set. " +
"Global flag disable_hms_sync_by_default is set to {}",
msTbl_.getTableName(), dbName_, MetastoreEventPropertyKey
.DISABLE_EVENT_HMS_SYNC.getKey(), globalDisableHmsSync);
}
return globalDisableHmsSync;
} }
/** /**

View File

@@ -624,4 +624,12 @@ public class BackendConfig {
public int getMaxStmtMetadataLoaderThreads() { public int getMaxStmtMetadataLoaderThreads() {
return backendCfg_.max_stmt_metadata_loader_threads; return backendCfg_.max_stmt_metadata_loader_threads;
} }
/**
 * Returns the value of the 'disable_hms_sync_by_default' backend flag as stored in
 * the backend configuration held by this object.
 */
public boolean isDisableHmsSyncByDefault() {
return backendCfg_.disable_hms_sync_by_default;
}
/**
 * Overwrites the stored 'disable_hms_sync_by_default' value in the backend
 * configuration held by this object.
 * NOTE(review): presumably intended for tests that need to toggle the flag at
 * runtime -- confirm against callers.
 */
public void setDisableHmsSyncByDefault(boolean disableHmsSyncByDefault) {
backendCfg_.disable_hms_sync_by_default = disableHmsSyncByDefault;
}
} }

View File

@@ -1820,6 +1820,73 @@ class TestEventProcessingCustomConfigs(TestEventProcessingCustomConfigsBase):
# Case-IV: Truncate table from Hive is currently generating single alter_partition # Case-IV: Truncate table from Hive is currently generating single alter_partition
# events. HIVE-28668 will address it. # events. HIVE-28668 will address it.
# Starts catalogd with a 1 second HMS polling interval and with
# --disable_hms_sync_by_default=true, so the db/table property decides what is synced.
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
catalogd_args="--hms_event_polling_interval_s=1 "
"--disable_hms_sync_by_default=true")
def test_disable_hms_sync_globally(self, unique_database):
"""Verify IMPALA-14131: hms events are synced/skipped based on global flag
--disable_hms_sync_by_default and the db/table property 'impala.disableHmsSync'"""
tbl1 = unique_database + ".test_disable_hms_sync_1"
tbl2 = unique_database + ".test_disable_hms_sync_2"
# Wait before sampling the 'events-skipped' baseline used by Case 1.
EventProcessorUtils.wait_for_event_processing(self)
# Case 1: verify global config
events_skipped_before = EventProcessorUtils.get_int_metric('events-skipped', 0)
self.run_stmt_in_hive(
"""create table {} (id int) partitioned by (year int);
create table {} (id int);""".format(tbl1, tbl2))
EventProcessorUtils.wait_for_event_processing(self)
events_skipped_after = EventProcessorUtils.get_int_metric('events-skipped', 0)
# No db/table override exists yet, so the Hive-side creates are expected to be
# skipped: the counter must grow and Impala must not list the new tables.
assert events_skipped_after > events_skipped_before
table_names = self.client.execute("show tables in {}".format(unique_database))\
.get_data()
assert not table_names
# Helper: inserts two rows into `tbl` from Hive (optionally into the partition clause
# `part`), then asserts that 'events-skipped' grew by exactly `skip_events` and that
# `select *` on the table returns `expected_val` rows.
def _check_insert_events(tbl, expected_val, skip_events=0, part=''):
EventProcessorUtils.wait_for_event_processing(self)
events_skipped_before = EventProcessorUtils.get_int_metric('events-skipped', 0)
# modify data externally
self.run_stmt_in_hive(
"""insert into {tb1} {partition} values(1),(2);"""
.format(tb1=tbl, partition=part))
EventProcessorUtils.wait_for_event_processing(self)
events_skipped_after = EventProcessorUtils.get_int_metric('events-skipped', 0)
assert events_skipped_after == events_skipped_before + skip_events, \
"Expected {} events to be skipped, but {} events were skipped.".format(
skip_events, events_skipped_after - events_skipped_before)
data = self.client.execute("select * from {}".format(tbl))
assert len(data.data) == expected_val, \
"Expected {} rows in table {}, but found {}.".format(expected_val, tbl,
len(data.data))
# Case 2: Enable hms sync at database level but disabled globally
# Helper: loads `tbl` via invalidate metadata + describe, sets
# 'impala.disableHmsSync'='false' on the database from Hive, then expects the Hive
# insert to be synced (2 rows visible, 0 events skipped).
def validate_hms_sync(unique_database, tbl, partition=''):
# load tables in cache
self.client.execute("invalidate metadata {}".format(tbl))
self.client.execute("describe {}".format(tbl))
self.run_stmt_in_hive(
"""ALTER DATABASE {} SET DBPROPERTIES ('impala.disableHmsSync'='false')"""
.format(unique_database))
_check_insert_events(tbl, 2, 0, partition)
validate_hms_sync(unique_database, tbl1, partition='partition(year=2024)')
validate_hms_sync(unique_database, tbl2)
# Case 3: disable hms sync at database level and enable it at table level
self.run_stmt_in_hive(
"""ALTER DATABASE {} SET DBPROPERTIES ('impala.disableHmsSync'='true')"""
.format(unique_database))
self.client.execute(
"""alter table {} SET TBLPROPERTIES ('impala.disableHmsSync'='false')"""
.format(tbl1))
self.client.execute(
"""alter table {} SET TBLPROPERTIES ('impala.disableHmsSync'='false')"""
.format(tbl2))
EventProcessorUtils.wait_for_event_processing(self)
# Each table already holds 2 rows from Case 2, so a synced insert of 2 more gives 4.
# NOTE(review): the partitioned table expects exactly one skipped event while the
# unpartitioned one expects none; the reason is not visible here -- confirm.
_check_insert_events(tbl1, 4, skip_events=1, part='partition(year=2024)')
_check_insert_events(tbl2, 4, skip_events=0)
@SkipIfFS.hive @SkipIfFS.hive
class TestEventProcessingWithImpala(TestEventProcessingCustomConfigsBase): class TestEventProcessingWithImpala(TestEventProcessingCustomConfigsBase):