mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-14131: Add flag to configure the default value of
'impala.disableHmsSync' FEATURE: Implement global 'disable_hms_sync_by_default' flag for event processing. This change introduces a new catalogd startup flag, `disable_hms_sync_by_default`, to simplify skipping/processing events. Problem: Disabling event processing globally requires the tedious process of setting the 'impala.disableHmsSync' property on every database and table, especially if only a few specific tables require their events to be synced. Solution: The new flag provides a global default for the 'impala.disableHmsSync' property. Behavior: - If `disable_hms_sync_by_default` is true (the intended default-off state), event processing is skipped for all tables/databases unless the property "impala.disableHmsSync"="false" is explicitly set. - This allows users to easily keep event processing off by default and opt in specific databases or tables to start syncing. - The check order is: table-property > db-property > global default. - HMS polling remains independent and unaffected by this flag. Change-Id: I4ee617aed48575502d9cf5cf2cbea6ec897d6839 Reviewed-on: http://gerrit.cloudera.org:8080/23487 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
b581e45286
commit
1684c2d9da
@@ -318,6 +318,12 @@ DEFINE_bool(truncate_external_tables_with_hms, true, "Always use HMS to truncate
|
||||
"external tables. When false, HMS api is only used for tables being replicated. Using"
|
||||
"HMS has the effect of deleting files recursively and triggering an HMS event.");
|
||||
|
||||
// Catalogd startup flag (default: false) whose help text below describes it as the
// global default for skipping HMS event processing when 'impala.disableHmsSync' is not
// explicitly set on a database or table.
// Note: adjacent string literals are concatenated verbatim, so every fragment that is
// followed by another word must end with an explicit space.
DEFINE_bool(disable_hms_sync_by_default, false, "Catalogd flag that globally skips "
    "HiveMetastore (HMS) event processing by default. If 'true', events are skipped "
    "for all objects (with the exception to database level events) unless "
    "'impala.disableHmsSync' is explicitly set to 'false' on a database or table. "
    "This simplifies rolling out event processing job-by-job.");
|
||||
|
||||
DECLARE_string(state_store_host);
|
||||
DECLARE_int32(state_store_port);
|
||||
DECLARE_string(state_store_2_host);
|
||||
|
||||
@@ -148,6 +148,7 @@ DECLARE_int32(catalog_reset_max_threads);
|
||||
DECLARE_string(warmup_tables_config_file);
|
||||
DECLARE_bool(keeps_warmup_tables_loaded);
|
||||
DECLARE_bool(truncate_external_tables_with_hms);
|
||||
DECLARE_bool(disable_hms_sync_by_default);
|
||||
|
||||
// HS2 SAML2.0 configuration
|
||||
// Defined here because TAG_FLAG caused issues in global-flags.cc
|
||||
@@ -596,6 +597,7 @@ Status PopulateThriftBackendGflags(TBackendGflags& cfg) {
|
||||
FLAGS_tuple_cache_cost_coefficient_read_rows);
|
||||
cfg.__set_min_jdbc_scan_cardinality(FLAGS_min_jdbc_scan_cardinality);
|
||||
cfg.__set_max_stmt_metadata_loader_threads(FLAGS_max_stmt_metadata_loader_threads);
|
||||
cfg.__set_disable_hms_sync_by_default(FLAGS_disable_hms_sync_by_default);
|
||||
return Status::OK();
|
||||
}
|
||||
|
||||
|
||||
@@ -361,4 +361,6 @@ struct TBackendGflags {
|
||||
165: required i32 min_jdbc_scan_cardinality
|
||||
|
||||
166: required i32 max_stmt_metadata_loader_threads
|
||||
|
||||
167: required bool disable_hms_sync_by_default
|
||||
}
|
||||
|
||||
@@ -1218,7 +1218,7 @@ public class MetastoreShim extends Hive3MetastoreShimBase {
|
||||
writeEventInfoList.get(i).getTableObj(), Table.class);
|
||||
if (event.getCatalogOpExecutor().getCatalog().isHmsEventSyncDisabled(tbl)) {
|
||||
LOG.debug("Not adding write ids to table {}.{} for event {} " +
|
||||
"since table/db level flag {} is set to true",
|
||||
"since table/db level flag {} or global level flag is set to true",
|
||||
tbl.getDbName(), tbl.getTableName(), event.getEventId(),
|
||||
MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey());
|
||||
continue;
|
||||
|
||||
@@ -4831,9 +4831,9 @@ public class CatalogServiceCatalog extends Catalog {
|
||||
return;
|
||||
}
|
||||
if (isHmsEventSyncDisabled(tbl.getMetaStoreTable())) {
|
||||
LOG.debug("Not adding write ids to table {}.{} for event {} " +
|
||||
"since table/db level flag {} is set to true", dbName, tblName, eventId,
|
||||
MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey());
|
||||
LOG.debug("Not adding write ids to table {}.{} for event {} since table/db level" +
|
||||
" flag {} or disable_hms_sync_by_default is set to true", dbName,
|
||||
tblName, eventId, MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey());
|
||||
return;
|
||||
}
|
||||
if (eventId > 0 && eventId <= tbl.getCreateEventId()) {
|
||||
@@ -4902,7 +4902,10 @@ public class CatalogServiceCatalog extends Catalog {
|
||||
}
|
||||
String dbFlagVal = getDbProperty(tbl.getDbName(),
|
||||
MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey());
|
||||
return Boolean.parseBoolean(dbFlagVal);
|
||||
if (dbFlagVal != null) {
|
||||
return Boolean.parseBoolean(dbFlagVal);
|
||||
}
|
||||
return BackendConfig.INSTANCE.isDisableHmsSyncByDefault();
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1255,9 +1255,17 @@ public class MetastoreEvents {
|
||||
+ "database {}",
|
||||
MetastoreEventPropertyKey.DISABLE_EVENT_HMS_SYNC.getKey(),
|
||||
dbFlagVal, dbName_);
|
||||
// flag value of null also returns false
|
||||
return Boolean.valueOf(dbFlagVal);
|
||||
}
|
||||
// flag value of null also returns false
|
||||
return Boolean.valueOf(dbFlagVal);
|
||||
boolean globalDisableHmsSync = BackendConfig.INSTANCE.isDisableHmsSyncByDefault();
|
||||
if (globalDisableHmsSync) {
|
||||
debugLog("Table level for table {} or Db level for db {}, flag {} is not set. " +
|
||||
"Global flag disable_hms_sync_by_default is set to {}",
|
||||
msTbl_.getTableName(), dbName_, MetastoreEventPropertyKey
|
||||
.DISABLE_EVENT_HMS_SYNC.getKey(), globalDisableHmsSync);
|
||||
}
|
||||
return globalDisableHmsSync;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -624,4 +624,12 @@ public class BackendConfig {
|
||||
public int getMaxStmtMetadataLoaderThreads() {
|
||||
return backendCfg_.max_stmt_metadata_loader_threads;
|
||||
}
|
||||
|
||||
public boolean isDisableHmsSyncByDefault() {
|
||||
return backendCfg_.disable_hms_sync_by_default;
|
||||
}
|
||||
|
||||
public void setDisableHmsSyncByDefault(boolean disableHmsSyncByDefault) {
|
||||
backendCfg_.disable_hms_sync_by_default = disableHmsSyncByDefault;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1820,6 +1820,73 @@ class TestEventProcessingCustomConfigs(TestEventProcessingCustomConfigsBase):
|
||||
# Case-IV: Truncate table from Hive is currently generating single alter_partition
|
||||
# events. HIVE-28668 will address it.
|
||||
|
||||
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
    catalogd_args="--hms_event_polling_interval_s=1 "
                  "--disable_hms_sync_by_default=true")
def test_disable_hms_sync_globally(self, unique_database):
  """Verify IMPALA-14131: hms events are synced/skipped based on global flag
  --disable_hms_sync_by_default and the db/table property 'impala.disableHmsSync'

  The cluster is started with --disable_hms_sync_by_default=true. The test then
  walks through three cases:
    1. No 'impala.disableHmsSync' property is set by this test: tables created
       from Hive must not show up in Impala and the 'events-skipped' metric grows.
    2. The database property is set to 'false': inserts done from Hive become
       visible and no events are skipped.
    3. The database property is set to 'true' while the table property is set to
       'false' on both tables: inserts done from Hive are still visible.
  """
  tbl1 = unique_database + ".test_disable_hms_sync_1"
  tbl2 = unique_database + ".test_disable_hms_sync_2"
  # Drain pending events first so the metric snapshots below only reflect the
  # statements issued by this test.
  EventProcessorUtils.wait_for_event_processing(self)

  # Case 1: verify global config
  events_skipped_before = EventProcessorUtils.get_int_metric('events-skipped', 0)
  # tbl1 is partitioned, tbl2 is not; both are created from Hive.
  self.run_stmt_in_hive(
      """create table {} (id int) partitioned by (year int);
create table {} (id int);""".format(tbl1, tbl2))
  EventProcessorUtils.wait_for_event_processing(self)
  events_skipped_after = EventProcessorUtils.get_int_metric('events-skipped', 0)
  # The skipped counter must have increased and Impala must list no tables in the
  # database.
  assert events_skipped_after > events_skipped_before
  table_names = self.client.execute("show tables in {}".format(unique_database))\
      .get_data()
  assert not table_names

  def _check_insert_events(tbl, expected_val, skip_events=0, part=''):
    # Inserts two rows into 'tbl' from Hive (optionally into the partition clause
    # given by 'part'), then asserts that the 'events-skipped' metric grew by exactly
    # 'skip_events' and that a 'select *' from Impala returns 'expected_val' rows.
    EventProcessorUtils.wait_for_event_processing(self)
    events_skipped_before = EventProcessorUtils.get_int_metric('events-skipped', 0)
    # modify data externally
    self.run_stmt_in_hive(
        """insert into {tb1} {partition} values(1),(2);"""
        .format(tb1=tbl, partition=part))
    EventProcessorUtils.wait_for_event_processing(self)
    events_skipped_after = EventProcessorUtils.get_int_metric('events-skipped', 0)
    assert events_skipped_after == events_skipped_before + skip_events, \
        "Expected {} events to be skipped, but {} events were skipped.".format(
            skip_events, events_skipped_after - events_skipped_before)
    data = self.client.execute("select * from {}".format(tbl))
    assert len(data.data) == expected_val, \
        "Expected {} rows in table {}, but found {}.".format(expected_val, tbl,
            len(data.data))

  # Case 2: Enable hms sync at database level but disabled globally
  def validate_hms_sync(unique_database, tbl, partition=''):
    # Loads 'tbl' in Impala, sets 'impala.disableHmsSync'='false' on the database
    # from Hive, then expects a Hive insert of two rows to be visible with no
    # skipped events.
    # load tables in cache
    self.client.execute("invalidate metadata {}".format(tbl))
    self.client.execute("describe {}".format(tbl))
    self.run_stmt_in_hive(
        """ALTER DATABASE {} SET DBPROPERTIES ('impala.disableHmsSync'='false')"""
        .format(unique_database))
    _check_insert_events(tbl, 2, 0, partition)

  validate_hms_sync(unique_database, tbl1, partition='partition(year=2024)')
  validate_hms_sync(unique_database, tbl2)

  # Case 3: disable hms sync at database level and enable it at table level
  self.run_stmt_in_hive(
      """ALTER DATABASE {} SET DBPROPERTIES ('impala.disableHmsSync'='true')"""
      .format(unique_database))
  self.client.execute(
      """alter table {} SET TBLPROPERTIES ('impala.disableHmsSync'='false')"""
      .format(tbl1))
  self.client.execute(
      """alter table {} SET TBLPROPERTIES ('impala.disableHmsSync'='false')"""
      .format(tbl2))
  EventProcessorUtils.wait_for_event_processing(self)
  # Each table already holds two rows from Case 2, so four rows are expected now.
  # NOTE(review): the partitioned table expects exactly one skipped event per insert
  # while the unpartitioned one expects none; the reason is not visible here --
  # confirm which event is being skipped.
  _check_insert_events(tbl1, 4, skip_events=1, part='partition(year=2024)')
  _check_insert_events(tbl2, 4, skip_events=0)
|
||||
|
||||
|
||||
@SkipIfFS.hive
|
||||
class TestEventProcessingWithImpala(TestEventProcessingCustomConfigsBase):
|
||||
|
||||
Reference in New Issue
Block a user