IMPALA-8675: Remove db/table count metrics from impalad in LocalCatalog mode

In the /metrics web UI, a coordinator exposes the metrics
"catalog.num-databases" and "catalog.num-tables" for its local catalog
cache. They are updated at the end of each query execution, via
Frontend.getCatalogMetrics().

In LocalCatalog mode, there is no need for every coordinator to have the
full list of tables of every database. However, getCatalogMetrics ends
up iterating over every DB and fetching these lists (if uncached) in
order to provide a count. This introduces unnecessary catalog RPCs at
the end of each query execution. When catalogd is slow or hangs while
processing such coordinator RPCs, even simple queries hang as well.

This patch stops tracking the db/table count metrics on the coordinator
side in LocalCatalog mode; there they are always reported as -1. The counts
aren't particularly relevant – anyone who wants to keep track of the size of
their catalog is better off looking at those metrics on catalogd.

Tests:
 - test_non_compact_catalog_topic_updates uses these two metrics to
   detect new catalog updates. Changed it to use "catalog.curr-version"
   instead.

Change-Id: I02a409b7b24577f75d7c439c85bc3491ec7c518c
Reviewed-on: http://gerrit.cloudera.org:8080/20500
Reviewed-by: Wenzhe Zhou <wzhou@cloudera.com>
Tested-by: Wenzhe Zhou <wzhou@cloudera.com>
This commit is contained in:
stiga-huang
2023-09-21 09:45:36 +08:00
committed by Wenzhe Zhou
parent 94f4f1d824
commit 6af8154ecb
4 changed files with 21 additions and 12 deletions

View File

@@ -330,7 +330,7 @@
"key": "catalog-server.topic-processing-time-s"
},
{
"description": "The number of databases in the catalog.",
"description": "The number of databases in the catalog. Untracked in LocalCatalog mode.",
"contexts": [
"IMPALAD"
],
@@ -340,7 +340,7 @@
"key": "catalog.num-databases"
},
{
"description": "The number of tables in the catalog.",
"description": "The number of tables in the catalog. Untracked in LocalCatalog mode.",
"contexts": [
"IMPALAD"
],

View File

@@ -1051,13 +1051,20 @@ public class Frontend {
return planCtx.getExplainString();
}
public TGetCatalogMetricsResult getCatalogMetrics() throws ImpalaException {
public TGetCatalogMetricsResult getCatalogMetrics() {
TGetCatalogMetricsResult resp = new TGetCatalogMetricsResult();
for (FeDb db : getCatalog().getDbs(PatternMatcher.MATCHER_MATCH_ALL)) {
resp.num_dbs++;
resp.num_tables += db.getAllTableNames().size();
if (BackendConfig.INSTANCE.getBackendCfg().use_local_catalog) {
// Don't track these two metrics in LocalCatalog mode since they might introduce
// catalogd RPCs when the db list or some table lists are not cached.
resp.num_dbs = -1;
resp.num_tables = -1;
FeCatalogUtils.populateCacheMetrics(getCatalog(), resp);
} else {
for (FeDb db : getCatalog().getDbs(PatternMatcher.MATCHER_MATCH_ALL)) {
resp.num_dbs++;
resp.num_tables += db.getAllTableNames().size();
}
}
FeCatalogUtils.populateCacheMetrics(getCatalog(), resp);
return resp;
}

View File

@@ -43,9 +43,9 @@ class TestCompactCatalogUpdates(CustomClusterTestSuite):
try:
# Check that initial catalog update topic has been received
impalad1 = self.cluster.impalads[0]
assert impalad1.service.get_metric_value("catalog.num-tables") > 0
assert impalad1.service.get_metric_value("catalog.curr-version") > 0
impalad2 = self.cluster.impalads[1]
assert impalad2.service.get_metric_value("catalog.num-tables") > 0
assert impalad2.service.get_metric_value("catalog.curr-version") > 0
client1 = impalad1.service.create_beeswax_client()
client2 = impalad2.service.create_beeswax_client()
@@ -55,10 +55,12 @@ class TestCompactCatalogUpdates(CustomClusterTestSuite):
result = client2.execute("select count(*) from functional.alltypes")
assert result.data[0] == "7300"
prev_v1 = impalad1.service.get_metric_value("catalog.curr-version")
prev_v2 = impalad2.service.get_metric_value("catalog.curr-version")
self.execute_query_expect_success(client1, "invalidate metadata", query_options)
self.execute_query_expect_success(client2, "show databases")
assert impalad1.service.get_metric_value("catalog.num-databases") > 0
assert impalad2.service.get_metric_value("catalog.num-databases") > 0
assert impalad1.service.get_metric_value("catalog.curr-version") > prev_v1
assert impalad2.service.get_metric_value("catalog.curr-version") > prev_v2
finally:
client1.close()
client2.close()

View File

@@ -517,7 +517,7 @@ class TestObservability(CustomClusterTestSuite):
cache_entry_median_size = cache_metrics[cache_entry_median_size_key]
cache_entry_99th_size = cache_metrics[cache_entry_99th_size_key]
assert cache_entry_median_size > 300 and cache_entry_median_size < 1000
assert cache_entry_median_size > 300 and cache_entry_median_size < 3000
assert cache_entry_99th_size > 12500 and cache_entry_99th_size < 19000
cache_hit_count_prev_run = cache_hit_count