IMPALA-7531: Daemon level catalog cache metrics

This patch adds the aggregated CatalogdMetaProvider cache stats to
the catalog metrics on the coordinators. They can be accessed under
<coordinator>:<web-port>/metrics#catalog.

These metrics are refreshed at the end of planning, for each query run.

Testing:
-------

Visual inspection by running a few queries locally and making sure
stats are updated. Also modified existing tests to account for this
behavior.

Change-Id: I23c131b77ca84aa4df8919213bbd83944fa112a5
Reviewed-on: http://gerrit.cloudera.org:8080/11511
Reviewed-by: Bharath Vissapragada <bharathv@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Bharath Vissapragada
2018-09-24 15:31:17 -07:00
committed by Impala Public Jenkins
parent e83fe23a5f
commit ac33c0c42e
11 changed files with 402 additions and 30 deletions

View File

@@ -27,6 +27,23 @@ from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
RETRY_PROFILE_MSG = 'Retrying query planning due to inconsistent metadata'
class TestCompactCatalogUpdates(CustomClusterTestSuite):
def get_catalog_cache_metrics(self, impalad):
    """Scrape the JSON metrics page of the given impalad for catalog cache metrics.

    Walks the top-level metric groups looking for 'impala-server', then its child
    groups looking for 'catalog'. Returns a dict mapping metric name to metric value
    for every metric in that group whose name contains 'catalog.cache'. Fails with an
    assertion error if no such group is present on the page.
    """
    metrics_page = impalad.service.get_debug_webpage_json('metrics')
    top_level_groups = metrics_page['metric_group']['child_groups']
    for server_group in top_level_groups:
        if server_group['name'] == 'impala-server':
            for subgroup in server_group['child_groups']:
                if subgroup['name'] == 'catalog':
                    # Keep only the cache related entries of the catalog group.
                    return {
                        entry['name']: entry['value']
                        for entry in subgroup['metrics']
                        if 'catalog.cache' in entry['name']
                    }
    assert False, "Catalog cache metrics not found in %s" % top_level_groups
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--use_local_catalog=true",
@@ -215,15 +232,45 @@ class TestCompactCatalogUpdates(CustomClusterTestSuite):
@CustomClusterTestSuite.with_args(
impalad_args="--use_local_catalog=true",
catalogd_args="--catalog_topic_mode=minimal")
def test_cache_metrics(self, unique_database):
    """
    Test that profile output includes impalad local cache metrics. Also verifies that
    the daemon level metrics are updated between query runs.

    A mix of query types is run ten times over against the first impalad of the
    cluster. After every query the catalog cache metrics are re-read from that impalad
    and checked: the hit and miss rates must lie strictly between 0 and 1, and the hit
    and request counts must be strictly greater than they were after the previous query.
    """
    impalad = self.cluster.impalads[0]
    # Create the client before entering the try block: if creation fails there is
    # nothing to close, and the finally clause must not fail on an unbound 'client'.
    client = impalad.service.create_beeswax_client()
    try:
        cache_hit_rate_metric_key = "catalog.cache.hit-rate"
        cache_miss_rate_metric_key = "catalog.cache.miss-rate"
        cache_hit_count_metric_key = "catalog.cache.hit-count"
        cache_request_count_metric_key = "catalog.cache.request-count"
        # Counter values seen after the previous query; every query must move them up.
        cache_request_count_prev_run = 0
        cache_hit_count_prev_run = 0
        test_table_name = "%s.test_cache_metrics_test_tbl" % unique_database
        # A mix of queries of various types.
        queries_to_test = ["select count(*) from functional.alltypes",
                           "explain select count(*) from functional.alltypes",
                           "create table %s (a int)" % test_table_name,
                           "drop table %s" % test_table_name]
        for _ in range(10):
            for query in queries_to_test:
                ret = self.execute_query_expect_success(client, query)
                assert ret.runtime_profile.count("Frontend:") == 1
                assert ret.runtime_profile.count("CatalogFetch") > 1
                cache_metrics = self.get_catalog_cache_metrics(impalad)
                cache_hit_rate = cache_metrics[cache_hit_rate_metric_key]
                cache_miss_rate = cache_metrics[cache_miss_rate_metric_key]
                cache_hit_count = cache_metrics[cache_hit_count_metric_key]
                cache_request_count = cache_metrics[cache_request_count_metric_key]
                assert 0.0 < cache_hit_rate < 1.0
                assert 0.0 < cache_miss_rate < 1.0
                assert cache_hit_count > cache_hit_count_prev_run,\
                    "%s not updated between two query runs, query - %s"\
                    % (cache_hit_count_metric_key, query)
                assert cache_request_count > cache_request_count_prev_run,\
                    "%s not updated between two query runs, query - %s"\
                    % (cache_request_count_metric_key, query)
                cache_hit_count_prev_run = cache_hit_count
                cache_request_count_prev_run = cache_request_count
    finally:
        client.close()