mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-12876: Add catalogVersion and loaded timestamp in query profiles
When debugging stale metadata, it'd be helpful to know which catalog versions of the tables are used and when catalogd loaded those versions. This patch exposes this information in the query profile for each referenced table. E.g. Original Table Versions: tpch.customer, 2249, 1726052668932, Wed Sep 11 19:04:28 CST 2024 tpch.nation, 2255, 1726052790140, Wed Sep 11 19:06:30 CST 2024 tpch.orders, 2257, 1726052803258, Wed Sep 11 19:06:43 CST 2024 tpch.lineitem, 2254, 1726052785384, Wed Sep 11 19:06:25 CST 2024 tpch.supplier, 2256, 1726052794235, Wed Sep 11 19:06:34 CST 2024 Each line consists of the table name, catalog version, loaded timestamp and the timestamp string. Implementation: The loaded timestamp is updated whenever a CatalogObject updates its catalog version in catalogd. It's passed to impalads with the TCatalogObject broadcast by statestore, or in DDL/DML responses. Currently, the loaded timestamp is added for table, view, function, data source, and hdfs cache pool in catalogd. However, only those of table and view are used in impalad. For the loaded timestamps of other types, users can check them in the /catalog WebUI of catalogd. Tests: - Adds e2e test Change-Id: I94b2fd59ed5aca664d6db4448c61ad21a88a4f98 Reviewed-on: http://gerrit.cloudera.org:8080/21782 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
c7ce233679
commit
58fd45f20c
@@ -414,11 +414,20 @@ class TestImpalaShell(ImpalaTestSuite):
|
||||
args = base_args + ['create table %s.shell_profile_test (id int)' % db]
|
||||
create = run_impala_shell_cmd(vector, args)
|
||||
assert "Referenced Tables: \n" in create.stdout
|
||||
assert "Original Table Versions: \n" in create.stdout
|
||||
|
||||
TABLE_VERSION = re.compile(
|
||||
r"Original Table Versions: (\w+\.\w+), (\d+), (\d+), ([^\n]*)\n")
|
||||
for statement in statements:
|
||||
args = base_args + [statement]
|
||||
result = run_impala_shell_cmd(vector, args)
|
||||
assert "Referenced Tables: %s.shell_profile_test" % unique_database in result.stdout
|
||||
m = TABLE_VERSION.search(result.stdout)
|
||||
assert m, "Original Table Versions not found in profile:\n" + result.stdout
|
||||
assert m.group(1) == unique_database + ".shell_profile_test"
|
||||
assert int(m.group(2)) > 0, "Invalid catalog version in " + m.group(0) + statement
|
||||
assert int(m.group(3)) > 0, "Invalid loaded timestamp in " + m.group(0) + statement
|
||||
assert len(m.group(4)) > 0, "Invalid timestamp string in " + m.group(0) + statement
|
||||
|
||||
def test_runtime_profile_multiple_referenced_tables(self, vector, unique_database):
|
||||
if vector.get_value('strict_hs2_protocol'):
|
||||
@@ -454,6 +463,16 @@ class TestImpalaShell(ImpalaTestSuite):
|
||||
for i in range(0, 2):
|
||||
assert "{db}.shell_profile_test{index}".format(db=db, index=i) in referenced_tables
|
||||
|
||||
TABLE_VERSIONS = re.compile(r"Original Table Versions: (\w+\.\w+), (\d+), (\d+), "
|
||||
r"([^\n]*)\n(\w+\.\w+), (\d+), (\d+), ([^\n]*)\n")
|
||||
m = TABLE_VERSIONS.search(result.stdout)
|
||||
assert m, "Original Table Versions not found in profile:\n" + result.stdout
|
||||
for i in (0, 4):
|
||||
assert db + ".shell_profile_test" in m.group(i + 1), "missing tables:" + m.group(0)
|
||||
assert int(m.group(i + 2)) > 0, "Invalid catalog version: " + m.group(0)
|
||||
assert int(m.group(i + 3)) > 0, "Invalid timestamp: " + m.group(0)
|
||||
assert len(m.group(i + 4)) > 0, "Invalid timestamp string: " + m.group(0)
|
||||
|
||||
def test_summary(self, vector):
|
||||
if vector.get_value('strict_hs2_protocol'):
|
||||
pytest.skip("Summary not supported in strict hs2 mode.")
|
||||
|
||||
Reference in New Issue
Block a user