Files
impala/tests/custom_cluster/test_shell_commandline.py
Riza Suminto 9c87cf41bf IMPALA-13396: Unify tmp dir management in CustomClusterTestSuite
Many custom cluster tests need to create temporary directories. Such a
directory typically lives within the scope of a test method and is
cleaned up afterwards. However, some tests create temporary directories
directly and forget to clean them up afterwards, leaving junk dirs
under /tmp/ or $LOG_DIR.

This patch unifies temporary directory management inside
CustomClusterTestSuite. It introduces a new 'tmp_dir_placeholders' arg in
CustomClusterTestSuite.with_args() that lists the tmp dirs to create.
'impalad_args', 'catalogd_args', and 'impala_log_dir' now accept a
formatting pattern that is replaced by a temporary dir path, defined
through 'tmp_dir_placeholders'.

There are a few occurrences where mkdtemp is called and cannot be replaced
by this work, such as tests/comparison/cluster.py. In those cases, this
patch changes them to supply a prefix arg so that developers know the
directory comes from an Impala test script.

This patch also addresses several flake8 errors in the modified files.

Testing:
- Passed custom cluster tests in exhaustive mode.
- Manually ran a few modified tests and observed that the temporary dirs
  are created and removed under logs/custom_cluster_tests/ as the tests
  run.

Change-Id: I8dd665e8028b3f03e5e33d572c5e188f85c3bdf5
Reviewed-on: http://gerrit.cloudera.org:8080/21836
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2024-10-02 01:25:39 +00:00

181 lines
7.6 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import, division, print_function
import os
import pytest
import re
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.test_dimensions import create_client_protocol_http_transport
from time import sleep
from tests.shell.util import run_impala_shell_cmd
class TestImpalaShellCommandLine(CustomClusterTestSuite):
  """Runs tests of the Impala shell by first standing up an Impala cluster with
  specific startup flags. Then, the Impala shell is launched with specific arguments
  in a separate process. Assertions are done by scanning the shell output and Impala
  server logs for expected strings."""

  # Placeholder names passed to with_args(tmp_dir_placeholders=...). Each test uses its
  # placeholder as the impala log dir and reads impalad.INFO back via get_tmp_dir().
  LOG_DIR_HTTP_TRACING = "http_tracing"
  LOG_DIR_HTTP_TRACING_OFF = "http_tracing_off"

  # Regex fragment shared by the session id, query id and profile id patterns:
  # two groups of 16 lowercase hex digits separated by a colon.
  IMPALA_ID_RE = "([0-9a-f]{16}:[0-9a-f]{16})"

  @classmethod
  def get_workload(cls):
    """Returns the name of the workload these tests run against."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Overrides all other add_dimension methods in super classes up the entire class
    hierarchy ensuring that each test in this class only gets run once using the
    hs2-http protocol."""
    cls.ImpalaTestMatrix.add_dimension(create_client_protocol_http_transport())

  def _run_version_query_and_stop_cluster(self, vector, extra_shell_args=()):
    """Runs 'select version();profile' through the impala shell, stops the cluster and
    asserts that the shell output shows the query ran.

    'extra_shell_args' are inserted between the protocol arguments and the query.
    Returns the result object produced by run_impala_shell_cmd()."""
    args = ['--protocol', vector.get_value('protocol')]
    args.extend(extra_shell_args)
    args.extend(['-q', 'select version();profile'])
    result = run_impala_shell_cmd(vector, args)

    # Shut down cluster to ensure logs flush to disk.
    sleep(5)
    self._stop_impala_cluster()

    # Ensure the query ran successfully.
    assert "version()" in result.stdout
    assert "impalad version" in result.stdout
    assert "Query Runtime Profile" in result.stdout
    return result

  def _impalad_info_log_path(self, tmp_dir_name):
    """Returns the path of the impalad.INFO file inside the temporary directory that
    was registered under the placeholder name 'tmp_dir_name'."""
    return os.path.join(self.get_tmp_dir(tmp_dir_name), "impalad.INFO")

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="-v 2",
      impala_log_dir="{" + LOG_DIR_HTTP_TRACING + "}",
      tmp_dir_placeholders=[LOG_DIR_HTTP_TRACING])
  def test_http_tracing_headers(self, vector):
    """Asserts that tracing headers are automatically added by the impala shell to
    all calls to the backend impala engine made using the hs2 over http protocol.
    The impala coordinator logs are searched to ensure these tracing headers were added
    and also were passed through to the coordinator."""
    result = self._run_version_query_and_stop_cluster(vector)

    request_id_base = ""
    request_id_serialnum = 0
    session_id = ""
    query_id = ""
    last_known_query_id = ""
    tracing_lines_count = 0

    # Group 1 is the uuid part of the request id, group 2 the trailing serial number.
    request_id_re = re.compile(r"x-request-id=([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-"
                               r"[0-9a-f]{4}-[0-9a-f]{12})-(\d+)")
    session_id_re = re.compile("x-session-id={0}"
                               .format(TestImpalaShellCommandLine.IMPALA_ID_RE))
    query_id_re = re.compile("x-query-id={0}"
                             .format(TestImpalaShellCommandLine.IMPALA_ID_RE))
    profile_query_id_re = re.compile(r"Query \(id={0}\)"
                                     .format(TestImpalaShellCommandLine.IMPALA_ID_RE))

    # Find all HTTP Connection Tracing log lines.
    with open(self._impalad_info_log_path(self.LOG_DIR_HTTP_TRACING)) as log_file:
      for line in log_file:
        if "HTTP Connection Tracing Headers" not in line:
          continue
        tracing_lines_count += 1

        # The impala shell builds a request_id that consists of the same randomly
        # generated uuid and a serially increasing integer appended on the end.
        # Ensure both these conditions are met.
        m = request_id_re.search(line)
        assert m is not None, \
            "did not find request id in HTTP connection tracing log line '{0}'" \
            .format(line)

        if request_id_base == "":
          # The current line is the very first HTTP connection tracing line in the logs.
          request_id_base = m.group(1)
        else:
          assert request_id_base == m.group(1), \
              "base request id expected '{0}', actual '{1}'" \
              .format(request_id_base, m.group(1))

        # The serial number is expected to start at 1 and grow by one per traced line.
        request_id_serialnum += 1
        assert request_id_serialnum == int(m.group(2)), \
            "request id serial number expected '{0}', actual '{1}'" \
            .format(request_id_serialnum, m.group(2))

        # The session_id is generated by impala and must be the same once it
        # appears in a tracing log line.
        m = session_id_re.search(line)
        if m is not None:
          if session_id == "":
            session_id = m.group(1)
          else:
            assert session_id == m.group(1), \
                "session id expected '{0}', actual '{1}'".format(session_id, m.group(1))

        # The query_id is generated by impala and must be the same for the
        # duration of the query. A line without a query id resets the tracked id,
        # so the next query id seen is treated as the start of a new query.
        m = query_id_re.search(line)
        if m is None:
          query_id = ""
        elif query_id == "":
          query_id = m.group(1)
          last_known_query_id = query_id
        else:
          assert query_id == m.group(1), \
              "query id expected '{0}', actual '{1}'".format(query_id, m.group(1))

    # Assert that multiple HTTP connection tracing log lines were found.
    assert tracing_lines_count > 10, \
        "did not find enough HTTP connection tracing log lines, found {0} lines" \
        .format(tracing_lines_count)

    # Ensure the last found query id matches the actual query id
    # from the impala query profile.
    m = profile_query_id_re.search(result.stdout)
    if m is None:
      pytest.fail("did not find Impala query id in shell stdout")
    assert last_known_query_id == m.group(1), \
        "impala query profile id, expected '{0}', actual '{1}'" \
        .format(last_known_query_id, m.group(1))

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args(
      impalad_args="-v 2",
      impala_log_dir="{" + LOG_DIR_HTTP_TRACING_OFF + "}",
      tmp_dir_placeholders=[LOG_DIR_HTTP_TRACING_OFF])
  def test_http_tracing_headers_off(self, vector):
    """Asserts the impala shell command line parameter to prevent the addition of http
    tracing headers actually leaves out those tracing headers."""
    self._run_version_query_and_stop_cluster(vector, ['--no_http_tracing'])

    # Find all HTTP Connection Tracing log lines (there should not be any).
    with open(self._impalad_info_log_path(self.LOG_DIR_HTTP_TRACING_OFF)) as log_file:
      for line in log_file:
        if "HTTP Connection Tracing Headers" in line:
          pytest.fail("found HTTP connection tracing log line: {0}".format(line))