Files
impala/tests/custom_cluster/test_shell_commandline.py
jasonmfehr 63d13a35f3 IMPALA-11880: Adds support for authenticating to Impala using JWTs.
This support was modeled after the LDAP authentication.

If JWT authentication is used, the Impala shell enforces the use of the
hs2-http protocol since the JWT is sent via the "Authorization"
HTTP header.

The following flags have been added to the Impala shell:
* -j, --jwt: indicates that JWT authentication will be used
* --jwt_cmd: shell command to run to retrieve the JWT to use for
  authentication

Testing
New Python tests have been added:
* The shell tests ensure that the various command line arguments are
  handled properly. Situations such as allowing only a single
  authentication method and refusing to send JWTs in clear text without
  the proper arguments are asserted.
* The Python custom cluster tests leverage a test JWKS and test JWTs.
  Then, a custom Impala cluster is started with the test JWKS. The
  Impala shell attempts to authenticate using a valid JWT, an expired
  (invalid) JWT, and a valid JWT signed by a different, untrusted JWKS.
  These tests also exercise the Impala JWT authentication mechanism and
  assert the prometheus JWT auth success and failure metrics are
  reported accurately.

Change-Id: I52247f9262c548946269fe5358b549a3e8c86d4c
Reviewed-on: http://gerrit.cloudera.org:8080/19837
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2023-05-11 23:22:05 +00:00

174 lines
7.4 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import, division, print_function
import os
import pytest
import re
import tempfile
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.test_dimensions import create_client_protocol_http_transport
from time import sleep
from tests.shell.util import run_impala_shell_cmd
class TestImpalaShellCommandLine(CustomClusterTestSuite):
  """Runs tests of the Impala shell by first standing up an Impala cluster with
  specific startup flags. Then, the Impala shell is launched with specific arguments
  in a separate process. Assertions are done by scanning the shell output and Impala
  server logs for expected strings."""

  # Each test starts its cluster with -log_dir pointing at its own directory and later
  # reads impalad.INFO back from that same directory.
  LOG_DIR_HTTP_TRACING = tempfile.mkdtemp(prefix="http_tracing")
  LOG_DIR_HTTP_TRACING_OFF = tempfile.mkdtemp(prefix="http_tracing_off")

  # Capturing pattern for the ids that follow "x-session-id=" / "x-query-id=" in the
  # logs and "Query (id=" in the profile: two 16 character lowercase hex strings
  # separated by a colon.
  IMPALA_ID_RE = "([0-9a-f]{16}:[0-9a-f]{16})"

  # Text that identifies an HTTP connection tracing line in the impalad log.
  TRACING_LOG_MARKER = "HTTP Connection Tracing Headers"

  @classmethod
  def get_workload(cls):
    """Returns the name of the workload these tests belong to."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Overrides all other add_dimension methods in super classes up the entire class
    hierarchy ensuring that each test in this class only get run once using the
    hs2-http protocol."""
    cls.ImpalaTestMatrix.add_dimension(create_client_protocol_http_transport())

  def _run_shell_and_stop_cluster(self, vector, extra_args=None):
    """Runs 'select version();profile' through the impala shell using the protocol
    from the test vector, stops the cluster, and asserts that the shell output shows
    the query and its profile. 'extra_args', if provided, is a list of additional shell
    arguments placed between the protocol and the query arguments. Returns the result
    object produced by run_impala_shell_cmd."""
    args = ['--protocol', vector.get_value('protocol')]
    if extra_args:
      args.extend(extra_args)
    args.extend(['-q', 'select version();profile'])
    result = run_impala_shell_cmd(vector, args)

    # Shut down cluster to ensure logs flush to disk.
    sleep(5)
    self._stop_impala_cluster()

    # Ensure the query ran successfully.
    assert "version()" in result.stdout
    assert "impalad version" in result.stdout
    assert "Query Runtime Profile" in result.stdout
    return result

  def _read_tracing_lines(self, log_dir):
    """Returns, in file order, every line of the impalad.INFO file located in 'log_dir'
    that contains the HTTP connection tracing marker."""
    with open(os.path.join(log_dir, "impalad.INFO")) as log_file:
      return [line for line in log_file if self.TRACING_LOG_MARKER in line]

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("-log_dir={0} -v 2".format(LOG_DIR_HTTP_TRACING))
  def test_http_tracing_headers(self, vector):
    """Asserts that tracing headers are automatically added by the impala shell to
    all calls to the backend impala engine made using the hs2 over http protocol.
    The impala coordinator logs are searched to ensure these tracing headers were added
    and also were passed through to the coordinator."""
    result = self._run_shell_and_stop_cluster(vector)

    request_id_re = re.compile(r"x-request-id=([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-"
                               r"[0-9a-f]{4}-[0-9a-f]{12})-(\d+)")
    session_id_re = re.compile("x-session-id={0}".format(self.IMPALA_ID_RE))
    query_id_re = re.compile("x-query-id={0}".format(self.IMPALA_ID_RE))
    profile_query_id_re = re.compile(r"Query \(id={0}\)".format(self.IMPALA_ID_RE))

    request_id_base = ""
    session_id = ""
    query_id = ""
    last_known_query_id = ""

    # Find all HTTP Connection Tracing log lines.
    tracing_lines = self._read_tracing_lines(self.LOG_DIR_HTTP_TRACING)

    # The serial number expected on a line is its 1-based position among the tracing
    # lines, hence enumerate() starting at 1.
    for expected_serialnum, line in enumerate(tracing_lines, 1):
      # The impala shell builds a request_id that consists of the same randomly
      # generated uuid and a serially increasing integer appended on the end.
      # Ensure both these conditions are met.
      m = request_id_re.search(line)
      assert m is not None, \
          "did not find request id in HTTP connection tracing log line '{0}'" \
          .format(line)

      if request_id_base == "":
        # The current line is the very first HTTP connection tracing line in the logs.
        request_id_base = m.group(1)
      else:
        assert request_id_base == m.group(1), \
            "base request id expected '{0}', actual '{1}'" \
            .format(request_id_base, m.group(1))

      assert expected_serialnum == int(m.group(2)), \
          "request id serial number expected '{0}', actual '{1}'" \
          .format(expected_serialnum, m.group(2))

      # The session_id is generated by impala and must be the same once it
      # appears in a tracing log line.
      m = session_id_re.search(line)
      if m is not None:
        if session_id == "":
          session_id = m.group(1)
        else:
          assert session_id == m.group(1), \
              "session id expected '{0}', actual '{1}'".format(session_id, m.group(1))

      # The query_id is generated by impala and must be the same for the
      # duration of the query.
      m = query_id_re.search(line)
      if m is None:
        # A line without a query id resets the tracked id so the next line carrying
        # one is treated as the start of a new run of query lines.
        query_id = ""
      elif query_id == "":
        query_id = m.group(1)
        last_known_query_id = query_id
      else:
        assert query_id == m.group(1), \
            "query id expected '{0}', actual '{1}'".format(query_id, m.group(1))

    # Assert that multiple HTTP connection tracing log lines were found.
    assert len(tracing_lines) > 10, \
        "did not find enough HTTP connection tracing log lines, found {0} lines" \
        .format(len(tracing_lines))

    # Ensure the last found query id matches the actual query id
    # from the impala query profile.
    m = profile_query_id_re.search(result.stdout)
    if m is None:
      pytest.fail("did not find Impala query id in shell stdout")
    assert last_known_query_id == m.group(1), \
        "impala query profile id, expected '{0}', actual '{1}'" \
        .format(last_known_query_id, m.group(1))

  @pytest.mark.execute_serially
  @CustomClusterTestSuite.with_args("-log_dir={0} -v 2".format(LOG_DIR_HTTP_TRACING_OFF))
  def test_http_tracing_headers_off(self, vector):
    """Asserts the impala shell command line parameter to prevent the addition of http
    tracing headers actually leaves out those tracing headers."""
    self._run_shell_and_stop_cluster(vector, ['--no_http_tracing'])

    # Find all HTTP Connection Tracing log lines (there should not be any).
    tracing_lines = self._read_tracing_lines(self.LOG_DIR_HTTP_TRACING_OFF)
    if tracing_lines:
      # Report the first offending line.
      pytest.fail("found HTTP connection tracing line: {0}".format(tracing_lines[0]))