IMPALA-11880: Adds support for authenticating to Impala using JWTs.

This support was modeled after the LDAP authentication.

If JWT authentication is used, the Impala shell enforces the use of the
hs2-http protocol since the JWT is sent via the "Authorization"
HTTP header.

The following flags have been added to the Impala shell:
* -j, --jwt: indicates that JWT authentication will be used
* --jwt_cmd: shell command to run to retrieve the JWT to use for
  authentication

Testing
New Python tests have been added:
* The shell tests ensure that the various command line arguments are
  handled properly. They assert situations such as only a single
  authentication method being allowed and JWTs not being sent in clear
  text without the proper arguments.
* The Python custom cluster tests leverage a test JWKS and test JWTs.
  Then, a custom Impala cluster is started with the test JWKS. The
  Impala shell attempts to authenticate using a valid JWT, an expired
  (invalid) JWT, and a valid JWT signed by a different, untrusted JWKS.
  These tests also exercise the Impala JWT authentication mechanism and
  assert the prometheus JWT auth success and failure metrics are
  reported accurately.

Change-Id: I52247f9262c548946269fe5358b549a3e8c86d4c
Reviewed-on: http://gerrit.cloudera.org:8080/19837
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
jasonmfehr
2023-04-25 12:18:25 -07:00
committed by Impala Public Jenkins
parent 1d0b111bcf
commit 63d13a35f3
15 changed files with 674 additions and 23 deletions

View File

@@ -181,6 +181,9 @@ testdata/impala-profiles/impala_profile_log_tpcds_compute_stats_v2_default.expec
testdata/impala-profiles/impala_profile_log_tpcds_compute_stats_v2_extended.expected.txt
testdata/hive_benchmark/grepTiny/part-00000
testdata/jwt/*.json
testdata/jwt/jwt_expired
testdata/jwt/jwt_signed
testdata/jwt/jwt_signed_untrusted
testdata/tzdb/2017c.zip
testdata/tzdb/2017c-corrupt.zip
testdata/tzdb_tiny/*

View File

@@ -187,6 +187,18 @@ class ImpalaHttpClient(TTransportBase):
custom_headers['Authorization'] = "Basic " + self.__basic_auth
return custom_headers
def getCustomHeadersWithBearerAuth(self, cookie_header, has_auth_cookie):
  """Returns the custom HTTP headers for a request authenticated with a bearer token.

  cookie_header - value for the 'Cookie' header; the header is omitted when this is
                  empty or None
  has_auth_cookie - not used by this function; the parameter matches the signature of
                    the other custom header functions in this class
  """
  # Start with the cookies (when there are any) so they precede the auth header.
  bearer_headers = {'Cookie': cookie_header} if cookie_header else {}
  # The 'Authorization' header is always sent, even if an auth cookie is present, so
  # that no extra round trip is needed when the cookie has expired by the time the
  # server receives the request. The token is already stored on this object, so
  # building the header value on every call is cheap.
  bearer_headers['Authorization'] = "Bearer {0}".format(self.__bearer_token)
  return bearer_headers
def getCustomHeadersWithNegotiateAuth(self, cookie_header, has_auth_cookie):
import kerberos
custom_headers = {}
@@ -217,6 +229,12 @@ class ImpalaHttpClient(TTransportBase):
self.__basic_auth = basic_auth
self.__get_custom_headers_func = self.getCustomHeadersWithBasicAuth
# Set function to generate customized HTTP headers for JWT authorization.
def setJwtAuth(self, jwt):
  """Switches this client to JWT (bearer token) authentication.

  jwt - the token that will be sent in the 'Authorization' header of each request
  """
  # auth mechanism: JWT
  # From now on the custom headers are produced by the bearer auth header function.
  self.__get_custom_headers_func = self.getCustomHeadersWithBearerAuth
  # Keep the token so the header function can read it when building each request.
  self.__bearer_token = jwt
# Set function to generate customized HTTP headers for Kerberos authorization.
def setKerberosAuth(self, kerb_service):
# auth mechanism: GSSAPI

View File

@@ -135,7 +135,8 @@ class ImpalaClient(object):
ldap_password=None, use_ldap=False, client_connect_timeout_ms=60000,
verbose=True, use_http_base_transport=False, http_path=None,
http_cookie_names=None, http_socket_timeout_s=None, value_converter=None,
connect_max_tries=4, rpc_stdout=False, rpc_file=None, http_tracing=True):
connect_max_tries=4, rpc_stdout=False, rpc_file=None, http_tracing=True,
jwt=None):
self.connected = False
self.impalad_host = impalad[0]
self.impalad_port = int(impalad[1])
@@ -158,6 +159,7 @@ class ImpalaClient(object):
self.http_path = http_path
self.http_cookie_names = http_cookie_names
self.http_tracing = http_tracing
self.jwt = jwt
# This is set from ImpalaShell's signal handler when a query is cancelled
# from command line via CTRL+C. It is used to suppress error messages of
# query cancellation.
@@ -425,6 +427,8 @@ class ImpalaClient(object):
else:
auth = base64.encodebytes(user_passwd.encode()).decode().strip('\n')
transport.setLdapAuth(auth)
elif self.jwt is not None:
transport.setJwtAuth(self.jwt)
elif self.use_kerberos or self.kerberos_host_fqdn:
# Set the Kerberos service
if self.kerberos_host_fqdn is not None:

View File

@@ -190,14 +190,17 @@ class ImpalaShell(cmd.Cmd, object):
self.ca_cert = options.ca_cert
self.user = options.user
self.ldap_password_cmd = options.ldap_password_cmd
self.jwt_cmd = options.jwt_cmd
self.strict_hs2_protocol = options.strict_hs2_protocol
self.ldap_password = options.ldap_password
self.use_jwt = options.use_jwt
self.jwt = options.jwt
# When running tests in strict mode, the server uses the ldap
# protocol but can allow any password.
if options.use_ldap_test_password:
self.ldap_password = 'password'
self.use_ldap = options.use_ldap or \
(self.strict_hs2_protocol and not self.use_kerberos)
(self.strict_hs2_protocol and not self.use_kerberos and not self.use_jwt)
self.client_connect_timeout_ms = options.client_connect_timeout_ms
self.http_socket_timeout_s = None
if (options.http_socket_timeout_s != 'None' and
@@ -622,7 +625,8 @@ class ImpalaShell(cmd.Cmd, object):
use_http_base_transport=True, http_path=self.http_path,
http_cookie_names=self.http_cookie_names,
value_converter=value_converter, rpc_stdout=self.rpc_stdout,
rpc_file=self.rpc_file, http_tracing=self.http_tracing)
rpc_file=self.rpc_file, http_tracing=self.http_tracing,
jwt=self.jwt)
if protocol == 'hs2':
return ImpalaHS2Client(self.impalad, self.fetch_size, self.kerberos_host_fqdn,
self.use_kerberos, self.kerberos_service_name, self.use_ssl,
@@ -643,7 +647,7 @@ class ImpalaShell(cmd.Cmd, object):
value_converter=value_converter,
connect_max_tries=self.connect_max_tries,
rpc_stdout=self.rpc_stdout, rpc_file=self.rpc_file,
http_tracing=self.http_tracing)
http_tracing=self.http_tracing, jwt=self.jwt)
elif protocol == 'beeswax':
return ImpalaBeeswaxClient(self.impalad, self.fetch_size, self.kerberos_host_fqdn,
self.use_kerberos, self.kerberos_service_name, self.use_ssl,
@@ -951,6 +955,9 @@ class ImpalaShell(cmd.Cmd, object):
if self.use_ldap and self.ldap_password is None:
self.ldap_password = getpass.getpass("LDAP password for %s: " % self.user)
if self.use_jwt and self.jwt is None:
self.jwt = getpass.getpass("Enter JWT: ")
if not args: args = socket.getfqdn()
tokens = args.split(" ")
# validate the connection string.
@@ -995,6 +1002,8 @@ class ImpalaShell(cmd.Cmd, object):
self.use_kerberos = True
self.use_ldap = False
self.ldap_password = None
self.use_jwt = False
self.jwt = None
self.imp_client = self._new_impala_client()
self._connect()
except OSError:
@@ -1942,6 +1951,10 @@ def get_intro(options):
intro += ("\n\nLDAP authentication is enabled, but the connection to Impala is "
"not secured by TLS.\nALL PASSWORDS WILL BE SENT IN THE CLEAR TO IMPALA.")
if not options.ssl and options.creds_ok_in_clear and options.use_jwt:
intro += ("\n\nJWT authentication is enabled, but the connection to Impala is "
"not secured by TLS.\nALL JWTs WILL BE SENT IN THE CLEAR TO IMPALA.")
if options.protocol == 'beeswax':
intro += ("\n\nWARNING: The beeswax protocol is deprecated and will be removed in a "
"future version of Impala.")
@@ -1962,6 +1975,31 @@ def _validate_hs2_fp_format_specification(format_specification):
raise FatalShellException(e)
def read_password_cmd(password_cmd, auth_method_desc, strip_newline=False):
  """Runs a command and returns what it wrote to stdout as the credential.

  password_cmd - command line to run; it is split with shlex and executed directly,
                 not through a shell
  auth_method_desc - description of the credential used in error messages, for
                     example "LDAP password" or "JWT"
  strip_newline - if True, trailing carriage return and line feed characters are
                  removed from the command output

  Returns the command's stdout (decoded to a string on Python 3).

  Prints an error to stderr and raises FatalShellException if the command cannot be
  run, its output cannot be decoded, or it exits with a non-zero return code.
  """
  is_py3 = sys.version_info.major > 2
  try:
    p = subprocess.Popen(shlex.split(password_cmd), stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    password, stderr = p.communicate()

    if p.returncode == 0:
      if is_py3:
        # Ensure we can manipulate the password as a string later.
        password = password.decode('utf-8')
      if strip_newline:
        password = password.rstrip('\r\n')
      return password
  except Exception as e:
    print("Error retrieving %s (command was: '%s', exception "
          "was: '%s')" % (auth_method_desc, password_cmd, e), file=sys.stderr)
    raise FatalShellException()

  # Only reached when the command ran but exited with a non-zero return code. This is
  # reported outside of the try block so that the exception raised here cannot be
  # caught by the handler above and reported a second time.
  if is_py3:
    # Decode so the message shows the text instead of a b'...' literal. Undecodable
    # bytes are replaced rather than raising while reporting the original failure.
    stderr = stderr.decode('utf-8', 'replace')
  print("Error retrieving %s (command was '%s', error was: "
        "'%s')" % (auth_method_desc, password_cmd, stderr.strip()), file=sys.stderr)
  raise FatalShellException()
def impala_shell_main():
"""
@@ -2049,8 +2087,18 @@ def impala_shell_main():
"must be a 1-character string." % delim, file=sys.stderr)
raise FatalShellException()
if options.use_kerberos and options.use_ldap:
print("Please specify at most one authentication mechanism (-k or -l)",
auth_method_count = 0
if options.use_kerberos:
auth_method_count += 1
if options.use_ldap:
auth_method_count += 1
if options.use_jwt:
auth_method_count += 1
if auth_method_count > 1:
print("Please specify at most one authentication mechanism (-k, -l, or -j)",
file=sys.stderr)
raise FatalShellException()
@@ -2065,6 +2113,25 @@ def impala_shell_main():
"mechanism (-l)", file=sys.stderr)
raise FatalShellException()
if options.use_jwt and options.protocol.lower() != 'hs2-http':
print("Invalid protocol '{0}'. JWT authentication requires using the 'hs2-http' "
"protocol".format(options.protocol), file=sys.stderr)
raise FatalShellException()
if options.use_jwt and options.strict_hs2_protocol:
print("JWT authentication is not supported when using strict hs2.", file=sys.stderr)
raise FatalShellException()
if options.use_jwt and not options.ssl and not options.creds_ok_in_clear:
print("JWTs may not be sent over insecure connections. Enable SSL or "
"set --auth_creds_ok_in_clear", file=sys.stderr)
raise FatalShellException()
if not options.use_jwt and options.jwt_cmd:
print("Option --jwt_cmd requires using JWT authentication mechanism (-j)",
file=sys.stderr)
raise FatalShellException()
if options.hs2_fp_format:
try:
_validate_hs2_fp_format_specification(options.hs2_fp_format)
@@ -2100,6 +2167,10 @@ def impala_shell_main():
if options.verbose:
ldap_msg = "with LDAP-based authentication"
print("{0} {1} {2}".format(start_msg, ldap_msg, py_version_msg), file=sys.stderr)
elif options.use_jwt:
if options.verbose:
ldap_msg = "with JWT-based authentication"
print("{0} {1} {2}".format(start_msg, ldap_msg, py_version_msg), file=sys.stderr)
else:
if options.verbose:
no_auth_msg = "with no authentication"
@@ -2107,21 +2178,11 @@ def impala_shell_main():
options.ldap_password = None
if options.use_ldap and options.ldap_password_cmd:
try:
p = subprocess.Popen(shlex.split(options.ldap_password_cmd), stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
options.ldap_password, stderr = p.communicate()
if p.returncode != 0:
print("Error retrieving LDAP password (command was '%s', error was: "
"'%s')" % (options.ldap_password_cmd, stderr.strip()), file=sys.stderr)
raise FatalShellException()
if sys.version_info.major > 2:
# Ensure we can manipulate the password as a string later.
options.ldap_password = options.ldap_password.decode('utf-8')
except Exception as e:
print("Error retrieving LDAP password (command was: '%s', exception "
"was: '%s')" % (options.ldap_password_cmd, e), file=sys.stderr)
raise FatalShellException()
options.ldap_password = read_password_cmd(options.ldap_password_cmd, "LDAP password")
options.jwt = None
if options.use_jwt and options.jwt_cmd:
options.jwt = read_password_cmd(options.jwt_cmd, "JWT", True)
if options.ssl:
if options.ca_cert is None:

View File

@@ -228,6 +228,10 @@ def get_option_parser(defaults):
action="store_true",
help="Use LDAP to authenticate with Impala. Impala must be configured"
" to allow LDAP authentication. \t\t")
parser.add_option("-j", "--jwt", dest="use_jwt",
action="store_true",
help="Use JWT to authenticate with Impala. Impala must be configured"
" to allow JWT authentication. \t\t")
parser.add_option("-u", "--user", dest="user",
help="User to authenticate with.")
parser.add_option("--ssl", dest="ssl",
@@ -267,6 +271,8 @@ def get_option_parser(defaults):
"unencrypted, and may be vulnerable to attack.")
parser.add_option("--ldap_password_cmd", dest="ldap_password_cmd",
help="Shell command to run to retrieve the LDAP password")
parser.add_option("--jwt_cmd", dest="jwt_cmd",
help="Shell command to run to retrieve the JWT")
parser.add_option("--var", dest="keyval", action="append",
help="Defines a variable to be used within the Impala session."
" Can be used multiple times to set different variables."

49
testdata/bin/jwt-generate.sh vendored Executable file
View File

@@ -0,0 +1,49 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Sets up a python 3 virtual environment with all necessary dependencies
# available for the jwt-util.py script, then runs that script so it writes
# its output files into the 'jwt' directory that sits next to this script's
# directory.

set -euo pipefail

WORK_DIR="$(mktemp -d)"
# Single quotes defer the expansion until the trap fires and the inner double
# quotes keep the path intact even if it contains whitespace.
trap 'rm -rf "${WORK_DIR}"' EXIT
echo "Using working directory: ${WORK_DIR}"

# absolute path of the directory containing this script; every other path is
# derived from it so the script behaves the same from any working directory
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"

MOD_DIR="${WORK_DIR}/python_modules"
VENV_DIR="${WORK_DIR}/.venv"
DATA_DIR="${SCRIPT_DIR}/../jwt"

# dependencies for creating a python virtual environment
mkdir -p "${MOD_DIR}"
pip3 install virtualenv --target="${MOD_DIR}"

# turn off the prompt setting since the virtual environment is loaded in a
# non-interactive script
VIRTUAL_ENV_DISABLE_PROMPT=1
export VIRTUAL_ENV_DISABLE_PROMPT

# create and activate the python virtual environment
"${MOD_DIR}/bin/virtualenv" --python python3 "${VENV_DIR}"
source "${VENV_DIR}/bin/activate"

# install necessary dependencies for the jwt generation python script
python -m pip install -r "${SCRIPT_DIR}/jwt_requirements.txt"

# generate the JWKS and JWT files
python "${SCRIPT_DIR}/jwt-util.py" "${DATA_DIR}"

146
testdata/bin/jwt-util.py vendored Normal file
View File

@@ -0,0 +1,146 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Generates a new RSA 2048 public/private key pair and uses that key pair to sign
# two new JWTs, one that is expired and one that is not expired. The public key is
# written to a file in JWKS format, and the two JWTs are also written to files.
#
# Also generates a valid, non-expired JWT using another generated JWK. This JWT can be
# used to test that JWT authentication only accepts JWTs signed by the JWK it trusts.
#
# The generated JWKS/JWTs are used by the 'tests/custom_cluster/test_shell_jwt_auth.py'
# Python custom cluster tests. Since the generated JWTs are valid for 10 years, they
# should not need to be regenerated.
from __future__ import absolute_import, division, print_function
import json
import os
import sys
from datetime import datetime
from jwcrypto import jwk, jwt
from time import time
# ensure the first parameter was provided and is a valid directory
if len(sys.argv) != 2:
  print("[ERROR] missing first parameter to this script which must be a valid directory")
  sys.exit(1)

if not os.path.isdir(sys.argv[1]):
  print("[ERROR] first and only parameter to this script must be a valid directory")
  sys.exit(1)

# directory where the generated JWKS and JWT files are written
work_dir = sys.argv[1]

# lifetime of the non-expired JWTs: 10 years expressed in seconds
VALID_LIFETIME_SECS = 10 * 365 * 24 * 60 * 60


def _build_signed_token(signing_key, issued_at, expires_at):
  """Creates a JWT for the test user and signs it with the provided JWK.

  signing_key - JWK used to sign the token; its key id is placed in both the token
                header and the token claims
  issued_at - value of the 'iat' claim in seconds since the epoch
  expires_at - value of the 'exp' claim in seconds since the epoch

  Returns the signed token object.
  """
  signing_key_id = signing_key.get("kid")
  token = jwt.JWT(
    header={
      "alg": "RS256",
      "kid": signing_key_id,
      # NOTE(review): the registered JOSE header parameter name is "typ". The name
      # "type" is kept here because it is what the committed test JWTs contain.
      "type": "JWT"
    },
    claims={
      "sub": "test-user",
      "kid": signing_key_id,
      "iss": "file://tests/util/jwt/jwt_util.py",
      "aud": "impala-tests",
      "iat": issued_at,
      "exp": expires_at
    }
  )
  token.make_signed_token(signing_key)
  return token


def _write_output_file(file_name, contents):
  """Writes the provided string to the named file inside the output directory."""
  with open(os.path.join(work_dir, file_name), "w") as output_file:
    output_file.write(contents)


#
# Generate a signing JWK and two JWTs that will be signed by that JWK
#

# generate a key id using the current date-time to enable easy tracking of the keys
key_id = datetime.utcnow().strftime("%Y%m%d-%H%M%S")

# generate a new public/private keypair that can be used to sign JWTs
key = jwk.JWK.generate(kty="RSA", size=2048, alg="RS256", use="sig", kid=key_id)

# build a key set from the generated key, exporting only the public half of the key
keyset = jwk.JWKSet()
keyset.add(key)
jwks_json_obj = json.loads(keyset.export(private_keys=False, as_dict=False))

now = int(time())

# create and sign a JWT that expires in 10 years
token_valid = _build_signed_token(key, now, now + VALID_LIFETIME_SECS)

# create and sign a JWT that expired in the past (issued two hours ago and
# expired one hour ago)
token_expired = _build_signed_token(key, now - 7200, now - 3600)

# write out the jwks
_write_output_file("jwks_signing.json", json.dumps(jwks_json_obj, indent=2))

# write out the signed valid jwt
_write_output_file("jwt_signed", token_valid.serialize())

# write out the signed expired jwt
_write_output_file("jwt_expired", token_expired.serialize())

#
# Generate another valid signed JWT using a different JWK
#

# generate a key id using the current date-time to enable easy tracking of the keys
key_id_untrusted_jwk = "untrusted_jwk-{0}" \
    .format(datetime.utcnow().strftime("%Y%m%d-%H%M%S"))

# generate a new public/private keypair that can be used to sign JWTs; this key is
# never written to the JWKS file
untrusted_jwk = jwk.JWK.generate(kty="RSA", size=2048, alg="RS256", use="sig",
                                 kid=key_id_untrusted_jwk)

# create and sign a JWT that expires in 10 years
now = int(time())
token_untrusted = _build_signed_token(untrusted_jwk, now, now + VALID_LIFETIME_SECS)

# write out the signed jwt
_write_output_file("jwt_signed_untrusted", token_untrusted.serialize())

18
testdata/bin/jwt_requirements.txt vendored Normal file
View File

@@ -0,0 +1,18 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
jwcrypto==1.4.2

12
testdata/jwt/jwks_signing.json vendored Normal file
View File

@@ -0,0 +1,12 @@
{
"keys": [
{
"alg": "RS256",
"e": "AQAB",
"kid": "20230509-160415",
"kty": "RSA",
"n": "ol5QO-LOzfxjKdkqJIP6yQIi1STDMO5fmPLOPfxp6xQrETLDhZlkMjLju1qq2tXHLbKuCDVydqITsMaHOjpgp4x5tIXM0eDjvHvSXIilxd1zcs8rTMhsaHpBhVSef2ZVUChYDfCxqIrxZBNRRm7ue6Hgy8T7G_mDYlv2KoEJ_yE6hZJ56DTPfiFjQ8-z0r0Drh2Mz1Hyq_itKgQCbpSdctMMzmFzqSi_lf04efUWaJFViXgZ0VHjf0OmuxmnPmuijqhUlZ7iMdmxNcWuvM8A9eQW-04zbw_H5KyFiu4S4Gl2Txk8E7Qex0BFv4l3tDoHFxjAcbfcBy4NXYcTqtHzTw",
"use": "sig"
}
]
}

1
testdata/jwt/jwt_expired vendored Normal file
View File

@@ -0,0 +1 @@
eyJhbGciOiJSUzI1NiIsImtpZCI6IjIwMjMwNTA5LTE2MDQxNSIsInR5cGUiOiJKV1QifQ.eyJhdWQiOiJpbXBhbGEtdGVzdHMiLCJleHAiOjE2ODM2NDQ2NTUsImlhdCI6MTY4MzY0MTA1NSwiaXNzIjoiZmlsZTovL3Rlc3RzL3V0aWwvand0L2p3dF91dGlsLnB5Iiwia2lkIjoiMjAyMzA1MDktMTYwNDE1Iiwic3ViIjoidGVzdC11c2VyIn0.bd87-G_1G9gDXHWbUYaHVIoumk11aAdq7Jc_rRC-DCK0k2h1fmMWVt4DUXkYd1SYIp972RH6wB0FqZYfL4ZhCv33x3LKX7jJX1KNmYfIJt6zSKGW03o4P0H5ZcJjwQ66TmT6AvoB7LBn6KXMWKFOt2DwyicMjGfbi2roc1oCr6imuzFnFaSE8_Isb06I7p87cd85lvSr8xMG8T711Vks_eAkbGqdiHCQISv2qigLuSGtj9eNqdCwVGem0LIFpt9pfhh4o1whoZh2oAl7DknHrnyTdR7ZxkazV07WgskdS-QxO7I-jUZf87n1Kxsv4b-XcBVzpeIS4d0MlZRQl04elA

1
testdata/jwt/jwt_signed vendored Normal file
View File

@@ -0,0 +1 @@
eyJhbGciOiJSUzI1NiIsImtpZCI6IjIwMjMwNTA5LTE2MDQxNSIsInR5cGUiOiJKV1QifQ.eyJhdWQiOiJpbXBhbGEtdGVzdHMiLCJleHAiOjE5OTkwMDgyNTUsImlhdCI6MTY4MzY0ODI1NSwiaXNzIjoiZmlsZTovL3Rlc3RzL3V0aWwvand0L2p3dF91dGlsLnB5Iiwia2lkIjoiMjAyMzA1MDktMTYwNDE1Iiwic3ViIjoidGVzdC11c2VyIn0.dWMOkcBrwRansZrCZrlbYzr9alIQ23qlnw4t8Kx_v87CBB90qtmTV88nZAh4APtTE8IUnP0e45R2XyDoH3a8UVrrSOkEzI47wJ0I3GqSc_R_MsGoeGlKreZmcjGhY_ceOo7RWYaBdzsAZe1YXcKJbq2sQJ3issfjBa_fWt0Qhy0DvzssUf3V-g5nQUM3W3pOULiFtMhA8YmIdheHalRz3D_NWMAqe79iUv6tG0Eg08x-cl8GXYsDm45sU4WkP5fZps6Q4Fm05640FWXG8K0PoLzSI_Iac3zzSAPs-iYNeeNE6C9QxBYSLBvQrWL0SETafP82Mo-nEZsAJbMMSqm0cQ

1
testdata/jwt/jwt_signed_untrusted vendored Normal file
View File

@@ -0,0 +1 @@
eyJhbGciOiJSUzI1NiIsImtpZCI6InVudHJ1c3RlZF9qd2stMjAyMzA1MDktMTYwNDE1IiwidHlwZSI6IkpXVCJ9.eyJhdWQiOiJpbXBhbGEtdGVzdHMiLCJleHAiOjE5OTkwMDgyNTUsImlhdCI6MTY4MzY0ODI1NSwiaXNzIjoiZmlsZTovL3Rlc3RzL3V0aWwvand0L2p3dF91dGlsLnB5Iiwia2lkIjoidW50cnVzdGVkX2p3ay0yMDIzMDUwOS0xNjA0MTUiLCJzdWIiOiJ0ZXN0LXVzZXIifQ.jPzR_v1iQUFJMQI0d12N6xDAp6IyyoSfkGllsjiF27CCBSurjZdOPyfajgP6gXKagpcGxXSBkGsQqYcgNddt0-nspqQtYCPjvKR2hSYzY8J-VQ14lcBdSkUYSa1qyzHzgb6YHQrC5Dw1fo-tT-G7Pjr4UwkpaWa79Y_UvzaZ8rn88ceJlEHR50DN49HAVvcGovJDYPvvu-6XgGb2MmzhBtKuzPAT4vNaMYf0rsJzHED_J_Lezd95zO1kEHjju1RJzwyambakEvt_XpZxrlYs5nRdR8LXNKisDZWvD6sxnWHsFYfE718WezBZGI4eh2Y9XIEkrcJTEvudyDLAh7CoFQ

View File

@@ -43,6 +43,9 @@ class TestImpalaShellCommandLine(CustomClusterTestSuite):
@classmethod
def add_test_dimensions(cls):
"""Overrides all other add_dimension methods in super classes up the entire class
hierarchy ensuring that each test in this class only get run once using the
hs2-http protocol."""
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_http_transport())
@pytest.mark.execute_serially
@@ -52,7 +55,7 @@ class TestImpalaShellCommandLine(CustomClusterTestSuite):
all calls to the backend impala engine made using the hs2 over http protocol.
The impala coordinator logs are searched to ensure these tracing headers were added
and also were passed through to the coordinator."""
args = ['--protocol', 'hs2-http', '-q', 'select version();profile']
args = ['--protocol', vector.get_value('protocol'), '-q', 'select version();profile']
result = run_impala_shell_cmd(vector, args)
# Shut down cluster to ensure logs flush to disk.
@@ -150,7 +153,7 @@ class TestImpalaShellCommandLine(CustomClusterTestSuite):
def test_http_tracing_headers_off(self, vector):
"""Asserts the impala shell command line parameter to prevent the addition of http
tracing headers actually leaves out those tracing headers."""
args = ['--protocol', 'hs2-http', '--no_http_tracing',
args = ['--protocol', vector.get_value('protocol'), '--no_http_tracing',
'-q', 'select version();profile']
result = run_impala_shell_cmd(vector, args)

View File

@@ -0,0 +1,224 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import, division, print_function
import os
import pytest
import tempfile
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.test_dimensions import create_client_protocol_http_transport
from time import sleep
from tests.shell.util import run_impala_shell_cmd
class TestImpalaShellJWTAuth(CustomClusterTestSuite):
"""Tests the Impala shell JWT authentication functionality by first standing up an
Impala cluster with specific startup flags to enable JWT authentication support.
Then, the Impala shell is launched in a separate process with authentication done using
JWTs. Assertions are done by scanning the shell output and Impala server logs for
expected strings.
These tests require a JWKS and three JWT files to be present in the 'testdata/jwt'
directory. The 'testdata/bin/jwt-generate.sh' script can be run to set up the
necessary files. Since the JWKS/JWT files are committed to the git repo, this script
should not need to be executed again.
"""
LOG_DIR_JWT_AUTH_SUCCESS = tempfile.mkdtemp(prefix="jwt_auth_success")
LOG_DIR_JWT_AUTH_FAIL = tempfile.mkdtemp(prefix="jwt_auth_fail")
LOG_DIR_JWT_AUTH_INVALID_JWK = tempfile.mkdtemp(prefix="jwt_auth_invalid_jwk")
JWKS_JWTS_DIR = os.path.join(os.environ['IMPALA_HOME'], 'testdata', 'jwt')
JWKS_JSON_PATH = os.path.join(JWKS_JWTS_DIR, 'jwks_signing.json')
JWT_SIGNED_PATH = os.path.join(JWKS_JWTS_DIR, 'jwt_signed')
JWT_EXPIRED_PATH = os.path.join(JWKS_JWTS_DIR, 'jwt_expired')
JWT_INVALID_JWK = os.path.join(JWKS_JWTS_DIR, 'jwt_signed_untrusted')
@classmethod
def get_workload(self):
return 'functional-query'
@classmethod
def add_test_dimensions(cls):
"""Overrides all other add_dimension methods in super classes up the entire class
hierarchy ensuring that each test in this class run using the hs2-http protocol."""
cls.ImpalaTestMatrix.add_dimension(create_client_protocol_http_transport())
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
"-log_dir={0} -v 2 -jwks_file_path={1} -jwt_custom_claim_username=sub "
"-jwt_token_auth=true -jwt_allow_without_tls=true"
.format(LOG_DIR_JWT_AUTH_SUCCESS, JWKS_JSON_PATH))
def test_jwt_auth_valid(self, vector):
"""Asserts the Impala shell can authenticate to Impala using JWT authentication.
Also executes a query to ensure the authentication was successful."""
args = ['--protocol', vector.get_value('protocol'), '-j', '--jwt_cmd',
'cat {0}'.format(TestImpalaShellJWTAuth.JWT_SIGNED_PATH),
'-q', 'select version()', '--auth_creds_ok_in_clear']
result = run_impala_shell_cmd(vector, args)
# Ensure the Impala coordinator is correctly reporting the jwt auth metrics
# must be done before the cluster shuts down since it calls to the coordinator
sleep(5)
self.__assert_success_fail_metric(success_count_min=15, success_count_max=16)
# Shut down cluster to ensure logs flush to disk.
self._stop_impala_cluster()
# Ensure JWT auth was enabled by checking the coordinator startup flags logged
# in the coordinator's INFO logfile
expected_strings = [
'--jwks_file_path={0}'.format(self.JWKS_JSON_PATH),
'effective username: test-user',
'connected_user (string) = "test-user"',
]
# Ensure JWT auth was successful by checking impala coordinator logs
self.__assert_log_file(self.LOG_DIR_JWT_AUTH_SUCCESS,
"impalad.INFO", expected_strings)
# Ensure the query ran successfully.
assert "version()" in result.stdout
assert "impalad version" in result.stdout
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
"-log_dir={0} -v 2 -jwks_file_path={1} -jwt_custom_claim_username=sub "
"-jwt_token_auth=true -jwt_allow_without_tls=true"
.format(LOG_DIR_JWT_AUTH_FAIL, JWKS_JSON_PATH))
def test_jwt_auth_expired(self, vector):
"""Asserts the Impala shell fails to authenticate when it presents a JWT that has a
valid signature but is expired."""
args = ['--protocol', vector.get_value('protocol'), '-j', '--jwt_cmd',
'cat {0}'.format(TestImpalaShellJWTAuth.JWT_EXPIRED_PATH),
'-q', 'select version()', '--auth_creds_ok_in_clear']
result = run_impala_shell_cmd(vector, args, expect_success=False)
# Ensure the Impala coordinator is correctly reporting the jwt auth metrics
# must be done before the cluster shuts down since it calls to the coordinator
sleep(5)
self.__assert_success_fail_metric(failure_count_min=4, failure_count_max=4)
# Shut down cluster to ensure logs flush to disk.
self._stop_impala_cluster()
# Ensure JWT auth was enabled by checking the coordinator startup flags logged
# in the coordinator's INFO logfile
expected_strings = ['--jwks_file_path={0}'.format(self.JWKS_JSON_PATH)]
self.__assert_log_file(self.LOG_DIR_JWT_AUTH_FAIL,
"impalad.INFO", expected_strings)
# Ensure JWT auth failed by checking impala coordinator logs
expected_strings = [
'Error verifying JWT token',
'Error verifying JWT Token: Verification failed, error: token expired'
]
self.__assert_log_file(self.LOG_DIR_JWT_AUTH_FAIL,
"impalad.ERROR", expected_strings)
# Ensure the shell login failed.
assert "Error connecting: HttpError" in result.stderr
assert "HTTP code 401: Unauthorized" in result.stderr
assert "Not connected to Impala, could not execute queries." in result.stderr
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
"-log_dir={0} -v 2 -jwks_file_path={1} -jwt_custom_claim_username=sub "
"-jwt_token_auth=true -jwt_allow_without_tls=true"
.format(LOG_DIR_JWT_AUTH_INVALID_JWK, JWKS_JSON_PATH))
def test_jwt_auth_invalid_jwk(self, vector):
"""Asserts the Impala shell fails to authenticate when it presents a JWT that has a
valid signature but is expired."""
args = ['--protocol', vector.get_value('protocol'), '-j', '--jwt_cmd',
'cat {0}'.format(TestImpalaShellJWTAuth.JWT_INVALID_JWK),
'-q', 'select version()', '--auth_creds_ok_in_clear']
result = run_impala_shell_cmd(vector, args, expect_success=False)
# Ensure the Impala coordinator is correctly reporting the jwt auth metrics
# must be done before the cluster shuts down since it calls to the coordinator
sleep(5)
self.__assert_success_fail_metric(failure_count_min=4, failure_count_max=4)
# Shut down cluster to ensure logs flush to disk.
self._stop_impala_cluster()
# Ensure JWT auth was enabled by checking the coordinator startup flags logged
# in the coordinator's INFO logfile
expected_strings = ['--jwks_file_path={0}'.format(self.JWKS_JSON_PATH)]
self.__assert_log_file(self.LOG_DIR_JWT_AUTH_INVALID_JWK,
"impalad.INFO", expected_strings)
# Ensure JWT auth failed by checking impala coordinator logs
expected_strings = [
'Error verifying JWT token',
'Error verifying JWT Token: Invalid JWK ID in the JWT token'
]
self.__assert_log_file(self.LOG_DIR_JWT_AUTH_INVALID_JWK,
"impalad.ERROR", expected_strings)
# Ensure the shell login failed.
assert "Error connecting: HttpError" in result.stderr
assert "HTTP code 401: Unauthorized" in result.stderr
assert "Not connected to Impala, could not execute queries." in result.stderr
def __assert_log_file(self, log_dir, log_file, expected_strings):
  """Asserts that every expected string occurs within at least one line of a log file.

  log_dir - path to the directory where the log file exists
  log_file - name of the file within the specified directory that will be searched
  expected_strings - list of strings to search for within the log file

  Raises an AssertionError naming the first string, in the order given, that was not
  found in any line. Errors raised while opening the file propagate to the caller.
  """
  log_path = os.path.join(log_dir, log_file)
  wanted = list(expected_strings)
  # Strings not seen so far; once this is empty the rest of the file is irrelevant.
  missing = set(wanted)

  with open(log_path) as log:
    for line in log:
      if not missing:
        break
      missing.difference_update([text for text in missing if text in line])

  # Walk the caller's list rather than the set so that the string reported on failure
  # does not depend on hash ordering.
  for text in wanted:
    assert text not in missing, (
        "Did not find expected string '{0}' in log file '{1}'"
        .format(text, log_path))
def __assert_success_fail_metric(self, success_count_min=0, success_count_max=0,
                                 failure_count_min=0, failure_count_max=0):
  """Checks the JWT authentication success and failure counters that Impala emits.

  The success counter is checked first, then the failure counter, each against its
  own inclusive [min, max] range. All four bounds default to 0."""
  expected_ranges = (
    ("impala.thrift-server.hiveserver2-http-frontend.total-jwt-token-auth-success",
     success_count_min, success_count_max),
    ("impala.thrift-server.hiveserver2-http-frontend.total-jwt-token-auth-failure",
     failure_count_min, failure_count_max),
  )
  for metric_name, lower_bound, upper_bound in expected_ranges:
    self.__assert_counter(metric_name, lower_bound, upper_bound)
def __assert_counter(self, counter_name, expected_count_min, expected_count_max):
  """Reads the specified counter metric from the first impalad of the cluster and
  asserts that its value lies between expected_count_min and expected_count_max,
  both bounds included."""
  impalad_service = self.cluster.impalads[0].service
  actual_count = impalad_service.get_metric_value(counter_name)
  within_range = expected_count_min <= actual_count <= expected_count_max
  assert within_range, (
      "expected counter '{0}' to have a value between '{1}' and '{2}' inclusive "
      "but its value was {3}"
      .format(counter_name, expected_count_min, expected_count_max, actual_count))

View File

@@ -0,0 +1,104 @@
#!/usr/bin/env impala-python
# -*- coding: utf-8 -*-
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import, division, print_function
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_dimensions import create_client_protocol_http_transport
from tests.shell.util import run_impala_shell_cmd
class TestImpalaShellJwtAuth(ImpalaTestSuite):
  """Command line validation tests for the JWT authentication options of the Impala
  shell.

  Every test runs the shell through run_impala_shell_cmd() with expect_success=False
  and checks that the shell's stderr contains the expected explanatory message."""

  # Message expected on stderr when more than one of -k, -l and -j is passed.
  _MULTIPLE_AUTH_ERROR = \
      "Please specify at most one authentication mechanism (-k, -l, or -j)"

  @classmethod
  def get_workload(cls):
    """Returns the workload for this suite: 'functional-query'."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Overrides the add_test_dimensions methods of all super classes so that the
    only dimension added is the one from create_client_protocol_http_transport(),
    ensuring that each test in this class only gets run once."""
    cls.ImpalaTestMatrix.add_dimension(create_client_protocol_http_transport())

  def _assert_shell_rejects(self, vector, shell_args, *expected_errors):
    """Runs the shell with shell_args, passing expect_success=False, and asserts that
    each string in expected_errors is contained in the stderr of the run.

    Returns the result object from run_impala_shell_cmd()."""
    result = run_impala_shell_cmd(vector, shell_args, expect_success=False)
    for expected_error in expected_errors:
      assert expected_error in result.stderr
    return result

  def test_jwt_cmd_without_jwt_auth(self, vector):
    """Asserts the jwt_cmd arg is only allowed when JWT auth is enabled."""
    self._assert_shell_rejects(
        vector,
        ['--jwt_cmd=echo', '--protocol=hs2-http', '--auth_creds_ok_in_clear'],
        "Option --jwt_cmd requires using JWT authentication mechanism (-j)")

  def test_jwt_cmd_invalid(self, vector):
    """Asserts an invalid jwt_cmd arg value produces an explanatory error message."""
    self._assert_shell_rejects(
        vector,
        ['-j', '--protocol=hs2-http', '--auth_creds_ok_in_clear',
         '--jwt_cmd=idontexist'],
        "Error retrieving JWT",
        "command was: 'idontexist'")

  def test_jwt_auth_without_ssl_creds_in_clear(self, vector):
    """Asserts that JWTs do not get sent over insecure network connections if the user
    does not provide the auth_creds_ok_in_clear arg."""
    self._assert_shell_rejects(
        vector,
        ['-j', '--protocol=hs2-http'],
        "JWTs may not be sent over insecure connections. Enable SSL or "
        "set --auth_creds_ok_in_clear")

  def test_jwt_auth_protocol_beeswax(self, vector):
    """Asserts that JWT auth does not work with the beeswax protocol."""
    self._assert_shell_rejects(
        vector,
        ['-j', '--protocol=beeswax'],
        "Invalid protocol 'beeswax'. JWT authentication requires using the "
        "'hs2-http' protocol")

  def test_jwt_auth_protocol_hs2_no_http(self, vector):
    """Asserts that JWT auth does not work with the plain hs2 protocol."""
    self._assert_shell_rejects(
        vector,
        ['-j', '--protocol=hs2'],
        "Invalid protocol 'hs2'. JWT authentication requires using the "
        "'hs2-http' protocol")

  def test_jwt_auth_protocol_strict_hs2(self, vector):
    """Asserts that JWT auth does not work when strict hs2 is enabled."""
    self._assert_shell_rejects(
        vector,
        ['-j', '--protocol=hs2-http', '--strict_hs2_protocol'],
        "JWT authentication is not supported when using strict hs2.")

  def test_multiple_auth_ldap_jwt(self, vector):
    """Asserts that ldap and jwt auth cannot both be enabled."""
    self._assert_shell_rejects(vector, ['-l', '-j'], self._MULTIPLE_AUTH_ERROR)

  def test_multiple_auth_ldap_kerberos(self, vector):
    """Asserts that ldap and kerberos auth cannot both be enabled."""
    self._assert_shell_rejects(vector, ['-l', '-k'], self._MULTIPLE_AUTH_ERROR)

  def test_multiple_auth_jwt_kerberos(self, vector):
    """Asserts that jwt and kerberos auth cannot both be enabled."""
    self._assert_shell_rejects(vector, ['-j', '-k'], self._MULTIPLE_AUTH_ERROR)

  def test_multiple_auth_ldap_jwt_kerberos(self, vector):
    """Asserts ldap, jwt, and kerberos auth cannot all be enabled."""
    self._assert_shell_rejects(vector, ['-l', '-j', '-k'], self._MULTIPLE_AUTH_ERROR)