mirror of
https://github.com/apache/impala.git
synced 2025-12-30 03:01:44 -05:00
For files that have a Cloudera copyright (and no other copyright notice), make changes to follow the ASF source file header policy here: http://www.apache.org/legal/src-headers.html#headers Specifically: 1) Remove the Cloudera copyright. 2) Modify NOTICE.txt according to http://www.apache.org/legal/src-headers.html#notice to follow that format and add a line for Cloudera. 3) Replace or add the existing ASF license text with the one given on the website. Much of this change was automatically generated via: git grep -li 'Copyright.*Cloudera' > modified_files.txt cat modified_files.txt | xargs perl -n -i -e 'print unless m#Copyright.*Cloudera#i;' cat modified_files_txt | xargs fix_apache_license.py [1] Some manual fixups were performed following those steps, especially when license text was completely missing from the file. [1] https://gist.github.com/anonymous/ff71292094362fc5c594 with minor modification to ORIG_LICENSE to match Impala's license text. Change-Id: I2e0bd8420945b953e1b806041bea4d72a3943d86 Reviewed-on: http://gerrit.cloudera.org:8080/3779 Reviewed-by: Dan Hecht <dhecht@cloudera.com> Tested-by: Internal Jenkins
198 lines
9.0 KiB
Python
198 lines
9.0 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
# Client tests for SQL statement authorization
|
|
|
|
import os
|
|
import pytest
|
|
import shutil
|
|
import tempfile
|
|
import json
|
|
from time import sleep, time
|
|
from getpass import getuser
|
|
from TCLIService import TCLIService
|
|
from thrift.transport.TSocket import TSocket
|
|
from thrift.transport.TTransport import TBufferedTransport
|
|
from thrift.protocol import TBinaryProtocol
|
|
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
|
|
from tests.hs2.hs2_test_suite import operation_id_to_query_id
|
|
from tests.util.filesystem_utils import WAREHOUSE
|
|
|
|
AUTH_POLICY_FILE = "%s/authz-policy.ini" % WAREHOUSE
|
|
|
|
class TestAuthorization(CustomClusterTestSuite):
|
|
AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR'))
|
|
|
|
def setup(self):
|
|
host, port = (self.cluster.impalads[0].service.hostname,
|
|
self.cluster.impalads[0].service.hs2_port)
|
|
self.socket = TSocket(host, port)
|
|
self.transport = TBufferedTransport(self.socket)
|
|
self.transport.open()
|
|
self.protocol = TBinaryProtocol.TBinaryProtocol(self.transport)
|
|
self.hs2_client = TCLIService.Client(self.protocol)
|
|
|
|
def teardown(self):
|
|
if self.socket:
|
|
self.socket.close()
|
|
shutil.rmtree(self.AUDIT_LOG_DIR, ignore_errors=True)
|
|
|
|
@pytest.mark.execute_serially
|
|
@CustomClusterTestSuite.with_args("--server_name=server1\
|
|
--authorization_policy_file=%s\
|
|
--authorization_policy_provider_class=%s" %\
|
|
(AUTH_POLICY_FILE,
|
|
"org.apache.sentry.provider.file.LocalGroupResourceAuthorizationProvider"))
|
|
def test_custom_authorization_provider(self):
|
|
from tests.hs2.test_hs2 import TestHS2
|
|
open_session_req = TCLIService.TOpenSessionReq()
|
|
# User is 'test_user' (defined in the authorization policy file)
|
|
open_session_req.username = 'test_user'
|
|
open_session_req.configuration = dict()
|
|
resp = self.hs2_client.OpenSession(open_session_req)
|
|
TestHS2.check_response(resp)
|
|
|
|
# Try to query a table we are not authorized to access.
|
|
self.session_handle = resp.sessionHandle
|
|
execute_statement_req = TCLIService.TExecuteStatementReq()
|
|
execute_statement_req.sessionHandle = self.session_handle
|
|
execute_statement_req.statement = "describe tpch_seq.lineitem"
|
|
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
|
|
assert 'User \'%s\' does not have privileges to access' % 'test_user' in\
|
|
str(execute_statement_resp)
|
|
|
|
# Now try the same operation on a table we are authorized to access.
|
|
execute_statement_req = TCLIService.TExecuteStatementReq()
|
|
execute_statement_req.sessionHandle = self.session_handle
|
|
execute_statement_req.statement = "describe tpch.lineitem"
|
|
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
|
|
TestHS2.check_response(execute_statement_resp)
|
|
|
|
@pytest.mark.execute_serially
|
|
@CustomClusterTestSuite.with_args("--server_name=server1\
|
|
--authorization_policy_file=%s\
|
|
--authorized_proxy_user_config=hue=%s\
|
|
--abort_on_failed_audit_event=false\
|
|
--audit_event_log_dir=%s" % (AUTH_POLICY_FILE, getuser(), AUDIT_LOG_DIR))
|
|
def test_impersonation(self):
|
|
"""End-to-end impersonation + authorization test. Expects authorization to be
|
|
configured before running this test"""
|
|
# TODO: To reuse the HS2 utility code from the TestHS2 test suite we need to import
|
|
# the module within this test function, rather than as a top-level import. This way
|
|
# the tests in that module will not get pulled when executing this test suite. The fix
|
|
# is to split the utility code out of the TestHS2 class and support HS2 as a first
|
|
# class citizen in our test framework.
|
|
from tests.hs2.test_hs2 import TestHS2
|
|
open_session_req = TCLIService.TOpenSessionReq()
|
|
# Connected user is 'hue'
|
|
open_session_req.username = 'hue'
|
|
open_session_req.configuration = dict()
|
|
# Delegated user is the current user
|
|
open_session_req.configuration['impala.doas.user'] = getuser()
|
|
resp = self.hs2_client.OpenSession(open_session_req)
|
|
TestHS2.check_response(resp)
|
|
|
|
# Try to query a table we are not authorized to access.
|
|
self.session_handle = resp.sessionHandle
|
|
execute_statement_req = TCLIService.TExecuteStatementReq()
|
|
execute_statement_req.sessionHandle = self.session_handle
|
|
execute_statement_req.statement = "describe tpch_seq.lineitem"
|
|
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
|
|
assert 'User \'%s\' does not have privileges to access' % getuser() in\
|
|
str(execute_statement_resp)
|
|
|
|
assert self.__wait_for_audit_record(user=getuser(), impersonator='hue'),\
|
|
'No matching audit event recorded in time window'
|
|
|
|
# Now try the same operation on a table we are authorized to access.
|
|
execute_statement_req = TCLIService.TExecuteStatementReq()
|
|
execute_statement_req.sessionHandle = self.session_handle
|
|
execute_statement_req.statement = "describe tpch.lineitem"
|
|
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
|
|
|
|
TestHS2.check_response(execute_statement_resp)
|
|
|
|
# Verify the correct user information is in the runtime profile
|
|
query_id = operation_id_to_query_id(
|
|
execute_statement_resp.operationHandle.operationId)
|
|
profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
|
|
self.__verify_profile_user_fields(profile_page, effective_user=getuser(),
|
|
delegated_user=getuser(), connected_user='hue')
|
|
|
|
# Try to user we are not authorized to delegate to.
|
|
open_session_req.configuration['impala.doas.user'] = 'some_user'
|
|
resp = self.hs2_client.OpenSession(open_session_req)
|
|
assert 'User \'hue\' is not authorized to delegate to \'some_user\'' in str(resp)
|
|
|
|
# Create a new session which does not have a do_as_user.
|
|
open_session_req.username = 'hue'
|
|
open_session_req.configuration = dict()
|
|
resp = self.hs2_client.OpenSession(open_session_req)
|
|
TestHS2.check_response(resp)
|
|
|
|
# Run a simple query, which should succeed.
|
|
execute_statement_req = TCLIService.TExecuteStatementReq()
|
|
execute_statement_req.sessionHandle = resp.sessionHandle
|
|
execute_statement_req.statement = "select 1"
|
|
execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req)
|
|
TestHS2.check_response(execute_statement_resp)
|
|
|
|
# Verify the correct user information is in the runtime profile. Since there is
|
|
# no do_as_user the Delegated User field should be empty.
|
|
query_id = operation_id_to_query_id(
|
|
execute_statement_resp.operationHandle.operationId)
|
|
|
|
profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id)
|
|
self.__verify_profile_user_fields(profile_page, effective_user='hue',
|
|
delegated_user='', connected_user='hue')
|
|
|
|
self.socket.close()
|
|
self.socket = None
|
|
|
|
def __verify_profile_user_fields(self, profile_str, effective_user, connected_user,
|
|
delegated_user):
|
|
"""Verifies the given runtime profile string contains the specified values for
|
|
User, Connected User, and Delegated User"""
|
|
assert '\n User: %s\n' % effective_user in profile_str
|
|
assert '\n Connected User: %s\n' % connected_user in profile_str
|
|
assert '\n Delegated User: %s\n' % delegated_user in profile_str
|
|
|
|
def __wait_for_audit_record(self, user, impersonator, timeout_secs=30):
|
|
"""Waits until an audit log record is found that contains the given user and
|
|
impersonator, or until the timeout is reached.
|
|
"""
|
|
# The audit event might not show up immediately (the audit logs are flushed to disk
|
|
# on regular intervals), so poll the audit event logs until a matching record is
|
|
# found.
|
|
start_time = time()
|
|
while time() - start_time < timeout_secs:
|
|
for audit_file_name in os.listdir(self.AUDIT_LOG_DIR):
|
|
if self.__find_matching_audit_record(audit_file_name, user, impersonator):
|
|
return True
|
|
sleep(1)
|
|
return False
|
|
|
|
def __find_matching_audit_record(self, audit_file_name, user, impersonator):
|
|
with open(os.path.join(self.AUDIT_LOG_DIR, audit_file_name)) as audit_log_file:
|
|
for line in audit_log_file.readlines():
|
|
json_dict = json.loads(line)
|
|
if len(json_dict) == 0: continue
|
|
if json_dict[min(json_dict)]['user'] == user and\
|
|
json_dict[min(json_dict)]['impersonator'] == impersonator:
|
|
return True
|
|
return False
|