From 892eccc8d0d09594cf77ac039206da4ea604c74f Mon Sep 17 00:00:00 2001 From: Lenni Kuff Date: Mon, 2 Jun 2014 15:53:02 -0700 Subject: [PATCH] CDH-19184: Impala should show impersonated user (if there is one) rather than connected user Currently, we always display the 'User' as the connected user in the debug webpage and runtime profiles. This is confusing when impersonation + authorization is enabled because there is not an easy way to find the impersonated user other than looking at the audit log records. This change does the following: * Updates the "User" field in the runtime profile to show the "effective user". The effective user is the connected user if there is no impersonated user, otherwise it is the impersonated user. This should help CM display the correct user as well. * Add two new fields in the runtime profile "Connected User" & "Impersonated User" to make it easier to tell which user is which. * Update the /queries debug webpage to show the effective user rather than the connected user. Change-Id: I639de6738242d2c378e785271a72257301a53ade Reviewed-on: http://gerrit.ent.cloudera.com:8080/2863 Reviewed-by: Lenni Kuff Tested-by: jenkins (cherry picked from commit d4ad768780dfdfe0874f2b3e9c59074f1c3685d7) Reviewed-on: http://gerrit.ent.cloudera.com:8080/2935 --- be/src/service/impala-server-callbacks.cc | 4 ++- be/src/service/impala-server.cc | 13 +++----- be/src/service/impala-server.h | 6 ++-- be/src/service/query-exec-state.cc | 4 ++- be/src/service/query-exec-state.h | 10 +++++- tests/authorization/test_authorization.py | 39 +++++++++++++++++++++++ 6 files changed, 63 insertions(+), 13 deletions(-) diff --git a/be/src/service/impala-server-callbacks.cc b/be/src/service/impala-server-callbacks.cc index 4c0b2b72e..3694e489e 100644 --- a/be/src/service/impala-server-callbacks.cc +++ b/be/src/service/impala-server-callbacks.cc @@ -158,7 +158,7 @@ void ImpalaServer::InflightQueryIdsPathHandler(const Webserver::ArgumentMap& arg void ImpalaServer::RenderSingleQueryTableRow(const ImpalaServer::QueryStateRecord& record, bool render_end_time, bool render_cancel, stringstream* output) { (*output) << "" - << "" << record.user << "" + << "" << record.effective_user << "" << "" << record.default_db << "" << "" << record.stmt << "" << "" @@ -294,6 +294,7 @@ void ImpalaServer::SessionPathHandler(const Webserver::ArgumentMap& args, << "Session Type" << "Open Queries" << "User" + << "Impersonated User" << "Session ID" << "Network Address" << "Default Database" @@ -310,6 +311,7 @@ void ImpalaServer::SessionPathHandler(const Webserver::ArgumentMap& args, << "" << PrintTSessionType(session.second->session_type) << "" << "" << session.second->inflight_queries.size() << "" << "" << session.second->connected_user << "" + << "" << session.second->do_as_user << "" << "" << session.first << "" << "" << session.second->network_address << "" << "" << session.second->database << "" diff --git a/be/src/service/impala-server.cc b/be/src/service/impala-server.cc index aefc57b14..7403a3cf1 100644 --- a/be/src/service/impala-server.cc +++ b/be/src/service/impala-server.cc @@ -318,16 +318,13 @@ Status ImpalaServer::LogAuditRecord(const ImpalaServer::QueryExecState& exec_sta writer.String("status"); writer.String(exec_state.query_status().GetErrorMsg().c_str()); writer.String("user"); - // If do_as_user() is empty, the "user" field should be set to the connected user - // and "impersonator" field should be Null. Otherwise, the "user" should be set to - // the current do_as_user() and "impersonator" should be the connected user. + writer.String(exec_state.effective_user().c_str()); + writer.String("impersonator"); if (exec_state.do_as_user().empty()) { - writer.String(exec_state.connected_user().c_str()); - writer.String("impersonator"); + // If there is no do_as_user() is empty, the "impersonator" field should be Null. writer.Null(); } else { - writer.String(exec_state.do_as_user().c_str()); - writer.String("impersonator"); + // Otherwise, the impersonator is the current connected user. writer.String(exec_state.connected_user().c_str()); } writer.String("statement_type"); @@ -1593,7 +1590,7 @@ ImpalaServer::QueryStateRecord::QueryStateRecord(const QueryExecState& exec_stat stmt = exec_state.sql_stmt(); stmt_type = request.stmt_type; - user = exec_state.connected_user(); + effective_user = exec_state.effective_user(); default_db = exec_state.default_db(); start_time = exec_state.start_time(); end_time = exec_state.end_time(); diff --git a/be/src/service/impala-server.h b/be/src/service/impala-server.h index 09869bce1..0ace55b97 100644 --- a/be/src/service/impala-server.h +++ b/be/src/service/impala-server.h @@ -486,8 +486,10 @@ class ImpalaServer : public ImpalaServiceIf, public ImpalaHiveServer2ServiceIf, // Query id TUniqueId id; - // User that ran the query - std::string user; + // Queries are run and authorized on behalf of the effective_user. + // If there is no impersonated user, this will be the connected user. Otherwise, it + // will be set to the impersonated user. + std::string effective_user; // default db for this query std::string default_db; diff --git a/be/src/service/query-exec-state.cc b/be/src/service/query-exec-state.cc index 6b547c67a..7c49c9200 100644 --- a/be/src/service/query-exec-state.cc +++ b/be/src/service/query-exec-state.cc @@ -93,7 +93,9 @@ ImpalaServer::QueryExecState::QueryExecState( summary_profile_.AddInfoString("Query State", PrintQueryState(query_state_)); summary_profile_.AddInfoString("Query Status", "OK"); summary_profile_.AddInfoString("Impala Version", GetVersionString(/* compact */ true)); - summary_profile_.AddInfoString("User", connected_user()); + summary_profile_.AddInfoString("User", effective_user()); + summary_profile_.AddInfoString("Connected User", connected_user()); + summary_profile_.AddInfoString("Impersonated User", do_as_user()); summary_profile_.AddInfoString("Network Address", lexical_cast(session_->network_address)); summary_profile_.AddInfoString("Default Db", default_db()); diff --git a/be/src/service/query-exec-state.h b/be/src/service/query-exec-state.h index 77f62c372..b817b30d6 100644 --- a/be/src/service/query-exec-state.h +++ b/be/src/service/query-exec-state.h @@ -121,7 +121,15 @@ class ImpalaServer::QueryExecState { Status SetResultCache(QueryResultSet* cache, int64_t max_size); ImpalaServer::SessionState* session() const { return session_.get(); } - const std::string& connected_user() const { return query_ctxt_.session.connected_user; } + // Queries are run and authorized on behalf of the effective_user. + // When a do_as_user is specified (is not empty), the effective_user is set to the + // do_as_user. This is because the connected_user is acting as a "proxy user" for the + // do_as_user. When do_as_user is empty, the effective_user is always set to the + // connected_user. + const std::string& effective_user() const { + return do_as_user().empty() ? connected_user() : do_as_user(); + } + const std::string& connected_user() const { return session_->connected_user; } const std::string& do_as_user() const { return session_->do_as_user; } TSessionType::type session_type() const { return query_ctxt_.session.session_type; } const TUniqueId& session_id() const { return query_ctxt_.session.session_id; } diff --git a/tests/authorization/test_authorization.py b/tests/authorization/test_authorization.py index 7534cf67a..e85542645 100755 --- a/tests/authorization/test_authorization.py +++ b/tests/authorization/test_authorization.py @@ -28,6 +28,7 @@ from thrift.transport.TTransport import TBufferedTransport from thrift.protocol import TBinaryProtocol from tests.common.custom_cluster_test_suite import CustomClusterTestSuite from tests.common.impala_test_suite import IMPALAD_HS2_HOST_PORT +from tests.hs2.hs2_test_suite import operation_id_to_query_id class TestAuthorization(CustomClusterTestSuite): AUDIT_LOG_DIR = tempfile.mkdtemp(dir=os.getenv('LOG_DIR')) @@ -60,8 +61,10 @@ class TestAuthorization(CustomClusterTestSuite): # class citizen in our test framework. from tests.hs2.test_hs2 import TestHS2 open_session_req = TCLIService.TOpenSessionReq() + # Connected user is 'hue' open_session_req.username = 'hue' open_session_req.configuration = dict() + # Impersonated user is the current user open_session_req.configuration['impala.doas.user'] = getuser() resp = self.hs2_client.OpenSession(open_session_req) TestHS2.check_response(resp) @@ -86,14 +89,50 @@ class TestAuthorization(CustomClusterTestSuite): TestHS2.check_response(execute_statement_resp) + # Verify the correct user information is in the runtime profile + query_id = operation_id_to_query_id( + execute_statement_resp.operationHandle.operationId) + profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id) + self.__verify_profile_user_fields(profile_page, effective_user=getuser(), + impersonated_user=getuser(), connected_user='hue') + # Try to impersonate as a user we are not authorized to impersonate. open_session_req.configuration['impala.doas.user'] = 'some_user' resp = self.hs2_client.OpenSession(open_session_req) assert 'User \'hue\' is not authorized to impersonate \'some_user\'' in str(resp) + # Create a new session which does not have a do_as_user. + open_session_req.username = 'hue' + open_session_req.configuration = dict() + resp = self.hs2_client.OpenSession(open_session_req) + TestHS2.check_response(resp) + + # Run a simple query, which should succeed. + execute_statement_req = TCLIService.TExecuteStatementReq() + execute_statement_req.sessionHandle = resp.sessionHandle + execute_statement_req.statement = "select 1" + execute_statement_resp = self.hs2_client.ExecuteStatement(execute_statement_req) + TestHS2.check_response(execute_statement_resp) + + # Verify the correct user information is in the runtime profile. Since there is + # no do_as_user the Impersonated User field should be empty. + query_id = operation_id_to_query_id( + execute_statement_resp.operationHandle.operationId) + profile_page = self.cluster.impalads[0].service.read_query_profile_page(query_id) + self.__verify_profile_user_fields(profile_page, effective_user='hue', + impersonated_user='', connected_user='hue') + self.socket.close() self.socket = None + def __verify_profile_user_fields(self, profile_str, effective_user, connected_user, + impersonated_user): + """Verifies the given runtime profile string contains the specified values for + User, Connected User, and Impersonated User""" + assert '\n User: %s\n' % effective_user in profile_str + assert '\n Connected User: %s\n' % connected_user in profile_str + assert '\n Impersonated User: %s\n' % impersonated_user in profile_str + def __wait_for_audit_record(self, user, impersonator, timeout_secs=30): """Waits until an audit log record is found that contains the given user and impersonator, or until the timeout is reached.