From c23bf38a201e400a98b2f8f34b5aec95fad27e2b Mon Sep 17 00:00:00 2001 From: Sailesh Mukil Date: Mon, 15 Aug 2016 15:38:38 -0700 Subject: [PATCH] IMPALA-3893, IMPALA-3901: impala-shell prints incorrect coordinator address, overly verbose The webserver address was always configured as 0.0.0.0 (meaning that the webserver could be reached on any IP for that machine) unless otherwise specified. This is not a correct value to dispay to the user. This patch returns the hostname of the node, when requested, if the webserver host address is 0.0.0.0. This patch also does not print the coordinator link for very simple queries, as it's not necessary and is unnecessarily verbose. This patch also does away with pinging the impalad an extra time per query for finding the host time and webserver address. It instead remembers the webserver address at connect time and displays client local time for every query instead. Change-Id: I9d167b66f2dd8629e40a7094d21ea7ce6b43d23b Reviewed-on: http://gerrit.cloudera.org:8080/3994 Tested-by: Internal Jenkins Reviewed-by: Sailesh Mukil Tested-by: Sailesh Mukil --- be/src/service/impala-beeswax-server.cc | 1 - be/src/util/webserver.cc | 8 ++++- common/thrift/ImpalaService.thrift | 3 -- shell/impala_client.py | 2 +- shell/impala_shell.py | 38 +++++++++++--------- tests/shell/test_shell_commandline.py | 47 +++++++++++++++++++++---- 6 files changed, 69 insertions(+), 30 deletions(-) diff --git a/be/src/service/impala-beeswax-server.cc b/be/src/service/impala-beeswax-server.cc index cdfa52fce..3daa36bd1 100644 --- a/be/src/service/impala-beeswax-server.cc +++ b/be/src/service/impala-beeswax-server.cc @@ -502,7 +502,6 @@ void ImpalaServer::PingImpalaService(TPingImpalaServiceResp& return_val) { VLOG_RPC << "PingImpalaService()"; return_val.version = GetVersionString(true); return_val.webserver_address = ExecEnv::GetInstance()->webserver()->Url(); - return_val.epoch_time = reinterpret_cast(std::time(0)); VLOG_RPC << "PingImpalaService(): return_val=" << ThriftDebugString(return_val); } diff --git a/be/src/util/webserver.cc b/be/src/util/webserver.cc index dcbfec63d..41974bb99 100644 --- a/be/src/util/webserver.cc +++ b/be/src/util/webserver.cc @@ -219,8 +219,14 @@ bool Webserver::IsSecure() const { } string Webserver::Url() { + string hostname = http_address_.hostname; + if (IsWildcardAddress(http_address_.hostname)) { + if (!GetHostname(&hostname).ok()) { + hostname = http_address_.hostname; + } + } return Substitute("$0://$1:$2", IsSecure() ? "https" : "http", - http_address_.hostname, http_address_.port); + hostname, http_address_.port); } Status Webserver::Start() { diff --git a/common/thrift/ImpalaService.thrift b/common/thrift/ImpalaService.thrift index 789641a62..25dfddfdc 100644 --- a/common/thrift/ImpalaService.thrift +++ b/common/thrift/ImpalaService.thrift @@ -251,9 +251,6 @@ struct TPingImpalaServiceResp { // The Impalad's webserver address. 2: string webserver_address - - // The Impalad's server time. - 3: i64 epoch_time } // Parameters for a ResetTable request which will invalidate a table's metadata. diff --git a/shell/impala_client.py b/shell/impala_client.py index 7782aa2b2..bc20b096d 100755 --- a/shell/impala_client.py +++ b/shell/impala_client.py @@ -235,7 +235,7 @@ class ImpalaClient(object): self.imp_service = ImpalaService.Client(protocol) result = self.ping_impala_service() self.connected = True - return result.version + return result def ping_impala_service(self): return self.imp_service.PingImpalaService() diff --git a/shell/impala_shell.py b/shell/impala_shell.py index b9dc60a01..74d7ecb73 100755 --- a/shell/impala_shell.py +++ b/shell/impala_shell.py @@ -91,6 +91,7 @@ class ImpalaShell(cmd.Cmd): # If not connected to an impalad, the server version is unknown. UNKNOWN_SERVER_VERSION = "Not Connected" DISCONNECTED_PROMPT = "[Not connected] > " + UNKNOWN_WEBSERVER = "0.0.0.0" # Message to display in shell when cancelling a query CANCELLATION_MESSAGE = ' Cancelling Query' # Number of times to attempt cancellation before giving up. @@ -134,6 +135,7 @@ class ImpalaShell(cmd.Cmd): self.verbose = options.verbose self.prompt = ImpalaShell.DISCONNECTED_PROMPT self.server_version = ImpalaShell.UNKNOWN_SERVER_VERSION + self.webserver_address = ImpalaShell.UNKNOWN_WEBSERVER self.refresh_after_connect = options.refresh_after_connect self.current_db = options.default_db @@ -680,7 +682,9 @@ class ImpalaShell(cmd.Cmd): def _connect(self): try: - self.server_version = self.imp_client.connect() + result = self.imp_client.connect() + self.server_version = result.version + self.webserver_address = result.webserver_address except TApplicationException: # We get a TApplicationException if the transport is valid, # but the RPC does not exist. @@ -751,9 +755,11 @@ class ImpalaShell(cmd.Cmd): return self._execute_stmt(query) def do_create(self, args): + # We want to print the webserver link only for CTAS queries. + print_web_link = "select" in args query = self.imp_client.create_beeswax_query("create %s" % args, self.set_query_options) - return self._execute_stmt(query) + return self._execute_stmt(query, print_web_link=print_web_link) def do_drop(self, args): query = self.imp_client.create_beeswax_query("drop %s" % args, @@ -780,7 +786,7 @@ class ImpalaShell(cmd.Cmd): """Executes a SELECT... query, fetching all rows""" query = self.imp_client.create_beeswax_query("select %s" % args, self.set_query_options) - return self._execute_stmt(query) + return self._execute_stmt(query, print_web_link=True) def do_compute(self, args): """Executes a COMPUTE STATS query. @@ -850,7 +856,7 @@ class ImpalaShell(cmd.Cmd): "#Rows", "Est. #Rows", "Peak Mem", "Est. Peak Mem", "Detail"]) - def _execute_stmt(self, query, is_insert=False): + def _execute_stmt(self, query, is_insert=False, print_web_link=False): """ The logic of executing any query statement The client executes the query and the query_handle is returned immediately, @@ -862,26 +868,24 @@ class ImpalaShell(cmd.Cmd): The execution time is printed and the query is closed if it hasn't been already """ - self._print_if_verbose("Query: %s" % (query.query,)) + self._print_if_verbose("Query: %s" % query.query) # TODO: Clean up this try block and refactor it (IMPALA-3814) try: - # Get the hostname, webserver port and the current coordinator time. - coordinator = self.imp_client.ping_impala_service() - # If the coordinator is on a different time zone, the epoch time returned will be - # different than from this system, so time.localtime(coordinator.epoch_time) will - # return the timestamp of the server. - self._print_if_verbose("Query submitted at: %s (Coordinator: %s)" % (time.strftime( - "%Y-%m-%d %H:%M:%S", time.localtime(coordinator.epoch_time)), - coordinator.webserver_address)) + if self.webserver_address == ImpalaShell.UNKNOWN_WEBSERVER: + print_web_link = False + if print_web_link: + self._print_if_verbose("Query submitted at: %s (Coordinator: %s)" % + (time.strftime("%Y-%m-%d %H:%M:%S", time.localtime()), + self.webserver_address)) start_time = time.time() self.last_query_handle = self.imp_client.execute_query(query) self.query_handle_closed = False self.last_summary = time.time() - if coordinator.webserver_address: + if print_web_link: self._print_if_verbose( "Query progress can be monitored at: %s/query_plan?query_id=%s" % - (coordinator.webserver_address, self.last_query_handle.id)) + (self.webserver_address, self.last_query_handle.id)) wait_to_finish = self.imp_client.wait_to_finish(self.last_query_handle, self._periodic_wait_callback) @@ -1020,7 +1024,7 @@ class ImpalaShell(cmd.Cmd): """Executes an INSERT query""" query = self.imp_client.create_beeswax_query("insert %s" % args, self.set_query_options) - return self._execute_stmt(query, is_insert=True) + return self._execute_stmt(query, is_insert=True, print_web_link=True) def do_explain(self, args): """Explain the query execution plan""" @@ -1103,7 +1107,7 @@ class ImpalaShell(cmd.Cmd): def default(self, args): query = self.imp_client.create_beeswax_query(args, self.set_query_options) - return self._execute_stmt(query) + return self._execute_stmt(query, print_web_link=True) def emptyline(self): """If an empty line is entered, do nothing""" diff --git a/tests/shell/test_shell_commandline.py b/tests/shell/test_shell_commandline.py index 29c504054..31cf5704d 100644 --- a/tests/shell/test_shell_commandline.py +++ b/tests/shell/test_shell_commandline.py @@ -420,14 +420,47 @@ class TestImpalaShell(ImpalaTestSuite): result = run_impala_shell_cmd(args, expect_success=True) assert_var_substitution(result) - def test_query_start_time_message(self): - results = run_impala_shell_cmd('--query="select 1"') - assert "Query submitted at: " in results.stderr + # Checks if 'messages' exists/does not exist in 'result_stderr' based on the value of + # 'should_exist' + def _validate_shell_messages(self, result_stderr, messages, should_exist=True): + for msg in messages: + if should_exist: + assert msg in result_stderr + else: + assert msg not in result_stderr - def test_query_coordinator_link_message(self): - results = run_impala_shell_cmd('--query="select 1"') - assert "(Coordinator: " in results.stderr - assert "Query progress can be monitored at: " in results.stderr + def test_query_time_and_link_message(self, unique_database): + shell_messages = ["Query submitted at: ", "(Coordinator: ", + "Query progress can be monitored at: "] + # CREATE statements should not print query time and webserver address. + results = run_impala_shell_cmd('--query="create table %s.shell_msg_test (id int)"' % + unique_database) + self._validate_shell_messages(results.stderr, shell_messages, should_exist=False) + + # SELECT, INSERT and CTAS queries should print the query time message and webserver + # address. + results = run_impala_shell_cmd('--query="insert into %s.shell_msg_test values (1)"' % + unique_database) + self._validate_shell_messages(results.stderr, shell_messages, should_exist=True) + results = run_impala_shell_cmd('--query="select * from %s.shell_msg_test"' % + unique_database) + self._validate_shell_messages(results.stderr, shell_messages, should_exist=True) + results = run_impala_shell_cmd('--query="create table %s.shell_msg_ctas_test as \ + select * from %s.shell_msg_test"' % (unique_database, unique_database)) + self._validate_shell_messages(results.stderr, shell_messages, should_exist=True) + + # DROP statements should not print query time and webserver address. + results = run_impala_shell_cmd('--query="drop table %s.shell_msg_test"' % + unique_database) + self._validate_shell_messages(results.stderr, shell_messages, should_exist=False) + run_impala_shell_cmd('--query="drop table %s.shell_msg_ctas_test"' % + unique_database) + + # Simple queries should not print query time and webserver address. + results = run_impala_shell_cmd('--query="use default"') + self._validate_shell_messages(results.stderr, shell_messages, should_exist=False) + results = run_impala_shell_cmd('--query="show tables"') + self._validate_shell_messages(results.stderr, shell_messages, should_exist=False) def test_missing_query_file(self): result = run_impala_shell_cmd('-f nonexistent.sql', expect_success=False)