Files
impala/tests/hs2/test_json_endpoints.py
Henry Robinson 9f61397fc4 IMPALA-2905: Handle coordinator fragment lifecycle like all others
The plan-root fragment instance that runs on the coordinator should be
handled like all others: started via RPC and run asynchronously. Without
this, the fragment requires special-case code throughout the
coordinator, and does not show up in system metrics etc.

This patch adds a new sink type, PlanRootSink, to the root fragment
instance so that the coordinator can pull row batches that are pushed by
the root instance. The coordinator signals completion to the fragment
instance via closing the consumer side of the sink, whereupon the
instance is free to complete.

Since the root instance now runs asynchronously with respect to the coordinator,
we add several coordination methods to allow the coordinator to wait for
a point in the instance's execution to be hit - e.g. to wait until the
instance has been opened.

Done in this patch:

* Add PlanRootSink
* Add coordination to PFE to allow coordinator to observe lifecycle
* Make FragmentMgr a singleton
* Removed dead code from Coordinator::Wait() and elsewhere.
* Moved result output exprs out of QES and into PlanRootSink.
* Remove special-case limit-based teardown of coordinator fragment, and
  supporting functions in PlanFragmentExecutor.
* Simplified lifecycle of PlanFragmentExecutor by separating Open() into
  Open() and Exec(), the latter of which drives the sink by reading
  rows from the plan tree.
* Add child profile to PlanFragmentExecutor to measure time spent in
  each lifecycle phase.
* Removed dependency between InitExecProfiles() and starting root
  fragment.
* Removed mostly dead-code handling of LIMIT 0 queries.
* Ensured that SET returns a result set in all cases.
* Fix test_get_log() HS2 test. Errors are only guaranteed to be visible
  after fetch calls return EOS, but test was assuming this would happen
  after first fetch.

Change-Id: Ibb0064ec2f085fa3a5598ea80894fb489a01e4df
Reviewed-on: http://gerrit.cloudera.org:8080/4402
Tested-by: Internal Jenkins
Reviewed-by: Henry Robinson <henry@cloudera.com>
2016-10-16 15:55:29 +00:00

94 lines
4.3 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Tests for the JSON endpoints of the impalad web UI.
import json
import pytest
from urllib2 import urlopen
from tests.hs2.hs2_test_suite import HS2TestSuite
from TCLIService import TCLIService
class TestJsonEndpoints(HS2TestSuite):
  """Checks the JSON output of the impalad web UI /queries page over a query's life."""

  def _get_json_queries(self, http_addr):
    """Get the json output of the /queries page from the impalad web UI at http_addr.

    Returns the parsed JSON document. Asserts that the HTTP response message is 'OK'.
    """
    resp = urlopen("http://%s/queries?json" % http_addr)
    try:
      assert resp.msg == 'OK'
      return json.loads(resp.read())
    finally:
      # Release the HTTP connection even if the assertion or the JSON parsing fails.
      resp.close()

  def _assert_query_counts(self, queries_json, in_flight, executing, waiting):
    """Assert the in-flight list length and the summary counters in 'queries_json'."""
    assert len(queries_json["in_flight_queries"]) == in_flight
    assert queries_json["num_in_flight_queries"] == in_flight
    assert queries_json["num_executing_queries"] == executing
    assert queries_json["num_waiting_queries"] == waiting

  @pytest.mark.execute_serially
  def test_waiting_in_flight_queries(self):
    """Confirm that the in_flight_queries endpoint shows a query at eos as waiting"""
    open_session_req = TCLIService.TOpenSessionReq()
    default_database = "functional"
    open_session_req.configuration = {"use:database": default_database}
    open_session_resp = self.hs2_client.OpenSession(open_session_req)
    TestJsonEndpoints.check_response(open_session_resp)
    try:
      http_addr = open_session_resp.configuration['http_addr']

      # Execute a SELECT, and check that in_flight_queries shows one executing query.
      select_statement_req = TCLIService.TExecuteStatementReq()
      select_statement_req.sessionHandle = open_session_resp.sessionHandle
      select_statement_req.statement = "SELECT * FROM functional.alltypes LIMIT 0"
      select_statement_resp = self.hs2_client.ExecuteStatement(select_statement_req)
      TestJsonEndpoints.check_response(select_statement_resp)
      queries_json = self._get_json_queries(http_addr)
      self._assert_query_counts(queries_json, in_flight=1, executing=1, waiting=0)
      query = queries_json["in_flight_queries"][0]
      assert query["default_db"] == default_database
      assert query["stmt"] == select_statement_req.statement
      assert query["stmt_type"] == "QUERY"
      assert query["rows_fetched"] == 0
      assert query["executing"]
      assert not query["waiting"]

      # Fetch the results, putting the query at eos, and check that in_flight_queries
      # shows one waiting query.
      fetch_results_req = TCLIService.TFetchResultsReq()
      fetch_results_req.operationHandle = select_statement_resp.operationHandle
      fetch_results_req.maxRows = 100
      fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
      TestJsonEndpoints.check_response(fetch_results_resp)
      # Fetch one more time to ensure that query is at EOS (first fetch might return
      # 0-size row batch)
      fetch_results_resp = self.hs2_client.FetchResults(fetch_results_req)
      TestJsonEndpoints.check_response(fetch_results_resp)
      queries_json = self._get_json_queries(http_addr)
      self._assert_query_counts(queries_json, in_flight=1, executing=0, waiting=1)
      query = queries_json["in_flight_queries"][0]
      assert not query["executing"]
      assert query["waiting"]

      # Close the query and check that in_flight_queries is empty.
      close_operation_req = TCLIService.TCloseOperationReq()
      close_operation_req.operationHandle = select_statement_resp.operationHandle
      close_operation_resp = self.hs2_client.CloseOperation(close_operation_req)
      TestJsonEndpoints.check_response(close_operation_resp)
      queries_json = self._get_json_queries(http_addr)
      self._assert_query_counts(queries_json, in_flight=0, executing=0, waiting=0)
    finally:
      # Best-effort cleanup so the session opened above is not leaked when an assertion
      # fails part-way through. The response is deliberately not checked here so that a
      # cleanup problem cannot mask the original test failure.
      close_session_req = TCLIService.TCloseSessionReq()
      close_session_req.sessionHandle = open_session_resp.sessionHandle
      self.hs2_client.CloseSession(close_session_req)