Files
impala/tests/custom_cluster/test_query_concurrency.py
Lars Volker 0164fa769e IMPALA-6955: fix test_query_concurrency and server startup sequence
custom_cluster/test_query_concurrency created two threads to poll the
query_plan debug webpage without joining them. As a result, they could
outlive the test itself and continue to poll the webpage while the
minicluster of the next test started up.

During startup of the coordinator, the ImpalaServer would register its
HTTP handlers with the webserver before registering itself with the
ExecEnv. When the incoming request from the polling threads called
GetClientRequestState(), that call would dereference a nullptr returned
by the ExecEnv, which would cause the process to crash.

To fix this we join the threads in test_query_concurrency before
returning from the test method.

To fix the underlying race that made the crash possible we change the
initialization order to register the HTTP handlers after the
ImpalaServer has been registered with the ExecEnv. We also add some
DCHECKs to make sure that we access the ImpalaServer only through a
singleton instance and that it is properly registered with the ExecEnv.

Change-Id: If22f71ab6edaf9a6b46afc0985c73dc4625b5103
Reviewed-on: http://gerrit.cloudera.org:8080/12019
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2018-12-06 02:22:21 +00:00

91 lines
3.8 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pytest
import time
from threading import Thread
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.skip import SkipIfBuildType
@SkipIfBuildType.not_dev_build
class TestQueryConcurrency(CustomClusterTestSuite):
    """Tests if multiple queries are registered on the coordinator when
    submitted in parallel along with clients trying to access the web UI.
    The intention here is to check that the web server call paths don't hold
    global locks that can conflict with other requests and prevent the impalad
    from servicing them. It is done by simulating a metadata loading pause
    using the configuration key --stress_metadata_loading_pause_injection_ms
    that makes the frontend hold the ClientRequestState::lock_ for longer
    duration."""

    # Query submitted concurrently by both clients.
    TEST_QUERY = "select count(*) from tpch.supplier"
    # Upper bound, in seconds, for each polling loop in this class.
    POLLING_TIMEOUT_S = 15

    @classmethod
    def get_workload(cls):
        """Returns the workload name, 'functional-query'."""
        return 'functional-query'

    @classmethod
    def setup_class(cls):
        """Skips the whole class unless the exploration strategy is
        'exhaustive'; otherwise defers to the parent class setup."""
        if cls.exploration_strategy() != 'exhaustive':
            pytest.skip('Runs only in exhaustive mode.')
        super(TestQueryConcurrency, cls).setup_class()

    def poll_query_page(self, impalad, query_id):
        """Polls the debug plan page of a given query id in a loop till the
        timeout of POLLING_TIMEOUT_S is hit.

        'impalad' is the impalad whose debug webpage is read and 'query_id' is
        the query id string appended to the "query_plan?query_id=" URL. Nothing
        is returned; the page content is discarded."""
        start = time.time()
        while time.time() - start < self.POLLING_TIMEOUT_S:
            try:
                impalad.service.read_debug_webpage("query_plan?query_id=" + query_id)
            except Exception:
                # Deliberately best-effort: this loop only generates web UI
                # traffic, so a failed request must not end the polling.
                pass
            time.sleep(1)

    def check_registered_queries(self, impalad, count):
        """Asserts that the registered query count on a given impalad matches
        'count' before POLLING_TIMEOUT_S is hit.

        Returns the in-flight queries reported by the impalad once their number
        equals 'count'. Raises AssertionError if that does not happen within
        the timeout."""
        start = time.time()
        while time.time() - start < self.POLLING_TIMEOUT_S:
            inflight_query_ids = impalad.service.get_in_flight_queries()
            if inflight_query_ids is not None and len(inflight_query_ids) == count:
                return inflight_query_ids
            time.sleep(1)
        assert False, "Registered query count doesn't match: " + str(count)

    @pytest.mark.execute_serially
    @CustomClusterTestSuite.with_args(
        "--stress_metadata_loading_pause_injection_ms=100000")
    def test_query_concurrency(self, vector):
        """Submits TEST_QUERY from two clients while a background thread polls
        the first query's plan page, and checks that both queries get
        registered and that the profile page of the second in-flight entry
        answers with an error message.

        All threads started here are joined before returning so that none of
        them can keep polling the webpage after the test has finished."""
        impalad = self.cluster.get_any_impalad()
        client1 = impalad.service.create_beeswax_client()
        client2 = impalad.service.create_beeswax_client()
        q1 = Thread(target=client1.execute_async, args=(self.TEST_QUERY,))
        q2 = Thread(target=client2.execute_async, args=(self.TEST_QUERY,))
        # Only threads that were actually started may be joined; joining an
        # unstarted Thread raises RuntimeError.
        started_threads = []
        try:
            q1.start()
            started_threads.append(q1)
            inflight_query_ids = self.check_registered_queries(impalad, 1)
            poll_thread = Thread(target=self.poll_query_page,
                                 args=(impalad, inflight_query_ids[0]['query_id']))
            poll_thread.start()
            started_threads.append(poll_thread)
            # Presumably gives the poller time to issue requests before the
            # second query is submitted.
            time.sleep(2)
            q2.start()
            started_threads.append(q2)
            inflight_query_ids = self.check_registered_queries(impalad, 2)
            result = impalad.service.read_debug_webpage(
                "query_profile_encoded?query_id=" + inflight_query_ids[1]['query_id'])
            # The request must come back (with an error text) instead of
            # blocking behind the paused query.
            assert result.startswith("Could not obtain runtime profile")
        finally:
            # Clean up even when an assertion above fails: close the clients
            # first (as the original sequence did), then wait for every thread.
            try:
                client1.close()
                client2.close()
            finally:
                for thread in started_threads:
                    thread.join()