When codegen is enabled, Prepare() for plan fragments can be relatively expensive (the cost comes mostly from preparing and optimizing the LLVM bitcode module). However, we call it on the critical path for query setup, during the ExecRemoteFragment() RPC issued by the coordinator. This can lead to high startup latencies for queries, particularly those with many fragments.

This patch moves Prepare() to the fragment exec thread, which allows the coordinator to start many fragments in quick succession. Doing so complicates cancellation, which previously could not occur during Prepare() because the coordinator waited for all RPCs to finish before issuing any cancellation.

To address the new complexity, this patch loosens the existing contract for concurrent calls to PlanFragmentExecutor::Prepare() and Cancel(). Previously, Cancel() could not be called until Prepare() had returned. Now the two may be called in any order, including concurrently, and they coordinate to ensure the correct execution order: Cancel() always blocks until Prepare() has finished, unless it was called strictly before Prepare().

This change allows us to rework the previous logic, under which Prepare() always had to be called before a fragment was registered (so that any concurrent Cancel() calls could not 'find' the fragment). The order is now register -> prepare -> exec -> unregister, and cancellation may occur any time after ExecPlanFragment() returns.

Change-Id: Ie39737dc419d7708dd881e68d1035e05d3256d19
Reviewed-on: http://gerrit.cloudera.org:8080/539
Reviewed-by: Henry Robinson <henry@cloudera.com>
Tested-by: Internal Jenkins
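The loosened contract is easiest to see in code. Below is a minimal sketch, written in Python rather than the C++ of the real PlanFragmentExecutor, of the coordination the commit message describes; the FragmentExecutor class, its fields, and do_expensive_codegen() are all hypothetical stand-ins, with do_expensive_codegen() representing the costly LLVM module preparation.

import threading

def do_expensive_codegen():
  # Hypothetical stand-in for preparing and optimizing the LLVM bitcode module.
  pass

class FragmentExecutor(object):
  """Prepare() and Cancel() may be called in any order; Cancel() blocks until
  Prepare() has finished unless it was called strictly before Prepare()."""

  def __init__(self):
    self._cv = threading.Condition()
    self._prepare_started = False
    self._prepare_done = False
    self._cancelled = False

  def prepare(self):
    with self._cv:
      if self._cancelled:
        # Cancel() ran strictly before Prepare(): skip the expensive work.
        self._prepare_done = True
        self._cv.notify_all()
        return False
      self._prepare_started = True
    # The expensive work runs outside the lock, so a concurrent Cancel() can
    # observe that preparation is in flight and wait for it to complete.
    do_expensive_codegen()
    with self._cv:
      self._prepare_done = True
      self._cv.notify_all()
      return not self._cancelled

  def cancel(self):
    with self._cv:
      self._cancelled = True
      # Block until an in-flight Prepare() finishes. If Prepare() has not yet
      # started, return immediately; it will observe the flag and bail out.
      while self._prepare_started and not self._prepare_done:
        self._cv.wait()

Under this scheme the fragment lifecycle can safely follow the register -> prepare -> exec -> unregister order: registration makes the fragment visible to cancellation, and the coordination above prevents a racing Cancel() from tearing down state while Prepare() is still running.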
# Copyright (c) 2015 Cloudera, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.impala_cluster import ImpalaCluster
from tests.verifiers.metric_verifier import MetricVerifier

class TestFragmentLifecycle(ImpalaTestSuite):
  """Using the debug action interface, check that failed queries correctly clean up *all*
  fragments"""

  @classmethod
  def get_workload(cls):
    return 'functional'

  @pytest.mark.execute_serially
  def test_failure_in_prepare(self):
    # Fail the scan node
    self.client.execute("SET DEBUG_ACTION='-1:0:PREPARE:FAIL'")
    try:
      self.client.execute("SELECT COUNT(*) FROM functional.alltypes")
      assert False, "Query should have thrown an error"
    except ImpalaBeeswaxException:
      pass
    verifiers = [MetricVerifier(i.service) for i in ImpalaCluster().impalads]

    for v in verifiers:
      v.wait_for_metric("impala-server.num-fragments-in-flight", 0)

  @pytest.mark.execute_serially
  def test_failure_in_prepare_multi_fragment(self):
    # Test that if one fragment fails, the others are cleaned up during the ensuing
    # cancellation.

    # Fail the scan node
    self.client.execute("SET DEBUG_ACTION='-1:0:PREPARE:FAIL'")

    # Force a query plan that will have three or more fragments.
    try:
      self.client.execute("SELECT COUNT(*) FROM functional.alltypes a JOIN [SHUFFLE] "
                          "functional.alltypes b ON a.id = b.id")
      assert False, "Query should have thrown an error"
    except ImpalaBeeswaxException:
      pass

    verifiers = [MetricVerifier(i.service) for i in ImpalaCluster().impalads]
    for v in verifiers:
      # A long timeout is required because fragments may be blocked while sending data;
      # the default timeout is 60s before they wake up and cancel themselves.
      #
      # TODO: Fix when we have cancellable RPCs.
      v.wait_for_metric("impala-server.num-fragments-in-flight", 0, timeout=65)