IMPALA-8924, IMPALA-8934: Result spooling failpoint tests, fix DCHECKs

Adds several "failpoint" tests to test_result_spooling.py. These tests
use debug_actions spread throughout buffered-plan-root-sink.cc to
trigger failures while result spooling is running. The tests validate
that all queries gracefully fail and do not cause any impalad crashes.

Fixed a few bugs that came up when adding these tests, as well as the
crash reported in IMPALA-8924 (which is now covered by the failpoint
tests added in this patch).

The first bug fixed was a DCHECK in SpillableRowBatchQueue::IsEmpty()
where the method was being called after the queue had been closed. The
fix is to only call IsEmpty() if IsOpen() returns true.

The second bug was an issue in the cancellation path where
BufferedPlanRootSink::GetNext would enter an infinite loop if the query
was cancelled and then GetNext was called. The fix is to check the
cancellation state in the outer while loop.

Testing:
* Added new tests to test_result_spooling.py
* Ran core tests

Change-Id: Ib96f797bc8a5ba8baf9fb28abd1f645345bbe932
Reviewed-on: http://gerrit.cloudera.org:8080/14214
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Sahil Takiar
2019-09-10 13:31:01 -07:00
committed by Impala Public Jenkins
parent bca1b43efb
commit 391942d79d
5 changed files with 137 additions and 14 deletions

View File

@@ -25,6 +25,7 @@ from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_dimensions import create_exec_option_dimension
from tests.common.test_vector import ImpalaTestDimension
from tests.util.cancel_util import cancel_query_and_validate_state
from tests.util.failpoints_util import execute_query_expect_debug_action_failure
class TestResultSpooling(ImpalaTestSuite):
@@ -179,6 +180,19 @@ class TestResultSpooling(ImpalaTestSuite):
finally:
self.client.close_query(handle)
def test_exec_tree_failpoint(self, vector):
  """Verify that a failure injected while the exec tree runs fails the query.

  The debug action pairs a MEM_LIMIT_EXCEEDED failure with a GETNEXT:DELAY. The delay
  is required so that the client issues a fetch request before the MEM_LIMIT_EXCEEDED
  exception is thrown. In contrast to the tests in TestResultSpoolingFailpoints, the
  failure here is injected during execution of the exec tree and not inside the
  result spooling code."""
  # All three settings are written into the exec options carried by the test vector.
  exec_options = vector.get_value('exec_option')
  exec_options['batch_size'] = 10
  exec_options['debug_action'] = '4:GETNEXT:MEM_LIMIT_EXCEEDED|0:GETNEXT:DELAY'
  exec_options['spool_query_results'] = 'true'
  join_query = (
      "select 1 from functional.alltypessmall a join functional.alltypessmall b "
      "on a.id = b.id")
  execute_query_expect_debug_action_failure(self, join_query, vector)
def __validate_query(self, query, exec_options):
"""Compares the results of the given query with and without result spooling
enabled."""
@@ -344,3 +358,47 @@ class TestResultSpoolingCancellation(ImpalaTestSuite):
"Unexpected status code from cancel request: {0}".format(cancel_result)
finally:
if handle: self.client.close_query(handle)
class TestResultSpoolingFailpoints(ImpalaTestSuite):
  """Test result spooling failure handling. Uses debug actions to inject failures at
  various points of result spooling execution (e.g. when results are actually getting
  spooled)."""

  # Debug actions exercised by test_failpoints. Each entry becomes one value of the
  # 'debug_action' test dimension.
  _debug_actions = [
      # Inject a failure in BufferedPlanRootSink::Open.
      'BPRS_BEFORE_OPEN:FAIL',
      # Inject a failure immediately before BufferedPlanRootSink::Send adds a batch to
      # the queue. The probability (< 1.0) ensures that the error is thrown on a random
      # RowBatch; a probability of 1.0 would always fail on the very first batch.
      # NOTE(review): assumes the query yields enough batches for the failure to fire
      # reliably at this probability -- confirm.
      'BPRS_BEFORE_ADD_BATCH:FAIL@0.8',
      # Inject a failure in BufferedPlanRootSink::FlushFinal.
      'BPRS_BEFORE_FLUSH_FINAL:FAIL',
      # Inject a failure immediately before the BufferedPlanRootSink::GetNext reads a
      # batch from the queue. The probability (< 1.0) ensures that the error is thrown
      # on a random RowBatch rather than always on the first one.
      'BPRS_BEFORE_GET_BATCH:FAIL@0.8']

  # Query executed for every injected failpoint.
  _query = "select * from functional.alltypes"

  @classmethod
  def get_workload(cls):
    """Return the name of the workload this suite runs against."""
    return 'functional-query'

  @classmethod
  def add_test_dimensions(cls):
    """Add a 'debug_action' dimension (one value per entry in _debug_actions) and
    constrain the table format to uncompressed parquet."""
    super(TestResultSpoolingFailpoints, cls).add_test_dimensions()
    cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('debug_action',
        *cls._debug_actions))

    # Result spooling should be independent of file format, so only testing for
    # table_format=parquet/none in order to avoid a test dimension explosion.
    cls.ImpalaTestMatrix.add_constraint(lambda v:
        v.get_value('table_format').file_format == 'parquet' and
        v.get_value('table_format').compression_codec == 'none')

  def test_failpoints(self, vector):
    """Run _query with result spooling enabled and the vector's debug action set, then
    delegate to execute_query_expect_debug_action_failure to check the failure."""
    vector.get_value('exec_option')['batch_size'] = 10
    # Copy the dimension value into the exec options so it is applied to the query.
    vector.get_value('exec_option')['debug_action'] = vector.get_value('debug_action')
    vector.get_value('exec_option')['spool_query_results'] = 'true'
    execute_query_expect_debug_action_failure(self, self._query, vector)