Files
impala/tests/custom_cluster/test_spilling.py
Michael Brown 9414d53a89 IMPALA-4904,IMPALA-4914: add targeted-stress to exhaustive tests
This patch allows any test suites whose workload is "targeted-stress" to
be run in so-called "exhaustive" mode. Before this patch, only suites
whose workload was "functional-query" would be run exhaustively. More on
this flaw is in IMPALA-3947.

The net effects are:

1. We fix IMPALA-4904, which allows test_ddl_stress to start running
   again.
2. We also improve the situation in IMPALA-4914 by getting
   TestSpillStress to run, though we don't fix its
   not-running-concurrently problem.

The other mini-cluster stress tests were disabled in this commit:
  IMPALA-2605: Omit the sort and mini stress tests
so they are not directly affected here.

I also tried to clarify what "exhaustive" means in some of our shell
scripts, via help text and comments.

An exhaustive build+test run showed test_ddl_stress and TestSpillStress
now get run and passed. This adds roughly 12 minutes to a build that's
on the order of 13-14 hours.

Change-Id: Ie6bd5bbd380e636d680368e908519b042d79dfec
Reviewed-on: http://gerrit.cloudera.org:8080/6002
Tested-by: Impala Public Jenkins
Reviewed-by: Jim Apple <jbapple-impala@apache.org>
2017-02-21 21:12:36 +00:00

123 lines
5.2 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import pytest
from copy import deepcopy
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.test_dimensions import (
ImpalaTestDimension,
create_single_exec_option_dimension,
create_parquet_dimension)
class TestSpillStress(CustomClusterTestSuite):
# IMPALA-4914 part 3 TODO: In the agg_stress workload file used below, it's implied
# that the workload needs to run concurrently. This appears to attempt to try to do
# that by using TEST_IDS and NUM_ITERATIONS and assuming multiple instances of the
# test will run concurrently. But custom cluster tests are explicitly never run
# concurrently. If this test ought to run concurrently, then new test infra is needed
# to achieve that.
@classmethod
def get_workload(self):
return 'targeted-stress'
@classmethod
def setup_class(cls):
# Don't do anything expensive if we're not running exhaustively for this workload.
# This check comes first.
if cls.exploration_strategy() != 'exhaustive':
pytest.skip('runs only in exhaustive')
# Since test_spill_stress below runs TEST_IDS * NUM_ITERATIONS times, but we only
# need Impala to start once, it's inefficient to use
# @CustomClusterTestSuite.with_args() to restart Impala every time. Instead, start
# Impala here, once. We start impalad before we try to make connections to it.
#
# Start with 256KB buffers to reduce data size required to force spilling.
cls._start_impala_cluster([
'--impalad_args=--"read_size=262144"',
'--catalogd_args="--load_catalog_in_background=false"'])
# This super() call leapfrogs CustomClusterTestSuite.setup_class() and calls
# ImpalaTestSuite.setup_class(). This is done because this is an atypical custom
# cluster test that doesn't restart impalad per test method, but it still needs
# self.client (etc.) set up. IMPALA-4914 part 3 would address this.
# This happens last because we need impalad started above before we try to establish
# connections to it.
super(CustomClusterTestSuite, cls).setup_class()
@classmethod
def teardown_class(cls):
# This teardown leapfrog matches the setup leapfrog above, for equivalent reasons.
# CustomClusterTestSuite.teardown_class() overrides ImpalaTestSuite.teardown_class()
# and is a no-op. ImpalaTestSuite.teardown_class() stops client connections. The
# *next* custom cluster test that py.test chooses to run is responsible for
# restarting impalad.
super(CustomClusterTestSuite, cls).teardown_class()
def setup_method(self, method):
# We don't need CustomClusterTestSuite.setup_method() or teardown_method() here
# since we're not doing per-test method restart of Impala.
pass
def teardown_method(self, method):
pass
@classmethod
def add_test_dimensions(cls):
super(TestSpillStress, cls).add_test_dimensions()
cls.ImpalaTestMatrix.add_constraint(lambda v:\
v.get_value('table_format').file_format == 'text')
# Each client will get a different test id.
# TODO: this test takes extremely long so only run on exhaustive. It would
# be good to configure it so we can run some version on core.
TEST_IDS = xrange(0, 3)
NUM_ITERATIONS = [1]
cls.ImpalaTestMatrix.add_dimension(ImpalaTestDimension('test_id', *TEST_IDS))
cls.ImpalaTestMatrix.add_dimension(
ImpalaTestDimension('iterations', *NUM_ITERATIONS))
@pytest.mark.stress
def test_spill_stress(self, vector):
# Number of times to execute each query
for i in xrange(vector.get_value('iterations')):
self.run_test_case('agg_stress', vector)
class TestSpilling(CustomClusterTestSuite):
@classmethod
def get_workload(self):
return 'functional-query'
@classmethod
def add_test_dimensions(cls):
super(TestSpilling, cls).add_test_dimensions()
cls.ImpalaTestMatrix.clear_constraints()
cls.ImpalaTestMatrix.add_dimension(create_parquet_dimension('tpch'))
cls.ImpalaTestMatrix.add_dimension(create_single_exec_option_dimension())
# Reduce the IO read size. This reduces the memory required to trigger spilling.
@pytest.mark.execute_serially
@CustomClusterTestSuite.with_args(
impalad_args="--read_size=200000",
catalogd_args="--load_catalog_in_background=false")
def test_spilling(self, vector):
new_vector = deepcopy(vector)
# remove this. the test cases set this explicitly.
del new_vector.get_value('exec_option')['num_nodes']
self.run_test_case('QueryTest/spilling', new_vector)