Files
impala/tests/query_test/test_query_mem_limit.py
Tim Armstrong 548106f5e1 IMPALA-8451,IMPALA-8905: enable admission control for dockerised tests
This gives us some additional coverage for using admission
control in a simple but realistic configuration.

What are the implications of this change for test stability and
flakiness?

On one hand we are adding some more unpredictability
to tests, because they may be queued for an arbitrary amount of
time. On the other, we can prevent queries from contending over
memory. Currently we rely on luck to prevent concurrent queries
from forcing each other out-of-memory.

I think the unpredictability from the queueing is
preferable, because we can generally work around these by
fixing tests that are sensitive to being queued, whereas
contention over memory requires us to use crude workarounds
like forcing tests to execute serially.

Added observability for the configured queue wait time for each pool.
I noticed that I did not have a direct way to observe the effective
value when I set configs. This is IMPALA-8905.

I had to tweak tests in a few ways:
* Tests with large strings needed higher memory limits.
* Hardcoded instances of default-pool had to handle root.default
  as well.
* test_query_mem_limit needed to run without a mem_limit. I
  created a special pool root.no-limits with no memory limits
  to allow that.

Testing:
Ran the dockerised build 5-6 times to flush out flaky tests.

Change-Id: I7517673f9e348780fcf7cd6ce1f12c9c5a55373a
Reviewed-on: http://gerrit.cloudera.org:8080/13942
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2019-09-27 01:54:39 +00:00

134 lines
5.5 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Targeted tests to validate per-query memory limit.
import pytest
import re
import sys
from copy import copy
from tests.beeswax.impala_beeswax import ImpalaBeeswaxException
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfEC
from tests.common.test_dimensions import (
ImpalaTestDimension,
create_single_exec_option_dimension,
create_uncompressed_text_dimension)
class TestQueryMemLimit(ImpalaTestSuite):
    """Functional validation of per-query memory limits.

    Each query in QUERIES is run on text files, with the memory limit added as
    an extra test dimension. The query is expected to fail or pass depending on
    the limit value.
    """

    # There are a lot of 'unique' comments in lineitem.
    # Almost 80% of the table size.
    QUERIES = ["select count(distinct l_comment) from lineitem",
               "select group_concat(l_linestatus) from lineitem"]

    # TODO: It will be nice if we can get how much memory a query uses
    # dynamically, even if it is a rough approximation.
    # A mem_limit is expressed in bytes, with values <= 0 signifying no cap.
    # These values are either really small, unlimited, or have a really large cap.

    # sys.maxint only exists on Python 2. Fall back to sys.maxsize so the module
    # still imports on Python 3 while keeping the Python 2 value unchanged.
    _MAXINT = getattr(sys, "maxint", sys.maxsize)
    # Floor division keeps these integral on every Python version; true division
    # would produce float strings that are not valid limit values.
    MAXINT_BYTES = str(_MAXINT)
    MAXINT_MB = str(_MAXINT // (1024 * 1024))
    MAXINT_GB = str(_MAXINT // (1024 * 1024 * 1024))

    # We expect the tests with MAXINT_* using valid units [bmg] to succeed.
    PASS_REGEX = re.compile(
        "(%s|%s|%s)[bmg]?$" % (MAXINT_BYTES, MAXINT_MB, MAXINT_GB), re.I)

    MEM_LIMITS = ["-1", "0", "1", "10", "100", "1000", "10000", MAXINT_BYTES,
                  MAXINT_BYTES + "b", MAXINT_BYTES + "B",
                  MAXINT_MB + "m", MAXINT_MB + "M",
                  MAXINT_GB + "g", MAXINT_GB + "G",
                  # invalid per-query memory limits
                  "-1234", "-3.14", "xyz", "100%", MAXINT_BYTES + "k",
                  "k" + MAXINT_BYTES]

    # Reduced set of limits used by the 'core' exploration strategy.
    MEM_LIMITS_CORE = ["-1", "0", "10000", MAXINT_BYTES,
                       MAXINT_BYTES + "b", MAXINT_MB + "M", MAXINT_GB + "g"]

    @classmethod
    def get_workload(cls):
        """Return the name of the workload these tests run against."""
        return 'tpch'

    @classmethod
    def add_test_dimensions(cls):
        """Register the table format, mem_limit and query test dimensions."""
        super(TestQueryMemLimit, cls).add_test_dimensions()
        # Only run the query for text
        cls.ImpalaTestMatrix.add_dimension(
            create_uncompressed_text_dimension(cls.get_workload()))

        # add mem_limit as a test dimension.
        if cls.exploration_strategy() == 'core':
            cls.ImpalaTestMatrix.add_dimension(
                ImpalaTestDimension('mem_limit', *TestQueryMemLimit.MEM_LIMITS_CORE))
        else:
            cls.ImpalaTestMatrix.add_dimension(
                ImpalaTestDimension('mem_limit', *TestQueryMemLimit.MEM_LIMITS))

        # Make query a test dimension so we can support more queries.
        cls.ImpalaTestMatrix.add_dimension(
            ImpalaTestDimension('query', *TestQueryMemLimit.QUERIES))

        # This query takes a very long time to finish with a bound on the batch_size.
        # Remove the bound on the batch size.
        cls.ImpalaTestMatrix.add_constraint(
            lambda v: v.get_value('exec_option')['batch_size'] == 0)

    @SkipIfEC.oom
    @pytest.mark.execute_serially
    def test_mem_limit(self, vector):
        """Run the vector's query with its mem_limit and check pass/fail.

        The query is expected to succeed when the limit is "0" or "-1", or when
        it matches PASS_REGEX; every other limit value is expected to fail.
        """
        mem_limit = copy(vector.get_value('mem_limit'))
        exec_options = copy(vector.get_value('exec_option'))
        exec_options['mem_limit'] = mem_limit
        # Send to the no-limits pool so that no memory limits apply.
        exec_options['request_pool'] = "root.no-limits"
        query = vector.get_value('query')
        table_format = vector.get_value('table_format')

        should_succeed = (mem_limit in ("0", "-1")
                          or self.PASS_REGEX.match(mem_limit) is not None)
        self.__exec_query(query, exec_options, should_succeed, table_format)

    def __exec_query(self, query, exec_options, should_succeed, table_format):
        """Execute `query` and assert that it succeeds or fails as expected.

        Args:
          query: SQL text to run.
          exec_options: query options passed to execute_query().
          should_succeed: True if the query must complete without raising
            ImpalaBeeswaxException, False if it must raise it.
          table_format: table format forwarded to execute_query().

        Raises:
          AssertionError: if the outcome does not match `should_succeed`.
        """
        try:
            self.execute_query(query, exec_options, table_format=table_format)
        except ImpalaBeeswaxException as e:
            assert not should_succeed, "Query should not have failed: %s" % e
        else:
            # Only reached when the query raised nothing.
            assert should_succeed, "Query was expected to fail"
class TestCodegenMemLimit(ImpalaTestSuite):
    """Checks that the memory limit is applied to codegen."""

    @classmethod
    def get_workload(cls):
        """Return the name of the workload these tests run against."""
        return 'functional-query'

    @classmethod
    def add_test_dimensions(cls):
        """Register a single exec-option dimension and restrict to parquet."""
        super(TestCodegenMemLimit, cls).add_test_dimensions()

        # Run with num_nodes=1 to avoid races between fragments allocating memory.
        single_node_options = create_single_exec_option_dimension(
            num_nodes=1, disable_codegen_rows_threshold=0)
        cls.ImpalaTestMatrix.add_dimension(single_node_options)

        # Only run the query for parquet
        def is_parquet(v):
            return v.get_value('table_format').file_format == 'parquet'

        cls.ImpalaTestMatrix.add_constraint(is_parquet)

    def test_codegen_mem_limit(self, vector):
        """Run the 'QueryTest/codegen-mem-limit' test case for this vector."""
        self.run_test_case('QueryTest/codegen-mem-limit', vector)