IMPALA-6441: update explain string for stress test

- Update the explain string pattern match that the stress test uses as a
  binary search start point.
- Make matching code importable and testable.
- Add system test (no vectors needed) to ensure the stress test will
  always be able to find the correct string. It was failing before I
  updated the pattern.

Change-Id: I39af1af8809a6e6b99293798afb0a96b0c02c224
Reviewed-on: http://gerrit.cloudera.org:8080/9141
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Impala Public Jenkins
This commit is contained in:
Michael Brown
2018-01-25 10:09:51 -08:00
committed by Impala Public Jenkins
parent 0fdd81682d
commit 2ee2c4fdb9
3 changed files with 54 additions and 7 deletions

View File

@@ -17,11 +17,13 @@
# Functional tests running EXPLAIN statements. # Functional tests running EXPLAIN statements.
# #
import pytest
import re import re
from decimal import Decimal
from tests.common.impala_test_suite import ImpalaTestSuite from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.skip import SkipIfLocal, SkipIfNotHdfsMinicluster from tests.common.skip import SkipIfLocal, SkipIfNotHdfsMinicluster
from tests.stress.concurrent_select import match_memory_estimate
from tests.util.filesystem_utils import WAREHOUSE from tests.util.filesystem_utils import WAREHOUSE
# Tests the different explain levels [0-3] on a few queries. # Tests the different explain levels [0-3] on a few queries.
@@ -176,3 +178,22 @@ class TestExplainEmptyPartition(ImpalaTestSuite):
assert "missing relevant table and/or column statistics" in explain_result assert "missing relevant table and/or column statistics" in explain_result
# Also test IMPALA-1530 - adding the number of partitions missing stats # Also test IMPALA-1530 - adding the number of partitions missing stats
assert "partitions: 1/2 " in explain_result assert "partitions: 1/2 " in explain_result
class TestInfraIntegration(ImpalaTestSuite):
    """
    Suite checking that separate Python test tooling remains compatible with the
    product.
    """

    def test_stress_binary_search_start_point(self):
        """
        Run a trivial EXPLAIN and verify that the stress test's matcher can extract
        the memory estimate it uses as its binary search start point.
        """
        explain_result = self.client.execute("explain select 1")
        estimate, units = match_memory_estimate(explain_result.data)

        # An empty unit string stands for plain bytes.
        accepted_units = ('T', 'G', 'M', 'K', '')
        assert isinstance(units, str) and units.upper() in accepted_units, (
            'unexpected units {u} from explain memory estimation\n{output}:'.format(
                u=units, output='\n'.join(explain_result.data)))

        # Decimal() also rejects a value that is not a well-formed number.
        assert Decimal(estimate) >= 0, (
            'unexpected value from explain\n:' + '\n'.join(explain_result.data))

0
tests/stress/__init__.py Normal file
View File

View File

@@ -90,7 +90,8 @@ MEM_LIMIT_EQ_THRESHOLD_PC = 0.975
MEM_LIMIT_EQ_THRESHOLD_MB = 50 MEM_LIMIT_EQ_THRESHOLD_MB = 50
# Regex to extract the estimated memory from an explain plan. # Regex to extract the estimated memory from an explain plan.
MEM_ESTIMATE_PATTERN = re.compile(r"Estimated.*Memory=(\d+.?\d*)(T|G|M|K)?B") MEM_ESTIMATE_PATTERN = re.compile(
r"Per-Host Resource Estimates: Memory=(\d+.?\d*)(T|G|M|K)?B")
PROFILES_DIR = "profiles" PROFILES_DIR = "profiles"
RESULT_HASHES_DIR = "result_hashes" RESULT_HASHES_DIR = "result_hashes"
@@ -1342,6 +1343,34 @@ def populate_runtime_info(
LOG.debug("Query after populating runtime info: %s", query) LOG.debug("Query after populating runtime info: %s", query)
def match_memory_estimate(explain_lines):
    """
    Given a list of strings from EXPLAIN output, find the estimated memory needed.
    This is used as a binary search start point.

    Params:
      explain_lines: list of str
    Returns:
      2-tuple of str: the memory estimate as a decimal string, and its units (one of
      'T', 'G', 'M', 'K', or '' when the estimate is expressed in plain bytes). The
      values come from the first line that matches MEM_ESTIMATE_PATTERN.
    Raises:
      Exception if no line matches
    """
    # IMPALA-6441: This is a public, first class function so it can be imported and
    # tested with actual EXPLAIN output to make sure we always find the start point.
    for line in explain_lines:
        regex_result = MEM_ESTIMATE_PATTERN.search(line)
        if regex_result:
            mem_limit, units = regex_result.groups()
            # The units group is optional in the pattern, and re reports a group that
            # did not participate in the match as None. Normalize that to '' so an
            # estimate in plain bytes (e.g. "Memory=0B") is returned as documented
            # instead of being mistaken for a failed parse.
            return mem_limit, units or ''
    raise Exception('could not parse explain string:\n' + '\n'.join(explain_lines))
def estimate_query_mem_mb_usage(query, query_runner): def estimate_query_mem_mb_usage(query, query_runner):
"""Runs an explain plan then extracts and returns the estimated memory needed to run """Runs an explain plan then extracts and returns the estimated memory needed to run
the query. the query.
@@ -1355,11 +1384,8 @@ def estimate_query_mem_mb_usage(query, query_runner):
return return
LOG.debug("Explaining query\n%s", query.sql) LOG.debug("Explaining query\n%s", query.sql)
cursor.execute('EXPLAIN ' + query.sql) cursor.execute('EXPLAIN ' + query.sql)
first_val = cursor.fetchone()[0] explain_lines = cursor.fetchall()
regex_result = MEM_ESTIMATE_PATTERN.search(first_val) mem_limit, units = match_memory_estimate(explain_lines)
if not regex_result:
return
mem_limit, units = regex_result.groups()
return parse_mem_to_mb(mem_limit, units) return parse_mem_to_mb(mem_limit, units)