mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-6441: update explain string for stress test
- Update the explain string pattern match that the stress test uses as a binary search start point. - Make matching code importable and testable. - Add system test (no vectors needed) to ensure the stress test will always be able to find the correct string. It was failing before I updated the pattern. Change-Id: I39af1af8809a6e6b99293798afb0a96b0c02c224 Reviewed-on: http://gerrit.cloudera.org:8080/9141 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Impala Public Jenkins
This commit is contained in:
committed by
Impala Public Jenkins
parent
0fdd81682d
commit
2ee2c4fdb9
@@ -17,11 +17,13 @@
|
||||
|
||||
# Functional tests running EXPLAIN statements.
|
||||
#
|
||||
import pytest
|
||||
import re
|
||||
|
||||
from decimal import Decimal
|
||||
|
||||
from tests.common.impala_test_suite import ImpalaTestSuite
|
||||
from tests.common.skip import SkipIfLocal, SkipIfNotHdfsMinicluster
|
||||
from tests.stress.concurrent_select import match_memory_estimate
|
||||
from tests.util.filesystem_utils import WAREHOUSE
|
||||
|
||||
# Tests the different explain levels [0-3] on a few queries.
|
||||
@@ -176,3 +178,22 @@ class TestExplainEmptyPartition(ImpalaTestSuite):
|
||||
assert "missing relevant table and/or column statistics" in explain_result
|
||||
# Also test IMPALA-1530 - adding the number of partitions missing stats
|
||||
assert "partitions: 1/2 " in explain_result
|
||||
|
||||
|
||||
class TestInfraIntegration(ImpalaTestSuite):
  """
  This is a test suite to ensure separate test tooling in Python is compatible with the
  product.
  """

  def test_stress_binary_search_start_point(self):
    """
    Test that the stress test can use EXPLAIN to find the start point for its binary
    search.

    Runs EXPLAIN on a trivial query and checks that match_memory_estimate() extracts a
    non-negative decimal value and a recognized unit from the output.
    """
    result = self.client.execute("explain select 1")
    mem_limit, units = match_memory_estimate(result.data)
    # The failure messages dump the full EXPLAIN output after a "label:" header line so
    # a pattern mismatch can be diagnosed straight from the test log.
    assert isinstance(units, str) and units.upper() in ('T', 'G', 'M', 'K', ''), (
        'unexpected units {u} from explain memory estimation:\n{output}'.format(
            u=units, output='\n'.join(result.data)))
    assert Decimal(mem_limit) >= 0, (
        'unexpected value from explain:\n' + '\n'.join(result.data))
|
||||
|
||||
0
tests/stress/__init__.py
Normal file
0
tests/stress/__init__.py
Normal file
@@ -90,7 +90,8 @@ MEM_LIMIT_EQ_THRESHOLD_PC = 0.975
|
||||
MEM_LIMIT_EQ_THRESHOLD_MB = 50
|
||||
|
||||
# Regex to extract the estimated memory from an explain plan.
|
||||
MEM_ESTIMATE_PATTERN = re.compile(r"Estimated.*Memory=(\d+.?\d*)(T|G|M|K)?B")
|
||||
# Group 1 is the numeric value, group 2 the optional unit prefix (absent for plain bytes).
# The decimal point is escaped on purpose: an unescaped "." would greedily swallow the
# unit letter of an integral estimate (e.g. "10MB" would parse as value "10M", no units).
MEM_ESTIMATE_PATTERN = re.compile(
    r"Per-Host Resource Estimates: Memory=(\d+\.?\d*)(T|G|M|K)?B")
|
||||
|
||||
PROFILES_DIR = "profiles"
|
||||
RESULT_HASHES_DIR = "result_hashes"
|
||||
@@ -1342,6 +1343,34 @@ def populate_runtime_info(
|
||||
LOG.debug("Query after populating runtime info: %s", query)
|
||||
|
||||
|
||||
def match_memory_estimate(explain_lines):
  """
  Given a list of strings from EXPLAIN output, find the estimated memory needed. This is
  used as a binary search start point.

  The first line matching MEM_ESTIMATE_PATTERN wins; later lines are not examined.

  Params:
    explain_lines: list of str

  Returns:
    2-tuple str of memory limit in decimal string and units (one of 'T', 'G', 'M', 'K',
    '' bytes)

  Raises:
    Exception if no match found
  """
  # IMPALA-6441: This method is a public, first class method so it can be importable and
  # tested with actual EXPLAIN output to make sure we always find the start point.
  for line in explain_lines:
    regex_result = MEM_ESTIMATE_PATTERN.search(line)
    if regex_result:
      mem_limit, units = regex_result.groups()
      # The units group is optional in the pattern: an estimate expressed in plain bytes
      # (e.g. "Memory=0B") matches with units of None. Report that as '' as documented
      # instead of treating a successful match as a parse failure.
      return mem_limit, units or ''
  raise Exception('could not parse explain string:\n' + '\n'.join(explain_lines))
|
||||
|
||||
|
||||
def estimate_query_mem_mb_usage(query, query_runner):
|
||||
"""Runs an explain plan then extracts and returns the estimated memory needed to run
|
||||
the query.
|
||||
@@ -1355,11 +1384,8 @@ def estimate_query_mem_mb_usage(query, query_runner):
|
||||
return
|
||||
LOG.debug("Explaining query\n%s", query.sql)
|
||||
cursor.execute('EXPLAIN ' + query.sql)
|
||||
first_val = cursor.fetchone()[0]
|
||||
regex_result = MEM_ESTIMATE_PATTERN.search(first_val)
|
||||
if not regex_result:
|
||||
return
|
||||
mem_limit, units = regex_result.groups()
|
||||
explain_lines = cursor.fetchall()
|
||||
mem_limit, units = match_memory_estimate(explain_lines)
|
||||
return parse_mem_to_mb(mem_limit, units)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user