From 2ee2c4fdb9b75d4acadd73627d09193c0198ade8 Mon Sep 17 00:00:00 2001
From: Michael Brown
Date: Thu, 25 Jan 2018 10:09:51 -0800
Subject: [PATCH] IMPALA-6441: update explain string for stress test

- Update the explain string pattern match that the stress test uses as a
  binary search start point.
- Make matching code importable and testable.
- Add system test (no vectors needed) to ensure the stress test will
  always be able to find the correct string. It was failing before I
  updated the pattern.

Change-Id: I39af1af8809a6e6b99293798afb0a96b0c02c224
Reviewed-on: http://gerrit.cloudera.org:8080/9141
Reviewed-by: Tim Armstrong
Tested-by: Impala Public Jenkins
---
Review notes (free-form text placed before the diffstat):
- MEM_ESTIMATE_PATTERN now escapes the decimal point. With a bare '.', an
  input such as "Memory=10MB" captured "10M" as the number and no units.
- match_memory_estimate() returns '' for the units instead of raising when
  the line matched but the optional units group is None (plain bytes).
- estimate_query_mem_mb_usage() passes the first column of each fetched row
  to match_memory_estimate(), mirroring the removed fetchone()[0] access.
- Misplaced colons in the new test's assertion messages were moved.
These edits keep every hunk's line count unchanged; the content therefore
differs from the commit named in the From line above.

 tests/metadata/test_explain.py    | 23 ++++++++++++++++++-
 tests/stress/__init__.py          |  0
 tests/stress/concurrent_select.py | 38 ++++++++++++++++++++++++++-----
 3 files changed, 54 insertions(+), 7 deletions(-)
 create mode 100644 tests/stress/__init__.py

diff --git a/tests/metadata/test_explain.py b/tests/metadata/test_explain.py
index 9a602db72..22fc1771b 100644
--- a/tests/metadata/test_explain.py
+++ b/tests/metadata/test_explain.py
@@ -17,11 +17,13 @@
 
 # Functional tests running EXPLAIN statements.
 #
-import pytest
 import re
 
+from decimal import Decimal
+
 from tests.common.impala_test_suite import ImpalaTestSuite
 from tests.common.skip import SkipIfLocal, SkipIfNotHdfsMinicluster
+from tests.stress.concurrent_select import match_memory_estimate
 from tests.util.filesystem_utils import WAREHOUSE
 
 # Tests the different explain levels [0-3] on a few queries.
@@ -176,3 +178,22 @@ class TestExplainEmptyPartition(ImpalaTestSuite):
     assert "missing relevant table and/or column statistics" in explain_result
     # Also test IMPALA-1530 - adding the number of partitions missing stats
     assert "partitions: 1/2 " in explain_result
+
+
+class TestInfraIntegration(ImpalaTestSuite):
+  """
+  This is a test suite to ensure separate test tooling in Python is compatible with the
+  product.
+  """
+  def test_stress_binary_search_start_point(self):
+    """
+    Test that the stress test can use EXPLAIN to find the start point for its binary
+    search.
+    """
+    result = self.client.execute("explain select 1")
+    mem_limit, units = match_memory_estimate(result.data)
+    assert isinstance(units, str) and units.upper() in ('T', 'G', 'M', 'K', ''), (
+        'unexpected units {u} from explain memory estimation:\n{output}'.format(
+            u=units, output='\n'.join(result.data)))
+    assert Decimal(mem_limit) >= 0, (
+        'unexpected value from explain:\n' + '\n'.join(result.data))
diff --git a/tests/stress/__init__.py b/tests/stress/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/tests/stress/concurrent_select.py b/tests/stress/concurrent_select.py
index ac86debaa..69a44347f 100755
--- a/tests/stress/concurrent_select.py
+++ b/tests/stress/concurrent_select.py
@@ -90,7 +90,8 @@ MEM_LIMIT_EQ_THRESHOLD_PC = 0.975
 MEM_LIMIT_EQ_THRESHOLD_MB = 50
 
 # Regex to extract the estimated memory from an explain plan.
-MEM_ESTIMATE_PATTERN = re.compile(r"Estimated.*Memory=(\d+.?\d*)(T|G|M|K)?B")
+MEM_ESTIMATE_PATTERN = re.compile(
+    r"Per-Host Resource Estimates: Memory=(\d+\.?\d*)(T|G|M|K)?B")
 
 PROFILES_DIR = "profiles"
 RESULT_HASHES_DIR = "result_hashes"
@@ -1342,6 +1343,34 @@ def populate_runtime_info(
   LOG.debug("Query after populating runtime info: %s", query)
 
 
+def match_memory_estimate(explain_lines):
+  """
+  Given a list of strings from EXPLAIN output, find the estimated memory needed. This is
+  used as a binary search start point.
+
+  Params:
+    explain_lines: list of str
+
+  Returns:
+    2-tuple str of memory limit in decimal string and units (one of 'T', 'G', 'M', 'K',
+    '' bytes)
+
+  Raises:
+    Exception if no match found
+  """
+  # IMPALA-6441: This method is a public, first class method so it can be importable and
+  # tested with actual EXPLAIN output to make sure we always find the start point.
+  mem_limit, units = None, None
+  for line in explain_lines:
+    regex_result = MEM_ESTIMATE_PATTERN.search(line)
+    if regex_result:
+      mem_limit, units = regex_result.groups()
+      break
+  if mem_limit is None:
+    raise Exception('could not parse explain string:\n' + '\n'.join(explain_lines))
+  return mem_limit, units or ''
+
+
 def estimate_query_mem_mb_usage(query, query_runner):
   """Runs an explain plan then extracts and returns the estimated memory needed to run
   the query.
@@ -1355,11 +1384,8 @@ def estimate_query_mem_mb_usage(query, query_runner):
       return
     LOG.debug("Explaining query\n%s", query.sql)
     cursor.execute('EXPLAIN ' + query.sql)
-    first_val = cursor.fetchone()[0]
-    regex_result = MEM_ESTIMATE_PATTERN.search(first_val)
-    if not regex_result:
-      return
-    mem_limit, units = regex_result.groups()
+    explain_lines = [row[0] for row in cursor.fetchall()]
+    mem_limit, units = match_memory_estimate(explain_lines)
     return parse_mem_to_mb(mem_limit, units)
 
 