mirror of
https://github.com/apache/impala.git
synced 2025-12-30 03:01:44 -05:00
IMPALA-6905: support regexes with more verifiers
Support row_regex and other lines for the subset and superset verifiers, which previously assumed that lines in the actual and expected had to match exactly. Use in test_stats_extrapolation to make the test more robust to irrelevant changes in the explain plan. Testing: Manually modified a superset and a subset test to check that tests fail as expected. Change-Id: Ia7a28d421c8e7cd84b14d07fcb71b76449156409 Reviewed-on: http://gerrit.cloudera.org:8080/10155 Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
770ef77d53
commit
d879fa9930
@@ -14,8 +14,7 @@ explain select id from alltypes;
|
||||
' table: rows=unavailable size=unavailable'
|
||||
' partitions: 0/12 rows=unavailable'
|
||||
' columns: unavailable'
|
||||
' extrapolated-rows=unavailable'
|
||||
' mem-estimate=16.00MB mem-reservation=0B'
|
||||
row_regex:.* extrapolated-rows=unavailable.*
|
||||
' tuple-ids=0 row-size=4B cardinality=unavailable'
|
||||
---- TYPES
|
||||
STRING
|
||||
@@ -68,8 +67,8 @@ row_regex:.*partitions=12/12 files=12 size=.*
|
||||
row_regex:.*table: rows=3650 size=.*
|
||||
' partitions: 0/12 rows=unavailable'
|
||||
' columns: all'
|
||||
' extrapolated-rows=3650'
|
||||
' mem-estimate=16.00MB mem-reservation=0B'
|
||||
row_regex:.* extrapolated-rows=3650.*
|
||||
row_regex:.*mem-estimate=.* mem-reservation=.*
|
||||
' tuple-ids=0 row-size=4B cardinality=3650'
|
||||
---- TYPES
|
||||
STRING
|
||||
@@ -93,8 +92,8 @@ row_regex:.*partitions=3/12 files=3 size=.*
|
||||
row_regex:.*table: rows=3650 size=.*
|
||||
' partitions: 0/3 rows=unavailable'
|
||||
' columns: all'
|
||||
' extrapolated-rows=904'
|
||||
' mem-estimate=16.00MB mem-reservation=0B'
|
||||
row_regex:.* extrapolated-rows=904.*
|
||||
row_regex:.*mem-estimate=.* mem-reservation=.*
|
||||
' tuple-ids=0 row-size=4B cardinality=904'
|
||||
---- TYPES
|
||||
STRING
|
||||
@@ -119,8 +118,8 @@ row_regex:.*partitions=12/12 files=24 size=.*
|
||||
row_regex:.*table: rows=3650 size=.*
|
||||
' partitions: 0/12 rows=unavailable'
|
||||
' columns: all'
|
||||
' extrapolated-rows=7300'
|
||||
' mem-estimate=16.00MB mem-reservation=0B'
|
||||
row_regex:.* extrapolated-rows=7300.*
|
||||
row_regex:.*mem-estimate=.* mem-reservation=.*
|
||||
' tuple-ids=0 row-size=4B cardinality=7300'
|
||||
---- TYPES
|
||||
STRING
|
||||
@@ -146,8 +145,8 @@ row_regex:.*partitions=12/24 files=12 size=.*
|
||||
row_regex:.*table: rows=3650 size=.*
|
||||
' partitions: 0/12 rows=unavailable'
|
||||
' columns: all'
|
||||
' extrapolated-rows=3651'
|
||||
' mem-estimate=16.00MB mem-reservation=0B'
|
||||
row_regex:.* extrapolated-rows=3651.*
|
||||
row_regex:.*mem-estimate=.* mem-reservation=.*
|
||||
' tuple-ids=0 row-size=4B cardinality=3651'
|
||||
---- TYPES
|
||||
STRING
|
||||
@@ -172,8 +171,8 @@ row_regex:.*partitions=12/24 files=12 size=.*
|
||||
row_regex:.*table: rows=10950 size=.*
|
||||
' partitions: 0/12 rows=unavailable'
|
||||
' columns: all'
|
||||
' extrapolated-rows=3651'
|
||||
' mem-estimate=16.00MB mem-reservation=0B'
|
||||
row_regex:.* extrapolated-rows=3651.*
|
||||
row_regex:.*mem-estimate=.* mem-reservation=.*
|
||||
' tuple-ids=0 row-size=4B cardinality=3651'
|
||||
---- TYPES
|
||||
STRING
|
||||
@@ -187,8 +186,7 @@ explain select id from alltypes;
|
||||
' table: rows=unavailable size=unavailable'
|
||||
' partitions: 0/24 rows=unavailable'
|
||||
' columns: unavailable'
|
||||
' extrapolated-rows=unavailable'
|
||||
' mem-estimate=16.00MB mem-reservation=0B'
|
||||
row_regex:.* extrapolated-rows=unavailable.*
|
||||
' tuple-ids=0 row-size=4B cardinality=unavailable'
|
||||
---- TYPES
|
||||
STRING
|
||||
|
||||
@@ -63,6 +63,18 @@ class QueryTestResult(object):
|
||||
def __str__(self):
    """Return the textual form of this result.

    Each entry of self.rows is formatted with '%s' and the pieces are joined
    with newline characters; an empty row list yields an empty string.
    """
    formatted_rows = ('%s' % row for row in self.rows)
    return '\n'.join(formatted_rows)
|
||||
|
||||
def separate_rows(self):
    """Split self.rows into literal and non-literal rows.

    A row is treated as literal when its `regex` attribute is None; every
    other row (e.g. a regex row) is non-literal. The relative order of rows
    is kept within each list.

    Returns:
        A tuple (literal_rows, non_literal_rows) of two lists.
    """
    literals = [row for row in self.rows if row.regex is None]
    non_literals = [row for row in self.rows if row.regex is not None]
    return (literals, non_literals)
|
||||
|
||||
|
||||
# Represents a row in a result set
|
||||
class ResultRow(object):
|
||||
@@ -207,24 +219,42 @@ def assert_args_not_none(*args):
|
||||
for arg in args:
|
||||
assert arg is not None
|
||||
|
||||
def convert_results_to_sets(expected_results, actual_results):
    """Return the rows of both results as sets of their string forms.

    Both arguments are first passed to assert_args_not_none. Because the rows
    are collected into sets, duplicate rows collapse to a single entry.

    Returns:
        A pair (expected_strings, actual_strings) built from
        expected_results.rows and actual_results.rows respectively.
    """
    assert_args_not_none(expected_results, actual_results)
    expected_strings = set(str(row) for row in expected_results.rows)
    actual_strings = set(str(row) for row in actual_results.rows)
    return expected_strings, actual_strings
|
||||
|
||||
def verify_query_result_is_subset(expected_results, actual_results):
    """Check that the expected rows are a subset of the actual rows.

    This uses set semantics, i.e. any duplicates are ignored.

    Expected rows are split with expected_results.separate_rows():
      * literal rows must appear, by string form, among the string forms of
        the actual rows;
      * each non-literal row (e.g. a regex row) must compare equal (==) to at
        least one actual row. Presumably the row type's equality applies the
        regex -- confirm against the row class.

    An exact string-set comparison over *all* expected rows must not be done
    here: a non-literal row's string form is its pattern, which would not be
    found among the actual rows.

    Raises:
        AssertionError: if either argument is None, a literal expected row is
            missing, or a non-literal expected row matches no actual row.
    """
    # Fail with AssertionError (not AttributeError) on a missing result set.
    assert expected_results is not None and actual_results is not None

    expected_literals, expected_non_literals = expected_results.separate_rows()
    expected_literal_strings = set(str(row) for row in expected_literals)
    actual_strings = set(str(row) for row in actual_results.rows)

    # Expected literal strings must all be present in the actual strings.
    missing_literals = expected_literal_strings - actual_strings
    assert not missing_literals, (
        "Could not find expected rows in actual rows:\n{0}\nActual rows:\n{1}".format(
            '\n'.join(sorted(missing_literals)), str(actual_results)))

    # Expected patterns must each match at least one actual row. The actual
    # row stays on the left-hand side of == as in the original comparison.
    for expected_row in expected_non_literals:
        matched = any(actual_row == expected_row
                      for actual_row in actual_results.rows)
        assert matched, "Could not find expected row {0} in actual rows:\n{1}".format(
            str(expected_row), str(actual_results))
|
||||
|
||||
def verify_query_result_is_superset(expected_results, actual_results):
    """Check that the expected rows are a superset of the actual rows.

    This uses set semantics, i.e. any duplicates are ignored.

    Expected rows are split with expected_results.separate_rows(). Every
    actual row must then either
      * have a string form equal to that of an expected literal row, or
      * compare equal (==) to at least one expected non-literal row (e.g. a
        regex row). Presumably the row type's equality applies the regex --
        confirm against the row class.

    An exact string-set comparison over *all* expected rows must not be done
    here: an actual row covered only by an expected pattern would never equal
    that pattern's string form.

    Raises:
        AssertionError: if either argument is None, or an actual row is
            covered by neither a literal nor a non-literal expected row.
    """
    # Fail with AssertionError (not AttributeError) on a missing result set.
    assert expected_results is not None and actual_results is not None

    expected_literals, expected_non_literals = expected_results.separate_rows()
    expected_literal_strings = set(str(row) for row in expected_literals)

    # Check that all actual rows are present in either expected_literal_strings
    # or expected_non_literals.
    for actual_row in actual_results.rows:
        if str(actual_row) in expected_literal_strings:
            # Matched to a literal string.
            continue
        # The actual row stays on the left-hand side of == as in the original
        # comparison.
        matched = any(actual_row == expected_row
                      for expected_row in expected_non_literals)
        assert matched, "Could not find actual row {0} in expected rows:\n{1}".format(
            str(actual_row), str(expected_results))
|
||||
|
||||
def verify_query_result_is_equal(expected_results, actual_results):
|
||||
assert_args_not_none(expected_results, actual_results)
|
||||
|
||||
Reference in New Issue
Block a user