From 968c61c940fe78faa08e8cc4bd257dd2ec19dc11 Mon Sep 17 00:00:00 2001 From: Michael Ho Date: Tue, 12 Jan 2016 14:37:00 -0800 Subject: [PATCH] IMPALA-2824: Restore query options after each test. A failed test case inside a test file will leave the rest of the test cases in the file unexecuted. Some test cases may modify some query options such as memory limit and then restore them in the subsequent test cases in the same file. The failure of those test cases will leave the query options modified, causing cascading failures to other test cases which aren't expected to be run with the modified query options (e.g. lowered memory limit). This problem may lead to broken builds which are recorded in IMPALA-2724 and IMPALA-2824. This change fixes the problem above by checking if a test case modifies any query option and if so, restore those modified query options to their default values. This change makes the assumption that a test should not modify an option specified in its test vector so it's safe to restore the modified query options to their default values. Change-Id: Ib88d1dcb6a65183e1afc8eef0c764179a9f6a8ce Reviewed-on: http://gerrit.cloudera.org:8080/1774 Reviewed-by: Michael Ho Tested-by: Internal Jenkins --- .../queries/QueryTest/analytic-fns.test | 4 +- .../queries/QueryTest/distinct.test | 7 ---- .../queries/QueryTest/large_strings.test | 3 -- .../QueryTest/nested-types-subplan.test | 7 ---- .../queries/QueryTest/nested-types-tpch.test | 26 +++--------- .../queries/QueryTest/set.test | 28 ++++--------- .../QueryTest/single-node-nlj-exhaustive.test | 4 +- .../queries/QueryTest/spilling.test | 4 -- .../tpch/queries/tpch-outer-joins.test | 3 -- tests/beeswax/impala_beeswax.py | 1 + tests/common/impala_connection.py | 5 ++- tests/common/impala_test_suite.py | 42 +++++++++++++++++++ tests/metadata/test_set.py | 2 + 13 files changed, 65 insertions(+), 71 deletions(-) diff --git a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test index 2a20deccc..4cb937fb0 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test +++ b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test @@ -1591,6 +1591,4 @@ FROM functional_parquet.alltypes t1 CROSS JOIN functional_parquet.alltypes t2 LI ---- CATCH Memory limit exceeded ==== ----- QUERY -set mem_limit=0 -==== + diff --git a/testdata/workloads/functional-query/queries/QueryTest/distinct.test b/testdata/workloads/functional-query/queries/QueryTest/distinct.test index 2f210b198..e84feb6ae 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/distinct.test +++ b/testdata/workloads/functional-query/queries/QueryTest/distinct.test @@ -360,13 +360,6 @@ from alltypesagg bigint, bigint, bigint ==== ---- QUERY -# Unset the query option from the previous test. -set appx_count_distinct=false ----- RESULTS ----- TYPES -string, string -==== ----- QUERY # Large (more than 1 regular batches) distinct with multiple group by keys. SELECT COUNT(*) FROM (SELECT COUNT(DISTINCT p_partkey) diff --git a/testdata/workloads/functional-query/queries/QueryTest/large_strings.test b/testdata/workloads/functional-query/queries/QueryTest/large_strings.test index 68fe303c9..b25e5c80e 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/large_strings.test +++ b/testdata/workloads/functional-query/queries/QueryTest/large_strings.test @@ -112,6 +112,3 @@ select l_comment from tpch_parquet.lineitem) a ---- CATCH Memory limit exceeded ==== ----- QUERY -set mem_limit=0 -==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test index 170363b4a..31bd88199 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test +++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test @@ -536,13 +536,6 @@ where c_custkey = 67974; bigint,bigint ==== ---- QUERY -# Set the num_nodes query option back to its default value. -set num_nodes=0 ----- RESULTS ----- TYPES -string,string -==== ----- QUERY # IMPALA-2289: Test nested-loop join with right anti join mode inside a subplan. select count(*) from customer c left anti join c.c_orders where c_custkey between 8000 and 10000 diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test index 8ea11f1f1..4984c1f1a 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test +++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test @@ -162,9 +162,7 @@ BIGINT,BIGINT ---- QUERY # IMPALA-2376 # Set memory limit low enough to get the below query to consistently fail. -set mem_limit=4m -==== ----- QUERY +set mem_limit=4m; select max(cnt) from customer c, (select count(l_returnflag) cnt from c.c_orders.o_lineitems) v; ---- TYPES @@ -174,14 +172,10 @@ Memory limit exceeded Failed to allocate buffer for collection 'tpch_nested_parquet.customer.c_orders.item.o_lineitems'. ==== ---- QUERY -set num_scanner_threads=1 -==== ----- QUERY -set mem_limit=1g -==== ----- QUERY # IMPALA-2473: Scan query with large row size leading to oversized batches. # Should not OOM given single scanner thread and sufficient memory limit. +set num_scanner_threads=1; +set mem_limit=1g; select * from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems order by l_partkey desc, l_suppkey desc, l_linenumber desc, c_custkey @@ -196,14 +190,10 @@ limit 5 bigint,string,string,smallint,string,decimal,string,string,bigint,string,decimal,string,string,string,int,string,bigint,bigint,int,decimal,decimal,decimal,decimal,string,string,string,string,string,string,string,string ==== ---- QUERY -set num_scanner_threads=1 -==== ----- QUERY -set mem_limit=500m -==== ----- QUERY # IMPALA-2473: Highly selective scan query with large row size. # Should not OOM given single scanner thread and sufficient memory limit. +set num_scanner_threads=1; +set mem_limit=500m; select * from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems where c_phone='20-968-632-1388' and l_partkey = 127499 @@ -212,9 +202,3 @@ where c_phone='20-968-632-1388' and l_partkey = 127499 ---- TYPES bigint,string,string,smallint,string,decimal,string,string,bigint,string,decimal,string,string,string,int,string,bigint,bigint,int,decimal,decimal,decimal,decimal,string,string,string,string,string,string,string,string ==== ----- QUERY -set mem_limit=0 -==== ----- QUERY -set num_scanner_threads=0 -==== diff --git a/testdata/workloads/functional-query/queries/QueryTest/set.test b/testdata/workloads/functional-query/queries/QueryTest/set.test index 35de818be..f32107762 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/set.test +++ b/testdata/workloads/functional-query/queries/QueryTest/set.test @@ -31,10 +31,8 @@ set STRING, STRING ==== ---- QUERY -set explain_level=3 -==== ----- QUERY -set +set explain_level=3; +set; ---- RESULTS: VERIFY_IS_SUBSET 'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0' 'ABORT_ON_ERROR','0' @@ -65,10 +63,8 @@ set STRING, STRING ==== ---- QUERY -set explain_level='0' -==== ----- QUERY -set +set explain_level='0'; +set; ---- RESULTS: VERIFY_IS_SUBSET 'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0' 'ABORT_ON_ERROR','0' @@ -100,10 +96,8 @@ STRING, STRING ==== ---- QUERY # IMPALA-1906: Test that SET changes PARQUET_FILE_SIZE only if it's less than 2GB. -set parquet_file_size='1.5g' -==== ----- QUERY -set +set parquet_file_size='1.5g'; +set; ---- RESULTS: VERIFY_IS_SUBSET 'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0' 'ABORT_ON_ERROR','0' @@ -114,7 +108,7 @@ set 'DISABLE_CACHED_READS','0' 'DISABLE_CODEGEN','0' 'DISABLE_OUTERMOST_TOPN','0' -'EXPLAIN_LEVEL','0' +'EXPLAIN_LEVEL','1' 'HBASE_CACHE_BLOCKS','0' 'HBASE_CACHING','0' 'MAX_ERRORS','0' @@ -157,18 +151,14 @@ select 1 ==== ---- QUERY # Set mem_limit really small so that queries will fail. -set mem_limit=1 -==== ----- QUERY +set mem_limit=1; select count(string_col) from functional.alltypestiny ---- CATCH Memory limit exceeded ==== ---- QUERY # Set mem_limit back to unlimited and query should succeed again. -set mem_limit=0 -==== ----- QUERY +set mem_limit=0; select count(string_col) from functional.alltypestiny ---- RESULTS 8 diff --git a/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test b/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test index dba1c4d78..a6b3cae6c 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test +++ b/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test @@ -1,9 +1,7 @@ ==== ---- QUERY -set mem_limit=200m -==== ----- QUERY # IMPALA-2207: Analytic eval node feeding into build side of nested loop join. +set mem_limit=200m; select straight_join * from (values(1 id), (2), (3)) v1, (select *, count(*) over() from tpch.lineitem where l_orderkey < 100000) v2 order by id, l_orderkey, l_partkey, l_suppkey, l_linenumber diff --git a/testdata/workloads/functional-query/queries/QueryTest/spilling.test b/testdata/workloads/functional-query/queries/QueryTest/spilling.test index d0915cd39..203c4df6b 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/spilling.test +++ b/testdata/workloads/functional-query/queries/QueryTest/spilling.test @@ -350,7 +350,3 @@ from lineitem ---- TYPES BIGINT ==== ----- QUERY -set mem_limit=0; -set num_nodes=0; -==== \ No newline at end of file diff --git a/testdata/workloads/tpch/queries/tpch-outer-joins.test b/testdata/workloads/tpch/queries/tpch-outer-joins.test index 49c1b8cdd..1c255ab69 100644 --- a/testdata/workloads/tpch/queries/tpch-outer-joins.test +++ b/testdata/workloads/tpch/queries/tpch-outer-joins.test @@ -45,6 +45,3 @@ ON cast(t1.o_comment as char(120)) = cast(t2.o_comment as char(120)) ---- TYPES BIGINT ==== ----- QUERY -set mem_limit=0 -==== \ No newline at end of file diff --git a/tests/beeswax/impala_beeswax.py b/tests/beeswax/impala_beeswax.py index 4b6983510..3496b7410 100644 --- a/tests/beeswax/impala_beeswax.py +++ b/tests/beeswax/impala_beeswax.py @@ -141,6 +141,7 @@ class ImpalaBeeswaxClient(object): """Close the transport if it's still open""" if self.transport: self.transport.close() + self.connected = False def __get_transport(self): """Creates the proper transport type based environment (secure vs unsecure)""" diff --git a/tests/common/impala_connection.py b/tests/common/impala_connection.py index 7faca33b9..d7f24cc84 100644 --- a/tests/common/impala_connection.py +++ b/tests/common/impala_connection.py @@ -129,7 +129,10 @@ class BeeswaxConnection(ImpalaConnection): self.__beeswax_client.set_query_option(name, value) def get_configuration(self): - return self.__beeswax_client.get_query_options + return self.__beeswax_client.get_query_options() + + def get_default_configuration(self): + return self.__beeswax_client.get_default_configuration() def set_configuration(self, config_option_dict): assert config_option_dict is not None, "config_option_dict cannot be None" diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py index f7d6a481a..961e22c5f 100644 --- a/tests/common/impala_test_suite.py +++ b/tests/common/impala_test_suite.py @@ -20,8 +20,11 @@ import pprint import pwd import pytest import grp +import re +import string from getpass import getuser from functools import wraps +from impala._thrift_gen.ImpalaService.ttypes import TImpalaQueryOptions from random import choice from tests.common.impala_service import ImpaladService from tests.common.impala_connection import ImpalaConnection, create_connection @@ -65,6 +68,9 @@ FILESYSTEM_PREFIX = os.getenv("FILESYSTEM_PREFIX") # this will be the same as fs.defaultFS. When running against a secondary filesystem, # this will be the same as FILESYSTEM_PREFIX. NAMENODE = FILESYSTEM_PREFIX or CORE_CONF.get('fs.defaultFS') +# Match any SET statement. Assume that query options' names +# only contain alphabets and underscores. +SET_PATTERN = re.compile(r'set\s*([a-zA-Z_]+)=*', re.I) # Base class for Impala tests. All impala test cases should inherit from this class class ImpalaTestSuite(BaseTestSuite): @@ -102,6 +108,9 @@ class ImpalaTestSuite(BaseTestSuite): # Create a connection to Impala. cls.client = cls.create_impala_client(IMPALAD) + # Default query options are populated on demand. + cls.default_query_options = {} + cls.impalad_test_service = cls.create_impala_service() cls.hdfs_client = cls.create_hdfs_client() @@ -147,6 +156,32 @@ class ImpalaTestSuite(BaseTestSuite): self.client.set_configuration({'sync_ddl': sync_ddl}) self.client.execute("drop database if exists `" + db_name + "` cascade") + @classmethod + def restore_query_options(self, query_options_changed): + """ + Restore the list of modified query options to their default values. + """ + # Populate the default query option if it's empty. + if not self.default_query_options: + try: + query_options = self.client.get_default_configuration() + for query_option in query_options: + self.default_query_options[query_option.key.upper()] = query_option.value + except Exception as e: + LOG.info('Unexpected exception when getting default query options: ' + str(e)) + return + # Restore all the changed query options. + for query_option in query_options_changed: + query_option = query_option.upper() + if not query_option in self.default_query_options: + continue + default_val = self.default_query_options[query_option] + query_str = 'SET '+ query_option + '=' + default_val + ';' + try: + self.client.execute(query_str) + except Exception as e: + LOG.info('Unexpected exception when executing ' + query_str + ' : ' + str(e)) + def run_test_case(self, test_file_name, vector, use_db=None, multiple_impalad=False, encoding=None): """ @@ -206,6 +241,7 @@ class ImpalaTestSuite(BaseTestSuite): # TODO: consider supporting result verification of all queries in the future result = None target_impalad_client = choice(target_impalad_clients) + query_options_changed = [] try: user = None if 'USER' in test_section: @@ -213,6 +249,9 @@ class ImpalaTestSuite(BaseTestSuite): user = test_section['USER'].strip() target_impalad_client = self.create_impala_client() for query in query.split(';'): + set_pattern_match = SET_PATTERN.match(query) + if set_pattern_match != None: + query_options_changed.append(set_pattern_match.groups()[0]) result = self.__execute_query(target_impalad_client, query, user=user) except Exception as e: if 'CATCH' in test_section: @@ -225,6 +264,9 @@ class ImpalaTestSuite(BaseTestSuite): assert expected_str in str(e) continue raise + finally: + if len(query_options_changed) > 0: + self.restore_query_options(query_options_changed) if 'CATCH' in test_section: assert test_section['CATCH'].strip() == '' diff --git a/tests/metadata/test_set.py b/tests/metadata/test_set.py index 4e3e81689..9b77d2c92 100644 --- a/tests/metadata/test_set.py +++ b/tests/metadata/test_set.py @@ -33,4 +33,6 @@ class TestSet(ImpalaTestSuite): cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload())) def test_set(self, vector): + # Please note that query options modified during the test will be reset + # at the end of each test. self.run_test_case('QueryTest/set', vector)