From 968c61c940fe78faa08e8cc4bd257dd2ec19dc11 Mon Sep 17 00:00:00 2001
From: Michael Ho <kwho@cloudera.com>
Date: Tue, 12 Jan 2016 14:37:00 -0800
Subject: [PATCH] IMPALA-2824: Restore query options after each test.

A failed test case inside a test file will leave the rest of
the test cases in the file unexecuted. Some test cases may
modify some query options such as memory limit and then
restore them in the subsequent test cases in the same file.
The failure of those test cases will leave the query options
modified, causing cascading failures to other test cases
which aren't expected to be run with the modified query
options (e.g. lowered memory limit). This problem may lead
to broken builds which are recorded in IMPALA-2724 and
IMPALA-2824.

This change fixes the problem above by checking if a test
case modifies any query option and if so, restore those
modified query options to their default values. This change
makes the assumption that a test should not modify an option
specified in its test vector so it's safe to restore the
modified query options to their default values.

Change-Id: Ib88d1dcb6a65183e1afc8eef0c764179a9f6a8ce
Reviewed-on: http://gerrit.cloudera.org:8080/1774
Reviewed-by: Michael Ho <kwho@cloudera.com>
Tested-by: Internal Jenkins
---
 .../queries/QueryTest/analytic-fns.test       |  4 +-
 .../queries/QueryTest/distinct.test           |  7 ----
 .../queries/QueryTest/large_strings.test      |  3 --
 .../QueryTest/nested-types-subplan.test       |  7 ----
 .../queries/QueryTest/nested-types-tpch.test  | 26 +++---------
 .../queries/QueryTest/set.test                | 28 ++++---------
 .../QueryTest/single-node-nlj-exhaustive.test |  4 +-
 .../queries/QueryTest/spilling.test           |  4 --
 .../tpch/queries/tpch-outer-joins.test        |  3 --
 tests/beeswax/impala_beeswax.py               |  1 +
 tests/common/impala_connection.py             |  5 ++-
 tests/common/impala_test_suite.py             | 42 +++++++++++++++++++
 tests/metadata/test_set.py                    |  2 +
 13 files changed, 65 insertions(+), 71 deletions(-)

diff --git a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
index 2a20deccc..4cb937fb0 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/analytic-fns.test
@@ -1591,6 +1591,4 @@ FROM functional_parquet.alltypes t1 CROSS JOIN functional_parquet.alltypes t2 LI
 ---- CATCH
 Memory limit exceeded
 ====
----- QUERY
-set mem_limit=0
-====
+
diff --git a/testdata/workloads/functional-query/queries/QueryTest/distinct.test b/testdata/workloads/functional-query/queries/QueryTest/distinct.test
index 2f210b198..e84feb6ae 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/distinct.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/distinct.test
@@ -360,13 +360,6 @@ from alltypesagg
 bigint, bigint, bigint
 ====
 ---- QUERY
-# Unset the query option from the previous test.
-set appx_count_distinct=false
----- RESULTS
----- TYPES
-string, string
-====
----- QUERY
 # Large (more than 1 regular batches) distinct with multiple group by keys.
 SELECT COUNT(*) FROM
 (SELECT COUNT(DISTINCT p_partkey)
diff --git a/testdata/workloads/functional-query/queries/QueryTest/large_strings.test b/testdata/workloads/functional-query/queries/QueryTest/large_strings.test
index 68fe303c9..b25e5c80e 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/large_strings.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/large_strings.test
@@ -112,6 +112,3 @@ select l_comment from tpch_parquet.lineitem) a
 ---- CATCH
 Memory limit exceeded
 ====
----- QUERY
-set mem_limit=0
-====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test
index 170363b4a..31bd88199 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-subplan.test
@@ -536,13 +536,6 @@ where c_custkey = 67974;
 bigint,bigint
 ====
 ---- QUERY
-# Set the num_nodes query option back to its default value.
-set num_nodes=0
----- RESULTS
----- TYPES
-string,string
-====
----- QUERY
 # IMPALA-2289: Test nested-loop join with right anti join mode inside a subplan.
 select count(*) from customer c left anti join c.c_orders
 where c_custkey between 8000 and 10000
diff --git a/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test b/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test
index 8ea11f1f1..4984c1f1a 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/nested-types-tpch.test
@@ -162,9 +162,7 @@ BIGINT,BIGINT
 ---- QUERY
 # IMPALA-2376
 # Set memory limit low enough to get the below query to consistently fail.
-set mem_limit=4m
-====
----- QUERY
+set mem_limit=4m;
 select max(cnt) from customer c,
 (select count(l_returnflag) cnt from c.c_orders.o_lineitems) v;
 ---- TYPES
@@ -174,14 +172,10 @@ Memory limit exceeded
 Failed to allocate buffer for collection 'tpch_nested_parquet.customer.c_orders.item.o_lineitems'.
 ====
 ---- QUERY
-set num_scanner_threads=1
-====
----- QUERY
-set mem_limit=1g
-====
----- QUERY
 # IMPALA-2473: Scan query with large row size leading to oversized batches.
 # Should not OOM given single scanner thread and sufficient memory limit.
+set num_scanner_threads=1;
+set mem_limit=1g;
 select *
 from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems
 order by l_partkey desc, l_suppkey desc, l_linenumber desc, c_custkey
@@ -196,14 +190,10 @@ limit 5
 bigint,string,string,smallint,string,decimal,string,string,bigint,string,decimal,string,string,string,int,string,bigint,bigint,int,decimal,decimal,decimal,decimal,string,string,string,string,string,string,string,string
 ====
 ---- QUERY
-set num_scanner_threads=1
-====
----- QUERY
-set mem_limit=500m
-====
----- QUERY
 # IMPALA-2473: Highly selective scan query with large row size.
 # Should not OOM given single scanner thread and sufficient memory limit.
+set num_scanner_threads=1;
+set mem_limit=500m;
 select *
 from tpch_nested_parquet.customer c, c.c_orders o, o.o_lineitems
 where c_phone='20-968-632-1388' and l_partkey = 127499
@@ -212,9 +202,3 @@ where c_phone='20-968-632-1388' and l_partkey = 127499
 ---- TYPES
 bigint,string,string,smallint,string,decimal,string,string,bigint,string,decimal,string,string,string,int,string,bigint,bigint,int,decimal,decimal,decimal,decimal,string,string,string,string,string,string,string,string
 ====
----- QUERY
-set mem_limit=0
-====
----- QUERY
-set num_scanner_threads=0
-====
diff --git a/testdata/workloads/functional-query/queries/QueryTest/set.test b/testdata/workloads/functional-query/queries/QueryTest/set.test
index 35de818be..f32107762 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/set.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/set.test
@@ -31,10 +31,8 @@ set
 STRING, STRING
 ====
 ---- QUERY
-set explain_level=3
-====
----- QUERY
-set
+set explain_level=3;
+set;
 ---- RESULTS: VERIFY_IS_SUBSET
 'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
 'ABORT_ON_ERROR','0'
@@ -65,10 +63,8 @@ set
 STRING, STRING
 ====
 ---- QUERY
-set explain_level='0'
-====
----- QUERY
-set
+set explain_level='0';
+set;
 ---- RESULTS: VERIFY_IS_SUBSET
 'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
 'ABORT_ON_ERROR','0'
@@ -100,10 +96,8 @@ STRING, STRING
 ====
 ---- QUERY
 # IMPALA-1906: Test that SET changes PARQUET_FILE_SIZE only if it's less than 2GB.
-set parquet_file_size='1.5g'
-====
----- QUERY
-set
+set parquet_file_size='1.5g';
+set;
 ---- RESULTS: VERIFY_IS_SUBSET
 'ABORT_ON_DEFAULT_LIMIT_EXCEEDED','0'
 'ABORT_ON_ERROR','0'
@@ -114,7 +108,7 @@ set
 'DISABLE_CACHED_READS','0'
 'DISABLE_CODEGEN','0'
 'DISABLE_OUTERMOST_TOPN','0'
-'EXPLAIN_LEVEL','0'
+'EXPLAIN_LEVEL','1'
 'HBASE_CACHE_BLOCKS','0'
 'HBASE_CACHING','0'
 'MAX_ERRORS','0'
@@ -157,18 +151,14 @@ select 1
 ====
 ---- QUERY
 # Set mem_limit really small so that queries will fail.
-set mem_limit=1
-====
----- QUERY
+set mem_limit=1;
 select count(string_col) from functional.alltypestiny
 ---- CATCH
 Memory limit exceeded
 ====
 ---- QUERY
 # Set mem_limit back to unlimited and query should succeed again.
-set mem_limit=0
-====
----- QUERY
+set mem_limit=0;
 select count(string_col) from functional.alltypestiny
 ---- RESULTS
 8
diff --git a/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test b/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test
index dba1c4d78..a6b3cae6c 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/single-node-nlj-exhaustive.test
@@ -1,9 +1,7 @@
 ====
 ---- QUERY
-set mem_limit=200m
-====
----- QUERY
 # IMPALA-2207: Analytic eval node feeding into build side of nested loop join.
+set mem_limit=200m;
 select straight_join * from (values(1 id), (2), (3)) v1,
 (select *, count(*) over() from tpch.lineitem where l_orderkey < 100000) v2
 order by id, l_orderkey, l_partkey, l_suppkey, l_linenumber
diff --git a/testdata/workloads/functional-query/queries/QueryTest/spilling.test b/testdata/workloads/functional-query/queries/QueryTest/spilling.test
index d0915cd39..203c4df6b 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/spilling.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/spilling.test
@@ -350,7 +350,3 @@ from lineitem
 ---- TYPES
 BIGINT
 ====
----- QUERY
-set mem_limit=0;
-set num_nodes=0;
-====
\ No newline at end of file
diff --git a/testdata/workloads/tpch/queries/tpch-outer-joins.test b/testdata/workloads/tpch/queries/tpch-outer-joins.test
index 49c1b8cdd..1c255ab69 100644
--- a/testdata/workloads/tpch/queries/tpch-outer-joins.test
+++ b/testdata/workloads/tpch/queries/tpch-outer-joins.test
@@ -45,6 +45,3 @@ ON cast(t1.o_comment as char(120)) = cast(t2.o_comment as char(120))
 ---- TYPES
 BIGINT
 ====
----- QUERY
-set mem_limit=0
-====
\ No newline at end of file
diff --git a/tests/beeswax/impala_beeswax.py b/tests/beeswax/impala_beeswax.py
index 4b6983510..3496b7410 100644
--- a/tests/beeswax/impala_beeswax.py
+++ b/tests/beeswax/impala_beeswax.py
@@ -141,6 +141,7 @@ class ImpalaBeeswaxClient(object):
     """Close the transport if it's still open"""
     if self.transport:
       self.transport.close()
+    self.connected = False
 
   def __get_transport(self):
     """Creates the proper transport type based environment (secure vs unsecure)"""
diff --git a/tests/common/impala_connection.py b/tests/common/impala_connection.py
index 7faca33b9..d7f24cc84 100644
--- a/tests/common/impala_connection.py
+++ b/tests/common/impala_connection.py
@@ -129,7 +129,10 @@ class BeeswaxConnection(ImpalaConnection):
       self.__beeswax_client.set_query_option(name, value)
 
   def get_configuration(self):
-    return self.__beeswax_client.get_query_options
+    return self.__beeswax_client.get_query_options()
+
+  def get_default_configuration(self):
+    return self.__beeswax_client.get_default_configuration()
 
   def set_configuration(self, config_option_dict):
     assert config_option_dict is not None, "config_option_dict cannot be None"
diff --git a/tests/common/impala_test_suite.py b/tests/common/impala_test_suite.py
index f7d6a481a..961e22c5f 100644
--- a/tests/common/impala_test_suite.py
+++ b/tests/common/impala_test_suite.py
@@ -20,8 +20,11 @@ import pprint
 import pwd
 import pytest
 import grp
+import re
+import string
 from getpass import getuser
 from functools import wraps
+from impala._thrift_gen.ImpalaService.ttypes import TImpalaQueryOptions
 from random import choice
 from tests.common.impala_service import ImpaladService
 from tests.common.impala_connection import ImpalaConnection, create_connection
@@ -65,6 +68,9 @@ FILESYSTEM_PREFIX = os.getenv("FILESYSTEM_PREFIX")
 # this will be the same as fs.defaultFS.  When running against a secondary filesystem,
 # this will be the same as FILESYSTEM_PREFIX.
 NAMENODE = FILESYSTEM_PREFIX or CORE_CONF.get('fs.defaultFS')
+# Match any SET statement. Assume that query options' names
+# only contain alphabets and underscores.
+SET_PATTERN = re.compile(r'set\s*([a-zA-Z_]+)=*', re.I)
 
 # Base class for Impala tests. All impala test cases should inherit from this class
 class ImpalaTestSuite(BaseTestSuite):
@@ -102,6 +108,9 @@ class ImpalaTestSuite(BaseTestSuite):
     # Create a connection to Impala.
     cls.client = cls.create_impala_client(IMPALAD)
 
+    # Default query options are populated on demand.
+    cls.default_query_options = {}
+
     cls.impalad_test_service = cls.create_impala_service()
     cls.hdfs_client = cls.create_hdfs_client()
 
@@ -147,6 +156,32 @@ class ImpalaTestSuite(BaseTestSuite):
     self.client.set_configuration({'sync_ddl': sync_ddl})
     self.client.execute("drop database if exists `" + db_name + "` cascade")
 
+  @classmethod
+  def restore_query_options(self, query_options_changed):
+    """
+    Restore the list of modified query options to their default values.
+    """
+    # Populate the default query option if it's empty.
+    if not self.default_query_options:
+      try:
+        query_options = self.client.get_default_configuration()
+        for query_option in query_options:
+          self.default_query_options[query_option.key.upper()] = query_option.value
+      except Exception as e:
+        LOG.info('Unexpected exception when getting default query options: ' + str(e))
+        return
+    # Restore all the changed query options.
+    for query_option in query_options_changed:
+      query_option = query_option.upper()
+      if not query_option in self.default_query_options:
+        continue
+      default_val = self.default_query_options[query_option]
+      query_str = 'SET '+ query_option + '=' + default_val + ';'
+      try:
+        self.client.execute(query_str)
+      except Exception as e:
+        LOG.info('Unexpected exception when executing ' + query_str + ' : ' + str(e))
+
   def run_test_case(self, test_file_name, vector, use_db=None, multiple_impalad=False,
       encoding=None):
     """
@@ -206,6 +241,7 @@ class ImpalaTestSuite(BaseTestSuite):
       # TODO: consider supporting result verification of all queries in the future
       result = None
       target_impalad_client = choice(target_impalad_clients)
+      query_options_changed = []
       try:
         user = None
         if 'USER' in test_section:
@@ -213,6 +249,9 @@ class ImpalaTestSuite(BaseTestSuite):
           user = test_section['USER'].strip()
           target_impalad_client = self.create_impala_client()
         for query in query.split(';'):
+          set_pattern_match = SET_PATTERN.match(query)
+          if set_pattern_match != None:
+            query_options_changed.append(set_pattern_match.groups()[0])
           result = self.__execute_query(target_impalad_client, query, user=user)
       except Exception as e:
         if 'CATCH' in test_section:
@@ -225,6 +264,9 @@ class ImpalaTestSuite(BaseTestSuite):
           assert expected_str in str(e)
           continue
         raise
+      finally:
+        if len(query_options_changed) > 0:
+          self.restore_query_options(query_options_changed)
 
       if 'CATCH' in test_section:
         assert test_section['CATCH'].strip() == ''
diff --git a/tests/metadata/test_set.py b/tests/metadata/test_set.py
index 4e3e81689..9b77d2c92 100644
--- a/tests/metadata/test_set.py
+++ b/tests/metadata/test_set.py
@@ -33,4 +33,6 @@ class TestSet(ImpalaTestSuite):
     cls.TestMatrix.add_dimension(create_uncompressed_text_dimension(cls.get_workload()))
 
   def test_set(self, vector):
+    # Please note that query options modified during the test will be reset
+    # at the end of each test.
     self.run_test_case('QueryTest/set', vector)