IMPALA-11977: Fix Python 3 broken imports and object model differences

Python 3 changed some object model methods:
 - __nonzero__ was removed in favor of __bool__
 - func_dict / func_name were removed in favor of __dict__ / __name__
 - The next() method on iterators was removed in favor of __next__
   (call sites should use next(it) rather than it.next())
 - metaclasses are specified in a different way
 - Classes that define __eq__ should also define __hash__
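
A minimal sketch combining these patterns in one Python 2/3
compatible class (illustrative only; the class and attribute names
here are hypothetical, not code from this change):

  import abc
  from future.utils import with_metaclass  # from the 'future' package

  # with_metaclass replaces the Python 2 "__metaclass__ = abc.ABCMeta"
  # class attribute and works on both Python 2 and 3.
  class ResultSet(with_metaclass(abc.ABCMeta, object)):
      def __init__(self, rows):
          self.rows = list(rows)

      def __bool__(self):        # Python 3 truth-value hook
          return bool(self.rows)
      __nonzero__ = __bool__     # keep Python 2 behavior identical

      def __eq__(self, other):
          return isinstance(other, ResultSet) and self.rows == other.rows

      # A class that defines __eq__ without __hash__ is unhashable on
      # Python 3, so define both.
      def __hash__(self):
          return hash(tuple(self.rows))

  rs = ResultSet(["row1", "row2"])
  it = iter(rs.rows)
  print(next(it))   # next(it), not it.next(); the method is __next__ now
  print(bool(rs))   # dispatches to __bool__ (__nonzero__ on Python 2)

The diffs below apply the same with_metaclass pattern to
ImpalaConnection, Cluster, Impalad, DbConnection, DataType,
AbstractStatement, and BaseFilesystem.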

Python 3 also relocated several modules (urllib2, Queue, httplib,
etc.), and this change adapts the code to use the new locations
(usually handled on Python 2 via the future library). It also fixes
code that referenced an exception variable outside its except block
or a comprehension variable outside the comprehension. Several of
these look like false positives, but it is better to avoid the
warnings.
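
A hedged sketch of the module-relocation fallback and the two scoping
fixes (module and variable names here are illustrative):

  try:                        # Python 3 locations first
      from urllib.request import Request, urlopen
      from urllib.parse import urlparse
  except ImportError:         # fall back to the Python 2 modules
      from urllib2 import Request, urlopen
      from urlparse import urlparse

  def fetch(url):
      # exception-escape: Python 3 deletes the bound exception variable
      # when the except block exits, so copy it to an outer name before
      # using it after the block.
      err = None
      try:
          return urlopen(Request(url)).read()
      except Exception as e:
          err = e
      print("request failed: %s" % err)

  # comprehension-escape: a comprehension's loop variable no longer
  # leaks into the enclosing scope on Python 3, so give it a name
  # distinct from variables used outside (cf. the 'row' -> 'r' rename
  # in the diffs below).
  rows = [0, 1, 2]
  significant = [r for r in rows if r > 0]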

This fixes the following pylint warnings:
bad-python3-import
eq-without-hash
metaclass-assignment
next-method-called
nonzero-method
exception-escape
comprehension-escape

Testing:
 - Ran core tests
 - Ran release exhaustive tests

Change-Id: I988ae6c139142678b0d40f1f4170b892eabf25ee
Reviewed-on: http://gerrit.cloudera.org:8080/19592
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Author: Joe McDonnell
Date: 2023-03-04 15:17:46 -08:00
Parent: aa4050b4d9
Commit: 0c7c6a335e
32 changed files with 210 additions and 153 deletions

@@ -17,3 +17,10 @@ round-builtin
deprecated-string-function
sys-max-int
exception-message-attribute
bad-python3-import
eq-without-hash
metaclass-assignment
next-method-called
nonzero-method
comprehension-escape
exception-escape

@@ -95,6 +95,7 @@
# serially.
#
from __future__ import absolute_import, division, print_function
from builtins import object
import collections
import csv
import glob
@@ -592,7 +593,7 @@ class Statements(object):
with open(filename, 'w') as f:
f.write('\n\n'.join(output))
def __nonzero__(self):
def __bool__(self):
return bool(self.create or self.load or self.load_base)
def eval_section(section_str):

@@ -52,7 +52,7 @@ def get_columns(num_cols):
iter = itertools.cycle(templates)
# Produces [bool_col1, tinyint_col1, ..., bool_col2, tinyint_col2, ...]
# The final list has 'num_cols' elements.
return [iter.next() % (i // len(templates) + 1) for i in range(num_cols)]
return [next(iter) % (i // len(templates) + 1) for i in range(num_cols)]
# Data generators for different types. Each generator yields an infinite number of
# value strings suitable for writing to a CSV file.
@@ -83,7 +83,7 @@ def quote(iter_fn):
def new_iter_fn():
iter = iter_fn()
while True:
yield "'%s'" % iter.next()
yield "'%s'" % next(iter)
return new_iter_fn
def get_data(num_cols, num_rows, delimiter=',', quote_strings=False):
@@ -101,12 +101,12 @@ def get_data(num_cols, num_rows, delimiter=',', quote_strings=False):
]
# Create a generator instance for each column, cycling through the different types
iter = itertools.cycle(generators)
column_generators = [iter.next()() for i in range(num_cols)]
column_generators = [next(iter)() for i in range(num_cols)]
# Populate each row using column_generators
rows = []
for i in range(num_rows):
vals = [gen.next() for gen in column_generators]
vals = [next(gen) for gen in column_generators]
rows.append(delimiter.join(vals))
return rows

@@ -238,12 +238,15 @@ def all_query_results(grouped):
def get_commit_date(commit_sha):
import urllib2
try:
from urllib.request import Request, urlopen
except ImportError:
from urllib2 import Request, urlopen
url = 'https://api.github.com/repos/apache/impala/commits/' + commit_sha
try:
request = urllib2.Request(url)
response = urllib2.urlopen(request).read()
request = Request(url)
response = urlopen(request).read()
data = json.loads(response.decode('utf8'))
return data['commit']['committer']['date'][:10]
except:
@@ -251,7 +254,7 @@ def get_commit_date(commit_sha):
def get_impala_version(grouped):
"""Figure out Impala version by looking at query profile."""
first_result = all_query_results(grouped).next()
first_result = next(all_query_results(grouped))
profile = first_result['result_list'][0]['runtime_profile']
match = re.search('Impala Version:\s(.*)\s\(build\s(.*)\)', profile)
version = match.group(1)
@@ -915,7 +918,7 @@ class ExecSummaryComparison(object):
table.align = 'l'
table.float_format = '.2'
table_contains_at_least_one_row = False
for row in [row for row in self.rows if is_significant(row)]:
for row in [r for r in self.rows if is_significant(r)]:
table_row = [row[OPERATOR],
'{0:.2%}'.format(row[PERCENT_OF_QUERY]),
'{0:.2%}'.format(row[RSTD]),

@@ -115,67 +115,67 @@ class CustomClusterTestSuite(ImpalaTestSuite):
method's func_dict"""
def decorate(func):
if impalad_args is not None:
func.func_dict[IMPALAD_ARGS] = impalad_args
func.func_dict[STATESTORED_ARGS] = statestored_args
func.__dict__[IMPALAD_ARGS] = impalad_args
func.__dict__[STATESTORED_ARGS] = statestored_args
if catalogd_args is not None:
func.func_dict[CATALOGD_ARGS] = catalogd_args
func.__dict__[CATALOGD_ARGS] = catalogd_args
if start_args is not None:
func.func_dict[START_ARGS] = start_args.split()
func.__dict__[START_ARGS] = start_args.split()
if jvm_args is not None:
func.func_dict[JVM_ARGS] = jvm_args
func.__dict__[JVM_ARGS] = jvm_args
if hive_conf_dir is not None:
func.func_dict[HIVE_CONF_DIR] = hive_conf_dir
func.__dict__[HIVE_CONF_DIR] = hive_conf_dir
if kudu_args is not None:
func.func_dict[KUDU_ARGS] = kudu_args
func.__dict__[KUDU_ARGS] = kudu_args
if default_query_options is not None:
func.func_dict[DEFAULT_QUERY_OPTIONS] = default_query_options
func.__dict__[DEFAULT_QUERY_OPTIONS] = default_query_options
if impala_log_dir is not None:
func.func_dict[IMPALA_LOG_DIR] = impala_log_dir
func.__dict__[IMPALA_LOG_DIR] = impala_log_dir
if cluster_size is not None:
func.func_dict[CLUSTER_SIZE] = cluster_size
func.__dict__[CLUSTER_SIZE] = cluster_size
if num_exclusive_coordinators is not None:
func.func_dict[NUM_EXCLUSIVE_COORDINATORS] = num_exclusive_coordinators
func.__dict__[NUM_EXCLUSIVE_COORDINATORS] = num_exclusive_coordinators
if statestored_timeout_s is not None:
func.func_dict[STATESTORED_TIMEOUT_S] = statestored_timeout_s
func.__dict__[STATESTORED_TIMEOUT_S] = statestored_timeout_s
if impalad_timeout_s is not None:
func.func_dict[IMPALAD_TIMEOUT_S] = impalad_timeout_s
func.__dict__[IMPALAD_TIMEOUT_S] = impalad_timeout_s
if expect_cores is not None:
func.func_dict[EXPECT_CORES] = expect_cores
func.__dict__[EXPECT_CORES] = expect_cores
if reset_ranger is not False:
func.func_dict[RESET_RANGER] = True
func.__dict__[RESET_RANGER] = True
return func
return decorate
def setup_method(self, method):
cluster_args = list()
for arg in [IMPALAD_ARGS, STATESTORED_ARGS, CATALOGD_ARGS, ADMISSIOND_ARGS, JVM_ARGS]:
if arg in method.func_dict:
cluster_args.append("--%s=%s " % (arg, method.func_dict[arg]))
if START_ARGS in method.func_dict:
cluster_args.extend(method.func_dict[START_ARGS])
if arg in method.__dict__:
cluster_args.append("--%s=%s " % (arg, method.__dict__[arg]))
if START_ARGS in method.__dict__:
cluster_args.extend(method.__dict__[START_ARGS])
if HIVE_CONF_DIR in method.func_dict:
self._start_hive_service(method.func_dict[HIVE_CONF_DIR])
if HIVE_CONF_DIR in method.__dict__:
self._start_hive_service(method.__dict__[HIVE_CONF_DIR])
# Should let Impala adopt the same hive-site.xml. The only way is to add it in the
# beginning of the CLASSPATH. Because there's already a hive-site.xml in the
# default CLASSPATH (see bin/set-classpath.sh).
cluster_args.append(
'--env_vars=CUSTOM_CLASSPATH=%s ' % method.func_dict[HIVE_CONF_DIR])
'--env_vars=CUSTOM_CLASSPATH=%s ' % method.__dict__[HIVE_CONF_DIR])
if KUDU_ARGS in method.func_dict:
self._restart_kudu_service(method.func_dict[KUDU_ARGS])
if KUDU_ARGS in method.__dict__:
self._restart_kudu_service(method.__dict__[KUDU_ARGS])
if RESET_RANGER in method.func_dict:
if RESET_RANGER in method.__dict__:
self._reset_ranger_policy_repository()
cluster_size = DEFAULT_CLUSTER_SIZE
if CLUSTER_SIZE in method.func_dict:
cluster_size = method.func_dict[CLUSTER_SIZE]
if CLUSTER_SIZE in method.__dict__:
cluster_size = method.__dict__[CLUSTER_SIZE]
use_exclusive_coordinators = False
num_coordinators = cluster_size
if NUM_EXCLUSIVE_COORDINATORS in method.func_dict:
num_coordinators = method.func_dict[NUM_EXCLUSIVE_COORDINATORS]
if NUM_EXCLUSIVE_COORDINATORS in method.__dict__:
num_coordinators = method.__dict__[NUM_EXCLUSIVE_COORDINATORS]
use_exclusive_coordinators = True
# Start a clean new cluster before each test
@@ -183,17 +183,17 @@ class CustomClusterTestSuite(ImpalaTestSuite):
"cluster_size": cluster_size,
"num_coordinators": num_coordinators,
"expected_num_impalads": cluster_size,
"default_query_options": method.func_dict.get(DEFAULT_QUERY_OPTIONS),
"default_query_options": method.__dict__.get(DEFAULT_QUERY_OPTIONS),
"use_exclusive_coordinators": use_exclusive_coordinators
}
if IMPALA_LOG_DIR in method.func_dict:
kwargs["impala_log_dir"] = method.func_dict[IMPALA_LOG_DIR]
if STATESTORED_TIMEOUT_S in method.func_dict:
kwargs["statestored_timeout_s"] = method.func_dict[STATESTORED_TIMEOUT_S]
if IMPALAD_TIMEOUT_S in method.func_dict:
kwargs["impalad_timeout_s"] = method.func_dict[IMPALAD_TIMEOUT_S]
if IMPALA_LOG_DIR in method.__dict__:
kwargs["impala_log_dir"] = method.__dict__[IMPALA_LOG_DIR]
if STATESTORED_TIMEOUT_S in method.__dict__:
kwargs["statestored_timeout_s"] = method.__dict__[STATESTORED_TIMEOUT_S]
if IMPALAD_TIMEOUT_S in method.__dict__:
kwargs["impalad_timeout_s"] = method.__dict__[IMPALAD_TIMEOUT_S]
if method.func_dict.get(EXPECT_CORES, False):
if method.__dict__.get(EXPECT_CORES, False):
# Make a note of any core files that already exist
possible_cores = find_all_files('*core*')
self.pre_test_cores = set([f for f in possible_cores if is_core_dump(f)])
@@ -209,10 +209,10 @@ class CustomClusterTestSuite(ImpalaTestSuite):
super(CustomClusterTestSuite, self).setup_class()
def teardown_method(self, method):
if HIVE_CONF_DIR in method.func_dict:
if HIVE_CONF_DIR in method.__dict__:
self._start_hive_service(None) # Restart Hive Service using default configs
if method.func_dict.get(EXPECT_CORES, False):
if method.__dict__.get(EXPECT_CORES, False):
# The core dumps expected to be generated by this test should be cleaned up
possible_cores = find_all_files('*core*')
post_test_cores = set([f for f in possible_cores if is_core_dump(f)])

@@ -22,6 +22,7 @@
from __future__ import absolute_import, division, print_function
import abc
import codecs
from future.utils import with_metaclass
import logging
import re
@@ -77,8 +78,7 @@ class OperationHandle(object):
# Represents an Impala connection.
class ImpalaConnection(object):
__metaclass__ = abc.ABCMeta
class ImpalaConnection(with_metaclass(abc.ABCMeta, object)):
def __enter__(self):
return self

@@ -65,6 +65,11 @@ class QueryTestResult(object):
return False
return self.column_types == other.column_types and self.rows == other.rows
def __hash__(self):
# This is not intended to be hashed. If that is happening, then something is wrong.
# The regexes in ResultRow make it difficult to implement this correctly.
assert False
def __ne__(self, other):
return not self.__eq__(other)
@@ -158,6 +163,11 @@ class ResultRow(object):
return other.regex.match(self.row_string)
return self.columns == other.columns
def __hash__(self):
# This is not intended to be hashed. If that is happening, then something is wrong.
# The regexes make it difficult to implement this correctly.
assert False
def __ne__(self, other):
return not self.__eq__(other)
@@ -225,6 +235,11 @@ class ResultColumn(object):
else:
return self.value == other.value
def __hash__(self):
# This is not intended to be hashed. If that is happening, then something is wrong.
# The regexes make it difficult to implement this correctly.
assert False
def __ne__(self, other):
return not self.__eq__(other)

@@ -22,6 +22,7 @@
from __future__ import absolute_import, division, print_function
from builtins import int, range, zip
from future.utils import with_metaclass
import hdfs
import logging
import os
@@ -34,17 +35,20 @@ from collections import defaultdict
from collections import OrderedDict
from contextlib import contextmanager
from getpass import getuser
from io import BytesIO
from multiprocessing.pool import ThreadPool
from random import choice
from StringIO import StringIO
from sys import maxsize
from tempfile import mkdtemp
from threading import Lock
from time import mktime, strptime
from urlparse import urlparse
from xml.etree.ElementTree import parse as parse_xml
from zipfile import ZipFile
try:
from urllib.parse import urlparse
except ImportError:
from urlparse import urlparse
from tests.comparison.db_connection import HiveConnection, ImpalaConnection
from tests.common.environ import HIVE_MAJOR_VERSION
@@ -65,14 +69,12 @@ CM_CLEAR_PORT = 7180
CM_TLS_PORT = 7183
class Cluster(object):
class Cluster(with_metaclass(ABCMeta, object)):
"""This is a base class for clusters. Cluster classes provide various methods for
interacting with a cluster. Ideally the various cluster implementations provide
the same set of methods so any cluster implementation can be chosen at runtime.
"""
__metaclass__ = ABCMeta
def __init__(self):
self._hadoop_configs = None
self._local_hadoop_conf_dir = None
@@ -323,7 +325,7 @@ class CmCluster(Cluster):
def _init_local_hadoop_conf_dir(self):
self._local_hadoop_conf_dir = mkdtemp()
data = StringIO(self.cm.get("/clusters/%s/services/%s/clientConfig"
data = BytesIO(self.cm.get("/clusters/%s/services/%s/clientConfig"
% (self.cm_cluster.name, self._find_service("HIVE").name)))
zip_file = ZipFile(data)
for name in zip_file.namelist():
@@ -655,9 +657,7 @@ class CmImpala(Impala):
raise Exception("Failed to restart Impala: %s" % command.resultMessage)
class Impalad(object):
__metaclass__ = ABCMeta
class Impalad(with_metaclass(ABCMeta, object)):
def __init__(self):
self.impala = None

@@ -451,6 +451,9 @@ class ArrayColumn(CollectionColumn):
return True
return self.name == other.name and self.owner.identifier == other.owner.identifier
def __hash__(self):
return hash((self.name, self.owner.identifier))
def __deepcopy__(self, memo):
other = ArrayColumn(
owner=self.owner,
@@ -480,6 +483,9 @@ class MapColumn(CollectionColumn):
return True
return self.name == other.name and self.owner.identifier == other.owner.identifier
def __hash__(self):
return hash((self.name, self.owner.identifier))
def __deepcopy__(self, memo):
other = MapColumn(
owner=self.owner,

@@ -27,7 +27,7 @@ from __future__ import absolute_import, division, print_function
from base import range
import base64
import pickle
import StringIO
from io import BytesIO
from tests.comparison.db_types import Decimal
from tests.comparison.random_val_generator import RandomValGenerator
@@ -94,7 +94,7 @@ def estimate_bytes_per_row(table_data_generator, row_count):
original_row_count = table_data_generator.row_count
original_output_file = table_data_generator.output_file
table_data_generator.row_count = row_count
table_data_generator.output_file = StringIO.StringIO()
table_data_generator.output_file = BytesIO()
table_data_generator.populate_output_file()
table_data_generator.output_file.flush()
bytes_per_row = len(table_data_generator.output_file.getvalue()) / float(row_count)

@@ -23,6 +23,7 @@
'''
from __future__ import absolute_import, division, print_function
from builtins import filter, map, range, zip
from future.utils import with_metaclass
import hashlib
import impala.dbapi
import re
@@ -572,9 +573,7 @@ class DbCursor(object):
return ()
class DbConnection(object):
__metaclass__ = ABCMeta
class DbConnection(with_metaclass(ABCMeta, object)):
LOCK = Lock()

@@ -16,6 +16,7 @@
# under the License.
from __future__ import absolute_import, division, print_function
from future.utils import with_metaclass
import re
import sys
@@ -43,15 +44,13 @@ class DataTypeMetaclass(type):
getattr(other, 'CMP_VALUE', other.__name__))
class DataType(ValExpr):
class DataType(with_metaclass(DataTypeMetaclass, ValExpr)):
'''Base class for data types.
Data types are represented as classes so inheritance can be used.
'''
__metaclass__ = DataTypeMetaclass
@staticmethod
def group_by_type(vals):
'''Group cols by their data type and return a dict of the results.'''

@@ -16,7 +16,8 @@
# under the License.
from __future__ import absolute_import, division, print_function
from builtins import range
from builtins import object, range
from future.utils import with_metaclass
from abc import ABCMeta, abstractproperty
from copy import deepcopy
from logging import getLogger
@@ -47,13 +48,11 @@ class StatementExecutionMode(object):
) = range(5)
class AbstractStatement(object):
class AbstractStatement(with_metaclass(ABCMeta, object)):
"""
Abstract query representation
"""
__metaclass__ = ABCMeta
def __init__(self):
# reference to statement's parent. For example the right side of a UNION clause
# SELECT will have a parent as the SELECT on the left, which for the query
@@ -236,9 +235,9 @@ class SelectItemSubList(object):
def __len__(self):
return sum(1 for _ in self)
def __nonzero__(self):
def __bool__(self):
try:
iter(self).next()
next(iter(self))
return True
except StopIteration:
return False
@@ -260,7 +259,7 @@ class SelectItemSubList(object):
items = list()
while start < stop:
try:
idx, item = self_iter.next()
idx, item = next(self_iter)
except StopIteration:
break
if idx < start:
@@ -298,7 +297,7 @@ class SelectItemSubList(object):
filtered_idx = 0
while start < stop:
try:
idx, item = self_iter.next()
idx, item = next(self_iter)
except StopIteration:
break
if not self.filter(item):

@@ -230,7 +230,7 @@ def pytest_generate_tests(metafunc):
if len(vectors) == 0:
LOG.warning("No test vectors generated for test '%s'. Check constraints and "
"input vectors" % metafunc.function.func_name)
"input vectors" % metafunc.function.__name__)
vector_names = list(map(str, vectors))
# In the case this is a test result update or sanity run, select a single test vector

@@ -1538,11 +1538,11 @@ class TestAdmissionControllerWithACService(TestAdmissionController):
if self.exploration_strategy() != 'exhaustive':
pytest.skip('runs only in exhaustive')
if 'start_args' not in method.func_dict:
method.func_dict['start_args'] = list()
method.func_dict["start_args"].append("--enable_admission_service")
if "impalad_args" in method.func_dict:
method.func_dict["admissiond_args"] = method.func_dict["impalad_args"]
if 'start_args' not in method.__dict__:
method.__dict__['start_args'] = list()
method.__dict__["start_args"].append("--enable_admission_service")
if "impalad_args" in method.__dict__:
method.__dict__["admissiond_args"] = method.__dict__["impalad_args"]
super(TestAdmissionController, self).setup_method(method)
@SkipIfNotHdfsMinicluster.tuned_for_minicluster
@@ -2284,9 +2284,9 @@ class TestAdmissionControllerStressWithACService(TestAdmissionControllerStress):
def setup_method(self, method):
if self.exploration_strategy() != 'exhaustive':
pytest.skip('runs only in exhaustive')
if 'start_args' not in method.func_dict:
method.func_dict['start_args'] = list()
method.func_dict["start_args"].append("--enable_admission_service")
if "impalad_args" in method.func_dict:
method.func_dict["admissiond_args"] = method.func_dict["impalad_args"]
if 'start_args' not in method.__dict__:
method.__dict__['start_args'] = list()
method.__dict__["start_args"].append("--enable_admission_service")
if "impalad_args" in method.__dict__:
method.__dict__["admissiond_args"] = method.__dict__["impalad_args"]
super(TestAdmissionControllerStress, self).setup_method(method)

@@ -44,10 +44,10 @@ class TestExecutorGroups(CustomClusterTestSuite):
def setup_method(self, method):
# Always start the base cluster with the coordinator in its own executor group.
existing_args = method.func_dict.get("impalad_args", "")
method.func_dict["impalad_args"] = "%s -executor_groups=coordinator" % existing_args
method.func_dict["cluster_size"] = 1
method.func_dict["num_exclusive_coordinators"] = 1
existing_args = method.__dict__.get("impalad_args", "")
method.__dict__["impalad_args"] = "%s -executor_groups=coordinator" % existing_args
method.__dict__["cluster_size"] = 1
method.__dict__["num_exclusive_coordinators"] = 1
self.num_groups = 1
self.num_impalads = 1
super(TestExecutorGroups, self).setup_method(method)

@@ -20,7 +20,7 @@
from __future__ import absolute_import, division, print_function
from builtins import range
import pytest
import Queue
import queue
import random
import re
import threading
@@ -261,7 +261,7 @@ class TestLocalCatalogRetries(CustomClusterTestSuite):
inconsistent_seen = [0]
inconsistent_seen_lock = threading.Lock()
# Tracks query failures for all other reasons.
failed_queries = Queue.Queue()
failed_queries = queue.Queue()
try:
client1 = self.cluster.impalads[0].service.create_beeswax_client()
client2 = self.cluster.impalads[1].service.create_beeswax_client()
@@ -378,7 +378,7 @@ class TestLocalCatalogRetries(CustomClusterTestSuite):
replans_seen_lock = threading.Lock()
# Queue to propagate exceptions from failed queries, if any.
failed_queries = Queue.Queue()
failed_queries = queue.Queue()
def stress_thread(client):
while replans_seen[0] == 0:

@@ -22,17 +22,23 @@ import datetime
import os
import pytest
import uuid
import urllib2
import urlparse
import xml.etree.ElementTree as ET
import zlib
try:
from urllib.parse import parse_qs, urlparse
from urllib.request import HTTPErrorProcessor, build_opener, Request
except ImportError:
from urllib2 import HTTPErrorProcessor, build_opener, Request
from urlparse import parse_qs, urlparse
from tests.common.custom_cluster_test_suite import CustomClusterTestSuite
from tests.common.test_vector import ImpalaTestVector
from tests.common.test_dimensions import create_client_protocol_dimension
from tests.shell.util import run_impala_shell_cmd
class NoRedirection(urllib2.HTTPErrorProcessor):
class NoRedirection(HTTPErrorProcessor):
"""Allows inspecting http redirection responses. """
def http_response(self, request, response):
return response
@@ -150,8 +156,8 @@ class TestClientSaml(CustomClusterTestSuite):
def _request_resource(self):
""" Initial POST request to hs2-http port, response should be redirected
to IDP and contain the authnrequest. """
opener = urllib2.build_opener(NoRedirection)
req = urllib2.Request("http://localhost:%s" % TestClientSaml.HOST_PORT, " ")
opener = build_opener(NoRedirection)
req = Request("http://localhost:%s" % TestClientSaml.HOST_PORT, " ")
req.add_header('X-Hive-Token-Response-Port', TestClientSaml.CLIENT_PORT)
response = opener.open(req)
relay_state, client_id, saml_req_xml = \
@@ -161,11 +167,11 @@ class TestClientSaml(CustomClusterTestSuite):
def _parse_redirection_response(self, response):
assert response.getcode() == 302
client_id = response.info().getheader("X-Hive-Client-Identifier")
client_id = response.info().get("X-Hive-Client-Identifier", None)
assert client_id is not None
new_url = response.info().getheader("location")
new_url = response.info()["location"]
assert new_url.startswith(TestClientSaml.IDP_URL)
query = urlparse.parse_qs(urlparse.urlparse(new_url).query.encode('ASCII'))
query = parse_qs(urlparse(new_url).query.encode('ASCII'))
relay_state = query["RelayState"][0]
assert relay_state is not None
saml_req = query["SAMLRequest"][0]
@@ -181,15 +187,15 @@ class TestClientSaml(CustomClusterTestSuite):
def _request_resource_with_bearer(self, client_id, bearer_token):
""" Send POST request to hs2-http port again, this time with bearer tokan.
The response should contain a security cookie if the validation succeeded """
req = urllib2.Request("http://localhost:%s" % TestClientSaml.HOST_PORT, " ")
req = Request("http://localhost:%s" % TestClientSaml.HOST_PORT, " ")
req.add_header('X-Hive-Client-Identifier', client_id)
req.add_header('Authorization', "Bearer " + bearer_token)
opener = urllib2.build_opener(NoRedirection)
opener = build_opener(NoRedirection)
response = opener.open(req)
# saml2_ee_test_mode=true leads to returning 401 unauthorized - otherwise the
# call would hang if there is no Thrift message.
assert response.getcode() == 401
cookies = response.info().getheader('Set-Cookie')
cookies = response.info()['Set-Cookie']
assert cookies.startswith("impala.auth=")
def _send_authn_response(self, request_id, relay_state,
@@ -201,8 +207,8 @@ class TestClientSaml(CustomClusterTestSuite):
authn_resp = self._generate_authn_response(request_id, attributes_xml)
encoded_authn_resp = base64.urlsafe_b64encode(authn_resp)
body = "SAMLResponse=%s&RelayState=%s" % (encoded_authn_resp, relay_state)
opener = urllib2.build_opener(NoRedirection)
req = urllib2.Request(TestClientSaml.SP_CALLBACK_URL, body)
opener = build_opener(NoRedirection)
req = Request(TestClientSaml.SP_CALLBACK_URL, body)
response = opener.open(req)
bearer_token = self._parse_xhtml_form(response, expect_success)
return bearer_token

@@ -128,7 +128,7 @@ class TestUdfConcurrency(CustomClusterTestSuite):
# join all threads.
for t in runner_threads: t.join()
for e in errors: print(e)
for err in errors: print(err)
# Checks that no impalad has crashed.
assert cluster.num_responsive_coordinators() == exp_num_coordinators
@@ -210,5 +210,5 @@ class TestUdfConcurrency(CustomClusterTestSuite):
for t in runner_threads: t.join()
# Check for any errors.
for e in errors: print(e)
for err in errors: print(err)
assert len(errors) == 0

@@ -29,7 +29,10 @@ import threading
import time
import uuid
from urllib2 import urlopen
try:
from urllib.request import urlopen
except ImportError:
from urllib2 import urlopen
from ImpalaService import ImpalaHiveServer2Service
from tests.common.environ import ImpalaTestClusterProperties

@@ -22,7 +22,10 @@ import json
import pytest
from time import time
from urllib2 import urlopen
try:
from urllib.request import urlopen
except ImportError:
from urllib2 import urlopen
from tests.common.environ import IS_DOCKERIZED_TEST_CLUSTER
from tests.common.impala_cluster import ImpalaCluster

@@ -54,6 +54,9 @@ class Query(object):
self.workload_name == other.workload_name and
self.db == other.db)
def __hash__(self):
return hash((self.query_str, self.name, self.scale_factor, self.test_vector, self.db))
def _build_query(self):
"""Populates db, query_str, table_format_str"""
self.db = QueryTestSectionReader.get_db_name(self.test_vector, self.scale_factor)

@@ -54,6 +54,9 @@ class RoundFloat():
"""Compares this objects's value to a numeral after rounding it."""
return round(self.value, self.num_digits) == round(numeral, self.num_digits)
def __hash__(self):
return hash(round(self.value, self.num_digits))
class TimeStamp():
"""Class to construct timestamps with a default format specifier."""
@@ -68,6 +71,9 @@ class TimeStamp():
"""Compares this objects's value to another timetuple."""
return self.timetuple == other_timetuple
def __hash__(self):
return hash(self.timetuple)
class Date():
"""Class to compare dates specified as year-month-day to dates specified as days since
@@ -79,6 +85,9 @@ class Date():
def __eq__(self, other_days_since_eopch):
return self.days_since_epoch == other_days_since_eopch
def __hash__(self):
return hash(self.days_since_epoch)
ColumnStats = namedtuple('ColumnStats', ['name', 'min', 'max', 'null_count'])

@@ -80,7 +80,7 @@ def empty_table(unique_database, request):
Returns:
fq_table_name (str): the fully qualified name of the table: : dbname.table_name
"""
table_name = request.node.function.func_name
table_name = request.node.function.__name__
fq_table_name = '.'.join([unique_database, table_name])
stmt = "CREATE TABLE %s (i integer, s string)" % fq_table_name
request.instance.execute_query_expect_success(request.instance.client, stmt,

@@ -19,7 +19,8 @@
# under the License.
from __future__ import absolute_import, division, print_function
import httplib
import http.client
import http.server
import logging
import os
import pexpect
@@ -27,6 +28,7 @@ import pytest
import re
import signal
import socket
import socketserver
import sys
import threading
from time import sleep
@@ -47,8 +49,6 @@ from tests.common.test_dimensions import (
from tests.shell.util import (assert_var_substitution, ImpalaShell, get_impalad_port,
get_shell_cmd, get_open_sessions_metric, spawn_shell, get_unused_port,
create_impala_shell_executable_dimension, get_impala_shell_executable)
import SimpleHTTPServer
import SocketServer
QUERY_FILE_PATH = os.path.join(os.environ['IMPALA_HOME'], 'tests', 'shell')
@@ -79,32 +79,30 @@ def tmp_history_file(request):
return tmp.name
class RequestHandler503(SimpleHTTPServer.SimpleHTTPRequestHandler):
class RequestHandler503(http.server.SimpleHTTPRequestHandler):
"""A custom http handler that checks for duplicate 'Host' headers from the most
recent http request, and always returns a 503 http code."""
def __init__(self, request, client_address, server):
SimpleHTTPServer.SimpleHTTPRequestHandler.__init__(self, request, client_address,
server)
http.server.SimpleHTTPRequestHandler.__init__(self, request, client_address,
server)
def should_send_body_text(self):
# in RequestHandler503 we do not send any body text
return False
def do_POST(self):
# The unfortunately named self.headers here is an instance of mimetools.Message that
# contains the request headers.
request_headers = self.headers.headers
# Ensure that only one 'Host' header is contained in the request before responding.
host_hdr_count = sum([header.startswith('Host:') for header in request_headers])
assert host_hdr_count == 1, "duplicate 'Host:' headers in %s" % request_headers
# Ensure that a 'Host' header is contained in the request before responding.
assert "Host" in self.headers
# Respond with 503.
self.send_response(code=httplib.SERVICE_UNAVAILABLE, message="Service Unavailable")
self.send_response(code=http.client.SERVICE_UNAVAILABLE,
message="Service Unavailable")
# The Python 3 version of SimpleHTTPRequestHandler requires this to be called
# explicitly
self.end_headers()
if self.should_send_body_text():
# Optionally send ody text with 503 message.
self.end_headers()
# Optionally send body text with 503 message.
self.wfile.write("EXTRA")
@@ -123,7 +121,7 @@ class TestHTTPServer503(object):
def __init__(self, clazz):
self.HOST = "localhost"
self.PORT = get_unused_port()
self.httpd = SocketServer.TCPServer((self.HOST, self.PORT), clazz)
self.httpd = socketserver.TCPServer((self.HOST, self.PORT), clazz)
self.http_server_thread = threading.Thread(target=self.httpd.serve_forever)
self.http_server_thread.start()

@@ -25,9 +25,13 @@ import socket
import threading
import traceback
import time
import urllib2
import uuid
try:
from urllib.request import urlopen
except ImportError:
from urllib2 import urlopen
from Types.ttypes import TNetworkAddress
from thrift.protocol import TBinaryProtocol
from thrift.server.TServer import TServer
@@ -63,7 +67,7 @@ LOG = logging.getLogger('test_statestore')
# Test that topic deletions take effect correctly.
def get_statestore_subscribers(host='localhost', port=25010):
response = urllib2.urlopen("http://{0}:{1}/subscribers?json".format(host, port))
response = urlopen("http://{0}:{1}/subscribers?json".format(host, port))
page = response.read()
return json.loads(page)

@@ -64,7 +64,7 @@ import re
import signal
import sys
import threading
from Queue import Empty # Must be before Queue below
from queue import Empty # Must be before Queue below
from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser, Namespace, SUPPRESS
from collections import defaultdict
from copy import copy
@@ -196,10 +196,11 @@ def print_crash_info_if_exists(impala, start_time):
LOG.info(
"Timeout checking if impalads crashed: %s."
% e + (" Will retry." if remaining_attempts else ""))
else:
LOG.error(
"Aborting after %s failed attempts to check if impalads crashed", max_attempts)
raise e
if not remaining_attempts:
LOG.error(
"Aborting after %s failed attempts to check if impalads crashed",
max_attempts)
raise e
for message in crashed_impalads.values():
print(message, file=sys.stderr)
return crashed_impalads

@@ -34,7 +34,7 @@ import subprocess
import sys
import threading
import traceback
import Queue
import queue
from argparse import ArgumentParser
from argparse import RawDescriptionHelpFormatter
@@ -113,7 +113,7 @@ def run_concurrent_workloads(concurrency, coordinator, database, queries):
# The exception queue is used to pass errors from the workload threads back to the main
# thread.
exception_queue = Queue.Queue()
exception_queue = queue.Queue()
# The main method for the workload runner threads.
def __run_workload(stream_id):

@@ -28,7 +28,7 @@ import logging
import _strptime # noqa: F401
import sys
import time
from Queue import Queue
from queue import Queue
from threading import current_thread, Event, Thread
from tests.common.impala_cluster import ImpalaCluster
@@ -78,17 +78,17 @@ class ConcurrentWorkload(object):
logging.exception("Caught error, stopping")
logging.info("%s exiting" % current_thread().name)
def compute_query_rate(self, queue, stop_ev):
def compute_query_rate(self, queue_obj, stop_ev):
"""Computes the query throughput rate in queries per second averaged over the last 5
seconds. This method only returns when 'stop_ev' is set by the caller."""
AVG_WINDOW_S = 5
times = []
while not stop_ev.is_set():
# Don't block to check for stop_ev
if queue.empty():
if queue_obj.empty():
time.sleep(0.1)
continue
queue.get()
queue_obj.get()
now = time.time()
times.append(now)
# Keep only timestamps within the averaging window
@@ -118,7 +118,7 @@ class ConcurrentWorkload(object):
self.stop()
assert self.stop_ev.is_set(), "Stop event expected to be set but it isn't"
def _print_query_rate(self, queue, stop_ev):
def _print_query_rate(self, queue_obj, stop_ev):
"""Prints the query throughput rate until 'stop_ev' is set by the caller."""
PERIOD_S = 1

@@ -19,9 +19,10 @@
from __future__ import absolute_import, division, print_function
from abc import ABCMeta, abstractmethod
from future.utils import with_metaclass
class BaseFilesystem(object):
__metaclass__ = ABCMeta
class BaseFilesystem(with_metaclass(ABCMeta, object)):
@abstractmethod
def create_file(self, path, file_data, overwrite):

@@ -19,7 +19,7 @@
from __future__ import absolute_import, division, print_function
import getpass
import httplib
import http.client
import os.path
import re
import requests
@@ -130,7 +130,7 @@ class PyWebHdfsClientWithChmod(PyWebHdfsClient):
'775'"""
uri = self._create_uri(path, "SETPERMISSION", permission=permission)
response = requests.put(uri, allow_redirects=True)
if not response.status_code == httplib.OK:
if not response.status_code == http.client.OK:
_raise_pywebhdfs_exception(response.status_code, response.text)
return True
@@ -138,21 +138,21 @@ class PyWebHdfsClientWithChmod(PyWebHdfsClient):
"""Sets the owner and the group of 'path to 'user' / 'group'"""
uri = self._create_uri(path, "SETOWNER", owner=user, group=group)
response = requests.put(uri, allow_redirects=True)
if not response.status_code == httplib.OK:
if not response.status_code == http.client.OK:
_raise_pywebhdfs_exception(response.status_code, response.text)
return True
def setacl(self, path, acls):
uri = self._create_uri(path, "SETACL", aclspec=acls)
response = requests.put(uri, allow_redirects=True)
if not response.status_code == httplib.OK:
if not response.status_code == http.client.OK:
_raise_pywebhdfs_exception(response.status_code, response.text)
return True
def getacl(self, path):
uri = self._create_uri(path, "GETACLSTATUS")
response = requests.get(uri, allow_redirects=True)
if not response.status_code == httplib.OK:
if not response.status_code == http.client.OK:
_raise_pywebhdfs_exception(response.status_code, response.text)
return response.json()

@@ -72,9 +72,9 @@ class SshClient(paramiko.SSHClient):
raise
except Exception as e:
LOG.warn("Error connecting to %s" % host_name, exc_info=True)
else:
LOG.error("Failed to ssh to %s" % host_name)
raise e
if retry >= retries - 1:
LOG.error("Failed to ssh to %s" % host_name)
raise e
self.get_transport().set_keepalive(10)