IMPALA-11976: Fix use of deprecated functions/fields removed in Python 3

Python 3 moved several things around or removed deprecated
functions / fields:
 - sys.maxint was removed, but sys.maxsize provides similar functionality
 - long was removed, but int provides the same range
 - file() was removed, but open() already provided the same functionality
 - Exception.message was removed, but str(exception) is equivalent
 - Some encodings (like hex) were moved to codecs.encode()
 - string.letters -> string.ascii_letters
 - string.lowercase -> string.ascii_lowercase
 - string.strip was removed, but the str.strip() method is equivalent

This fixes all of those locations. Python 3's round() also behaves slightly
differently from Python 2's, so this changes round() to use future's
builtins.round() to get the Python 3 behavior.

This fixes the following pylint warnings:
 - file-builtin
 - long-builtin
 - invalid-str-codec
 - round-builtin
 - deprecated-string-function
 - sys-max-int
 - exception-message-attribute

Testing:
 - Ran core tests

Change-Id: I094cd7fd06b0d417fc875add401d18c90d7a792f
Reviewed-on: http://gerrit.cloudera.org:8080/19591
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Tested-by: Joe McDonnell <joemcdonnell@cloudera.com>
This commit is contained in:
Joe McDonnell
2023-03-04 10:21:59 -08:00
parent c233634d74
commit aa4050b4d9
27 changed files with 64 additions and 53 deletions

View File

@@ -10,3 +10,10 @@ deprecated-itertools-function
dict-iter-method
dict-keys-not-iterating
dict-values-not-iterating
file-builtin
long-builtin
invalid-str-codec
round-builtin
deprecated-string-function
sys-max-int
exception-message-attribute

View File

@@ -46,7 +46,7 @@ import sys
if len(sys.argv) == 1 or sys.argv[1] == "-":
input_data = sys.stdin
elif len(sys.argv) == 2:
input_data = file(sys.argv[1])
input_data = open(sys.argv[1])
else:
print("Usage: %s [file]" % (sys.argv[0],), file=sys.stderr)
sys.exit(1)

View File

@@ -194,7 +194,7 @@ if args.utility_context:
else:
# Impala Coordinator dependencies.
num_jars_on_classpath = 0
dep_classpath = file(os.path.join(IMPALA_HOME, "fe/target/build-classpath.txt")).read()
dep_classpath = open(os.path.join(IMPALA_HOME, "fe/target/build-classpath.txt")).read()
for jar in dep_classpath.split(":"):
num_jars_on_classpath += 1
assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)
@@ -216,7 +216,7 @@ else:
assert num_frontend_jars == 1
# Impala Executor dependencies.
dep_classpath = file(os.path.join(IMPALA_HOME,
dep_classpath = open(os.path.join(IMPALA_HOME,
"java/executor-deps/target/build-executor-deps-classpath.txt")).read()
for jar in dep_classpath.split(":"):
assert os.path.exists(jar), "missing jar from classpath: {0}".format(jar)

View File

@@ -49,8 +49,8 @@ def get_fragment_instances():
# No valid thread_debug_info
if not tdi:
break
hi = long(tdi['instance_id_']['hi'])
lo = long(tdi['instance_id_']['lo'])
hi = int(tdi['instance_id_']['hi'])
lo = int(tdi['instance_id_']['lo'])
fi = "%lx:%lx" % (hi, lo)
if fi != "0:0":
fragment_instances[fi.strip('"')].append(thread.num)

View File

@@ -75,10 +75,10 @@ while time.time() - now < TIMEOUT_SECONDS:
print("HiveServer2 service is up at %s." % options.hs2_hostport)
exit(0)
except Exception as e:
if "SASL" in e.message: # Bail out on SASL failures
if "SASL" in str(e): # Bail out on SASL failures
print("SASL failure when attempting connection:")
raise
if "GSS" in e.message: # Other GSSAPI failures
if "GSS" in str(e): # Other GSSAPI failures
print("GSS failure when attempting connection:")
raise
print("Waiting for HiveServer2 at %s..." % options.hs2_hostport)

View File

@@ -60,10 +60,10 @@ while time.time() - now < TIMEOUT_SECONDS:
print("Metastore service is up at %s." % options.metastore_hostport)
exit(0)
except Exception as e:
if "SASL" in e.message: # Bail out on SASL failures
if "SASL" in str(e): # Bail out on SASL failures
print("SASL failure when attempting connection:")
raise
if "GSS" in e.message: # Other GSSAPI failures
if "GSS" in str(e): # Other GSSAPI failures
print("GSS failure when attempting connection:")
raise
print("Waiting for the Metastore at %s..." % options.metastore_hostport)

View File

@@ -21,6 +21,7 @@
from __future__ import absolute_import, division, print_function
import abc
import codecs
import logging
import re
@@ -386,8 +387,8 @@ class ImpylaHS2Connection(ImpalaConnection):
"""Return the string representation of the query id."""
guid_bytes = \
operation_handle.get_handle()._last_operation.handle.operationId.guid
return "{0}:{1}".format(guid_bytes[7::-1].encode('hex_codec'),
guid_bytes[16:7:-1].encode('hex_codec'))
return "{0}:{1}".format(codecs.encode(guid_bytes[7::-1], 'hex_codec'),
codecs.encode(guid_bytes[16:7:-1], 'hex_codec'))
def get_state(self, operation_handle):
LOG.info("-- getting state for operation: {0}".format(operation_handle))

View File

@@ -18,7 +18,7 @@
# The base class that should be used for almost all Impala tests
from __future__ import absolute_import, division, print_function
from builtins import range
from builtins import range, round
import glob
import grp
import json
@@ -1041,7 +1041,7 @@ class ImpalaTestSuite(BaseTestSuite):
# is specified; explicitly make sure there's nothing to
# read to avoid hanging, especially when running interactively
# with py.test.
stdin=file("/dev/null"),
stdin=open("/dev/null"),
env=env)
(stdout, stderr) = call.communicate()
call.wait()

View File

@@ -111,7 +111,7 @@ class KuduTestSuite(ImpalaTestSuite):
@classmethod
def random_table_name(cls):
return "".join(choice(string.lowercase) for _ in range(10))
return "".join(choice(string.ascii_lowercase) for _ in range(10))
@classmethod
def to_kudu_table_name(cls, db_name, tbl_name):

View File

@@ -21,7 +21,7 @@
# module depends on db_connection which use some query generator classes.
from __future__ import absolute_import, division, print_function
from builtins import range, zip
from builtins import int, range, zip
import hdfs
import logging
import os
@@ -37,7 +37,7 @@ from getpass import getuser
from multiprocessing.pool import ThreadPool
from random import choice
from StringIO import StringIO
from sys import maxint
from sys import maxsize
from tempfile import mkdtemp
from threading import Lock
from time import mktime, strptime
@@ -629,7 +629,7 @@ class Impala(Service):
impalads = self.impalads
promise = self._thread_pool.map_async(func, impalads)
# Python doesn't handle ctrl-c well unless a timeout is provided.
results = promise.get(maxint)
results = promise.get(maxsize)
if as_dict:
results = dict(zip(impalads, results))
return results
@@ -874,7 +874,7 @@ class MiniClusterImpalad(Impalad):
return int(pid)
def find_process_mem_mb_limit(self):
return long(self.get_metric("mem-tracker.process.limit")["value"]) // 1024 ** 2
return int(self.get_metric("mem-tracker.process.limit")["value"]) // 1024 ** 2
def find_core_dump_dir(self):
raise NotImplementedError()

View File

@@ -25,7 +25,7 @@
# TODO: IMPALA-4600: refactor this module
from __future__ import absolute_import, division, print_function
from builtins import range, zip
from builtins import range, round, zip
from copy import deepcopy
from decimal import Decimal
from logging import getLogger

View File

@@ -18,7 +18,7 @@
# Tests admission control
from __future__ import absolute_import, division, print_function
from builtins import range
from builtins import int, range, round
import itertools
import logging
import os
@@ -504,7 +504,7 @@ class TestAdmissionController(TestAdmissionControllerBase, HS2TestSuite):
self.execute_query_expect_success(self.client, query, exec_options)
# A bit too much memory to run on coordinator.
exec_options['mem_limit'] = long(self.PROC_MEM_TEST_LIMIT * 1.1)
exec_options['mem_limit'] = int(self.PROC_MEM_TEST_LIMIT * 1.1)
ex = self.execute_query_expect_failure(self.client, query, exec_options)
assert ("Rejected query from pool default-pool: request memory needed "
"1.10 GB is greater than memory available for admission 1.00 GB" in
@@ -2218,7 +2218,7 @@ class TestAdmissionControllerStress(TestAdmissionControllerBase):
# should be fine. This exercises the code that does the per-pool memory
# accounting (see MemTracker::GetPoolMemReserved()) without actually being throttled.
self.run_admission_test(vector, {'request_pool': self.pool_name,
'mem_limit': sys.maxint})
'mem_limit': sys.maxsize})
@pytest.mark.execute_serially
@SkipIfOS.redhat6

View File

@@ -47,7 +47,7 @@ class TestFrontendConnectionLimit(CustomClusterTestSuite):
client.execute(query)
except Exception as e:
client.close()
raise ImpalaBeeswaxException(e.message)
raise ImpalaBeeswaxException(str(e))
client.close()
@pytest.mark.execute_serially

View File

@@ -16,6 +16,7 @@
# under the License.
from __future__ import absolute_import, division, print_function
from builtins import round
import pytest
import requests

View File

@@ -91,9 +91,9 @@ class TestParquetMaxPageHeader(CustomClusterTestSuite):
"""Creates a file in HDFS containing two MAX_STRING_LENGTH lines."""
file_name = os.path.join(dir, file)
# Create two 10MB long strings.
random_text1 = "".join([random.choice(string.letters)
random_text1 = "".join([random.choice(string.ascii_letters)
for i in range(self.MAX_STRING_LENGTH)])
random_text2 = "".join([random.choice(string.letters)
random_text2 = "".join([random.choice(string.ascii_letters)
for i in range(self.MAX_STRING_LENGTH)])
put = subprocess.Popen(["hdfs", "dfs", "-put", "-d", "-f", "-", file_name],
stdin=subprocess.PIPE, bufsize=-1)

View File

@@ -65,8 +65,8 @@ class TestRestart(CustomClusterTestSuite):
cursor.execute("describe database functional")
return
except HiveServer2Error as e:
assert "AnalysisException: Database does not exist: functional" in e.message,\
"Unexpected exception: " + e.message
assert "AnalysisException: Database does not exist: functional" in str(e),\
"Unexpected exception: " + str(e)
sleep(1)
assert False, "Coordinator never received non-empty metadata from the restarted " \
"statestore after {0} seconds".format(wait_time_s)

View File

@@ -236,7 +236,7 @@ class TestHmsIntegration(ImpalaTestSuite):
dictionary that holds the parsed attributes."""
result = {}
output_lines = output.split('\n')
stat_names = list(map(string.strip, output_lines[0].split(',')))
stat_names = [s.strip() for s in output_lines[0].split(',')]
stat_values = output_lines[3].split(',')
assert len(stat_names) == len(stat_values)
for i in range(0, len(stat_names)):
@@ -248,7 +248,7 @@ class TestHmsIntegration(ImpalaTestSuite):
dictionary that holds the parsed attributes."""
result = {}
for line in output.split('\n'):
line_elements = list(map(string.strip, line.split(',')))
line_elements = [s.strip() for s in line.split(',')]
if len(line_elements) >= 2:
result[line_elements[0]] = line_elements[1]
return result

View File

@@ -17,6 +17,7 @@
# Impala tests for DDL statements
from __future__ import absolute_import, division, print_function
from builtins import int
import time
from tests.common.impala_test_suite import ImpalaTestSuite
@@ -110,15 +111,15 @@ class TestLastDdlTimeUpdate(ImpalaTestSuite):
if expect_changed_ddl_time:
# check that the new ddlTime is strictly greater than the old one.
assert long(afterDdlTime) > long(beforeDdlTime)
assert int(afterDdlTime) > int(beforeDdlTime)
else:
assert long(afterDdlTime) == long(beforeDdlTime)
assert int(afterDdlTime) == int(beforeDdlTime)
if expect_changed_stats_time:
# check that the new statsTime is strictly greater than the old one.
assert long(afterStatsTime) > long(beforeStatsTime)
assert int(afterStatsTime) > int(beforeStatsTime)
else:
assert long(afterStatsTime) == long(beforeStatsTime)
assert int(afterStatsTime) == int(beforeStatsTime)
def _update_name(self, new_tbl_name):
""""

View File

@@ -22,7 +22,7 @@ import re
from datetime import datetime
from impala.dbapi import connect
from tests.beeswax.impala_beeswax import ImpalaBeeswaxClient, ImpalaBeeswaxResult
from sys import maxint
from sys import maxsize
from tests.performance.query import HiveQueryResult, ImpalaQueryResult
from tests.util.shell_util import exec_process
from time import time
@@ -44,7 +44,7 @@ def get_hs2_hive_cursor(hiveserver, user=None, use_kerberos=False,
user=user,
database=database,
auth_mechanism="GSSAPI" if use_kerberos else "PLAIN",
timeout=maxint)
timeout=maxsize)
cursor = conn.cursor(configuration=execOptions)
LOG.info("Connected to {0}:{1}".format(host, port))

View File

@@ -18,7 +18,7 @@
# Targeted Impala insert tests
from __future__ import absolute_import, division, print_function
from builtins import map, range
from builtins import map, range, round
import os
from collections import namedtuple

View File

@@ -46,9 +46,9 @@ class TestQueryMemLimit(ImpalaTestSuite):
# dynamically, even if it is a rough approximation.
# A mem_limit is expressed in bytes, with values <= 0 signifying no cap.
# These values are either really small, unlimited, or have a really large cap.
MAXINT_BYTES = str(sys.maxint)
MAXINT_MB = str(sys.maxint // (1024 * 1024))
MAXINT_GB = str(sys.maxint // (1024 * 1024 * 1024))
MAXINT_BYTES = str(sys.maxsize)
MAXINT_MB = str(sys.maxsize // (1024 * 1024))
MAXINT_GB = str(sys.maxsize // (1024 * 1024 * 1024))
# We expect the tests with MAXINT_* using valid units [bmg] to succeed.
PASS_REGEX = re.compile("(%s|%s|%s)[bmg]?$" % (MAXINT_BYTES, MAXINT_MB, MAXINT_GB),
re.I)

View File

@@ -16,7 +16,7 @@
# under the License.
from __future__ import absolute_import, division, print_function
from builtins import range
from builtins import range, int
from copy import copy
import itertools
import logging
@@ -175,7 +175,7 @@ class TestScannersFuzzing(ImpalaTestSuite):
rng = random.Random()
random_seed = os.environ.get("SCANNER_FUZZ_SEED") or time.time()
LOG.info("Using random seed %d", random_seed)
rng.seed(long(random_seed))
rng.seed(int(random_seed))
tmp_table_dir = tempfile.mkdtemp(prefix="tmp-scanner-fuzz-%s" % fuzz_table,
dir=os.path.join(os.environ['IMPALA_HOME'], "testdata"))

View File

@@ -626,7 +626,7 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
self._expect_with_cmd(child_proc, "select 'hi'", vector, ('hi'))
child_proc.sendline('exit;')
child_proc.expect(pexpect.EOF)
history_contents = file(new_hist.name).read()
history_contents = open(new_hist.name).read()
assert "select 'hi'" in history_contents
def test_rerun(self, vector, tmp_history_file):

View File

@@ -71,7 +71,7 @@ from copy import copy
from datetime import datetime
from multiprocessing import Lock, Process, Queue, Value
from random import choice, random, randrange, shuffle
from sys import exit, maxint
from sys import exit, maxsize
from tempfile import gettempdir
from textwrap import dedent
from threading import current_thread
@@ -596,7 +596,7 @@ class StressRunner(object):
else:
# Let the query run as long as necessary - it is nearly impossible to pick a
# good value that won't have false positives under load - see IMPALA-8222.
timeout = maxint
timeout = maxsize
report = query_runner.run_query(query, mem_limit, timeout_secs=timeout,
cancel_mech=cancel_mech)
LOG.debug("Got execution report for query")
@@ -858,7 +858,7 @@ def populate_runtime_info_for_random_queries(impala, candidate_queries, converte
return queries
def populate_runtime_info(query, impala, converted_args, timeout_secs=maxint):
def populate_runtime_info(query, impala, converted_args, timeout_secs=maxsize):
"""Runs the given query by itself repeatedly until the minimum memory is determined
with and without spilling. Potentially all fields in the Query class (except
'sql') will be populated by this method. 'required_mem_mb_without_spilling' and
@@ -997,7 +997,7 @@ def populate_runtime_info(query, impala, converted_args, timeout_secs=maxint):
LOG.info("Finding minimum memory required to avoid spilling")
lower_bound = max(limit_exceeded_mem, spill_mem)
upper_bound = min(non_spill_mem or maxint, impala.min_impalad_mem_mb)
upper_bound = min(non_spill_mem or maxsize, impala.min_impalad_mem_mb)
while True:
if old_required_mem_mb_without_spilling:
mem_limit = old_required_mem_mb_without_spilling
@@ -1034,7 +1034,7 @@ def populate_runtime_info(query, impala, converted_args, timeout_secs=maxint):
LOG.info("Finding absolute minimum memory required")
lower_bound = limit_exceeded_mem
upper_bound = min(
spill_mem or maxint, non_spill_mem or maxint, impala.min_impalad_mem_mb)
spill_mem or maxsize, non_spill_mem or maxsize, impala.min_impalad_mem_mb)
while True:
if old_required_mem_mb_with_spilling:
mem_limit = old_required_mem_mb_with_spilling

View File

@@ -18,13 +18,14 @@
# under the License.
from __future__ import absolute_import, division, print_function
from builtins import round
import logging
from multiprocessing import Value
import os
import re
from textwrap import dedent
from time import sleep, time
from sys import maxint
from sys import maxsize
from tests.stress.queries import QueryType
from tests.stress.util import create_and_start_daemon_thread, increment
@@ -103,7 +104,7 @@ class QueryRunner(object):
self.impalad_conn = self.impalad.impala.connect(impalad=self.impalad)
def run_query(self, query, mem_limit_mb, run_set_up=False,
timeout_secs=maxint, cancel_mech=None, retain_profile=False):
timeout_secs=maxsize, cancel_mech=None, retain_profile=False):
"""Run a query and return an execution report. If 'run_set_up' is True, set up sql
will be executed before the main query. This should be the case during the binary
search phase of the stress test. 'cancel_mech' is optionally a CancelMechanism
@@ -472,7 +473,7 @@ def _add_row_to_hash(row, curr_hash):
curr_hash += _hash_val(idx, val)
# Modulo the result to keep it "small" otherwise the math ops can be slow
# since python does infinite precision math.
curr_hash %= maxint
curr_hash %= maxsize
return curr_hash

View File

@@ -88,7 +88,7 @@ class TestTestFileParser(BaseTestSuite):
skip_unknown_sections=False)
assert 0, 'Expected error due to invalid section'
except RuntimeError as re:
assert re.message == "Unknown subsection: TYPES"
assert str(re) == "Unknown subsection: TYPES"
def test_parse_query_name(self):
results = parse_test_file_text(test_text, VALID_SECTIONS, False)

View File

@@ -50,13 +50,13 @@ class TestResultVerifier(ImpalaTestSuite):
res.rows[0]['does_not_exist']
assert False, 'Expected error due to column alias not existing'
except IndexError as e:
assert "No column with label: does_not_exist" in e.message
assert "No column with label: does_not_exist" in str(e)
try:
res.rows[0][2]
assert False, 'Expected error due to column position not existing'
except IndexError as e:
assert 'list index out of range' in e.message
assert 'list index out of range' in str(e)
def test_compute_aggregation(self, vector):
profile = '''