IMPALA-9627: Update utility scripts for Python 3

Updates utility scripts that don't use impala-python to work with Python 3, so we can build on systems that don't include Python 2 (such as SLES 15 SP4).

Primarily adds 'universal_newlines=True' to subprocess calls so that they return text rather than binary data in Python 3; this change is also compatible with Python 2.

Testing:
- built in SLES 15 SP4 container with Python 3

Change-Id: I7f4ce71fa1183aaeeca55d0666aeb113640c5cf2
Reviewed-on: http://gerrit.cloudera.org:8080/19559
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Tested-by: Michael Smith <michael.smith@cloudera.com>
2025-12-19 18:12:08 -05:00 · 2023-02-28 09:43:22 -08:00
parent 630650ba08
commit 0c72c98f91
3 changed files with 13 additions and 7 deletions
--- a/infra/python/bootstrap_virtualenv.py
+++ b/infra/python/bootstrap_virtualenv.py
@@ -42,7 +42,10 @@ import subprocess
 import sys
 import tarfile
 import tempfile
-import urllib
+try:
+  from urllib.request import pathname2url
+except ImportError:
+  from urllib import pathname2url
 from bootstrap_toolchain import ToolchainPackage

 LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
@@ -124,7 +127,7 @@ def exec_cmd(args, **kwargs):
     'args' and 'kwargs' use the same format as subprocess.Popen().
  '''
  process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
-      **kwargs)
+      universal_newlines=True, **kwargs)
  output = process.communicate()[0]
  if process.returncode != 0:
    raise Exception("Command returned non-zero status\nCommand: %s\nOutput: %s"
@@ -189,7 +192,7 @@ def exec_pip_install(args, cc="no-cc-available", env=None):
    third_party_pkg_install_cmd.append("--no-index")

  third_party_pkg_install_cmd.extend(["--find-links",
-      "file://%s" % urllib.pathname2url(os.path.abspath(DEPS_DIR))])
+      "file://%s" % pathname2url(os.path.abspath(DEPS_DIR))])
  third_party_pkg_install_cmd.extend(args)
  exec_cmd(third_party_pkg_install_cmd, env=env)

--- a/infra/python/deps/pip_download.py
+++ b/infra/python/deps/pip_download.py
@@ -51,7 +51,7 @@ def check_digest(filename, algorithm, expected_digest):
    print('Hash algorithm {0} is not supported by hashlib'.format(algorithm))
    return False
  h = hashlib.new(algorithm)
-  h.update(open(filename).read())
+  h.update(open(filename, mode='rb').read())
  actual_digest = h.hexdigest()
  return actual_digest == expected_digest

@@ -89,7 +89,8 @@ def get_package_info(pkg_name, pkg_version):
  # We parse the page with regex instead of an html parser because that requires
  # downloading an extra package before running this script. Since the HTML is guaranteed
  # to be formatted according to PEP 503, this is acceptable.
-  pkg_info = subprocess.check_output(["wget", "-q", "-O", "-", url])
+  pkg_info = subprocess.check_output(
+      ["wget", "-q", "-O", "-", url], universal_newlines=True)
  regex = r'<a .*?href=\".*?packages/(.*?)#(.*?)=(.*?)\".*?>(.*?)<\/a>'
  for match in re.finditer(regex, pkg_info):
    path = match.group(1)