mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-9626: Use Python from the toolchain for Impala
Historically Impala used the Python2 version that was available on the hosting platform, as long as that version was at least v2.6. This caused a constant headache as all Python syntax had to be kept compatible with Python 2.6 (for CentOS 6). It also caused a recent problem on CentOS 8: here the system Python version was compiled with the system's GCC version (v8.3), which was much more recent than the Impala standard compiler version (GCC 4.9.2). When the Impala virtualenv was built, the system Python version supplied C compiler switches for modules containing native code that were unknown to the Impala version of GCC, thus breaking virtualenv installation. This patch changes the Impala virtualenv to always use the Python2 version from the toolchain, which is built with the toolchain compiler. This ensures that - Impala always has a known Python 2.7 version for all its scripts, - virtualenv modules based on native code will always be installable, as the Python environment and the modules are built with the same compiler version. Additional changes: - Add an auto-use fixture to conftest.py to check that the tests are being run with Python 2.7.x - Make bootstrap_toolchain.py independent of the Impala virtualenv: remove the dependency on the "sh" library Tests: - Passed core-mode tests on CentOS 7.4 - Passed core-mode tests in Docker-based mode for centos:7 and ubuntu:16.04 Most content in this patch was developed but not published earlier by Tim Armstrong. Change-Id: Ic7b40cef89cfb3b467b61b2d54a94e708642882b Reviewed-on: http://gerrit.cloudera.org:8080/15624 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
21aa514353
commit
c97191b6a5
@@ -1,4 +1,4 @@
|
||||
#!/usr/bin/env impala-python
|
||||
#!/usr/bin/env python
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
@@ -58,18 +58,12 @@
|
||||
#
|
||||
# The script is directly executable, and it takes no parameters:
|
||||
# ./bootstrap_toolchain.py
|
||||
# It should NOT be run via 'python bootstrap_toolchain.py', as it relies on a specific
|
||||
# python environment.
|
||||
import logging
|
||||
import glob
|
||||
import multiprocessing.pool
|
||||
import os
|
||||
import random
|
||||
import re
|
||||
# TODO: This file should be runnable without using impala-python, and system python
|
||||
# does not have 'sh' available. Rework code to avoid importing sh (and anything else
|
||||
# that gets in the way).
|
||||
import sh
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
@@ -107,6 +101,26 @@ OS_MAPPING = [
|
||||
]
|
||||
|
||||
|
||||
def check_output(cmd_args):
  """Execute 'cmd_args' as a child process and hand back everything it printed.

  stderr is redirected into stdout, so the returned value holds both streams. If the
  child exits with a non-zero status, an Exception is raised whose message carries the
  argument list, the exit code and the captured output. This is a stand-in for
  subprocess.check_output(), which is only provided in python 2.7.
  """
  child = subprocess.Popen(cmd_args, stderr=subprocess.STDOUT, stdout=subprocess.PIPE)
  # communicate() reads the pipe to EOF and waits for the child to terminate, so
  # 'returncode' is populated once it returns.
  captured = child.communicate()[0]
  exit_code = child.returncode
  if exit_code == 0:
    return captured
  raise Exception("Command with args '%s' failed with exit code %s:\n%s"
                  % (cmd_args, exit_code, captured))
|
||||
|
||||
|
||||
def get_toolchain_compiler():
  """Build the <name>-<version> identifier of the compiler package used by the
  toolchain.

  GCC is the only compiler in use for now; its version is taken from the
  IMPALA_GCC_VERSION environment variable (a KeyError is raised if it is not set).
  """
  gcc_version = os.environ["IMPALA_GCC_VERSION"]
  return "gcc-{0}".format(gcc_version)
|
||||
|
||||
|
||||
def wget_and_unpack_package(download_path, file_name, destination, wget_no_clobber):
|
||||
if not download_path.endswith("/" + file_name):
|
||||
raise Exception("URL {0} does not match with expected file_name {1}"
|
||||
@@ -117,7 +131,10 @@ def wget_and_unpack_package(download_path, file_name, destination, wget_no_clobb
|
||||
download_path, destination, file_name, attempt))
|
||||
# --no-clobber avoids downloading the file if a file with the name already exists
|
||||
try:
|
||||
sh.wget(download_path, directory_prefix=destination, no_clobber=wget_no_clobber)
|
||||
cmd = ["wget", download_path, "--directory-prefix={0}".format(destination)]
|
||||
if wget_no_clobber:
|
||||
cmd.append("--no-clobber")
|
||||
check_output(cmd)
|
||||
break
|
||||
except Exception, e:
|
||||
if attempt == NUM_ATTEMPTS:
|
||||
@@ -125,8 +142,9 @@ def wget_and_unpack_package(download_path, file_name, destination, wget_no_clobb
|
||||
logging.error("Download failed; retrying after sleep: " + str(e))
|
||||
time.sleep(10 + random.random() * 5) # Sleep between 10 and 15 seconds.
|
||||
logging.info("Extracting {0}".format(file_name))
|
||||
sh.tar(z=True, x=True, f=os.path.join(destination, file_name), directory=destination)
|
||||
sh.rm(os.path.join(destination, file_name))
|
||||
check_output(["tar", "xzf", os.path.join(destination, file_name),
|
||||
"--directory={0}".format(destination)])
|
||||
os.unlink(os.path.join(destination, file_name))
|
||||
|
||||
|
||||
class DownloadUnpackTarball(object):
|
||||
@@ -241,7 +259,7 @@ class ToolchainPackage(EnvVersionedPackage):
|
||||
logging.error("Impala environment not set up correctly, make sure "
|
||||
"$IMPALA_TOOLCHAIN is set.")
|
||||
sys.exit(1)
|
||||
compiler = "gcc-{0}".format(os.environ["IMPALA_GCC_VERSION"])
|
||||
compiler = get_toolchain_compiler()
|
||||
label = get_platform_release_label(release=platform_release).toolchain
|
||||
toolchain_build_id = os.environ["IMPALA_TOOLCHAIN_BUILD_ID"]
|
||||
toolchain_host = os.environ["IMPALA_TOOLCHAIN_HOST"]
|
||||
@@ -409,7 +427,8 @@ def get_platform_release_label(release=None):
|
||||
if lsb_release_cache:
|
||||
release = lsb_release_cache
|
||||
else:
|
||||
release = "".join(map(lambda x: x.lower(), sh.lsb_release("-irs").split()))
|
||||
lsb_release = check_output(["lsb_release", "-irs"])
|
||||
release = "".join(map(lambda x: x.lower(), lsb_release.split()))
|
||||
# Only need to check against the major release if RHEL or CentOS
|
||||
for platform in ['centos', 'redhatenterpriseserver']:
|
||||
if platform in release:
|
||||
@@ -419,7 +438,6 @@ def get_platform_release_label(release=None):
|
||||
for mapping in OS_MAPPING:
|
||||
if re.search(mapping.lsb_release, release):
|
||||
return mapping
|
||||
|
||||
raise Exception("Could not find package label for OS version: {0}.".format(release))
|
||||
|
||||
|
||||
|
||||
@@ -135,6 +135,8 @@ export IMPALA_PROTOBUF_VERSION=3.5.1
|
||||
unset IMPALA_PROTOBUF_URL
|
||||
export IMPALA_POSTGRES_JDBC_DRIVER_VERSION=42.2.5
|
||||
unset IMPALA_POSTGRES_JDBC_DRIVER_URL
|
||||
export IMPALA_PYTHON_VERSION=2.7.16
|
||||
unset IMPALA_PYTHON_URL
|
||||
export IMPALA_RAPIDJSON_VERSION=1.1.0
|
||||
unset IMPALA_RAPIDJSON_URL
|
||||
export IMPALA_RE2_VERSION=20190301
|
||||
|
||||
@@ -22,7 +22,9 @@
|
||||
# Setting USE_THRIFT11_GEN_PY will add Thrift 11 Python generated code rather than the
|
||||
# default Thrift Python code.
|
||||
# Used to allow importing testdata, test, etc modules from other scripts.
|
||||
export PYTHONPATH=${IMPALA_HOME}
|
||||
|
||||
# ${IMPALA_HOME}/bin has bootstrap_toolchain.py, required by bootstrap_virtualenv.py
|
||||
export PYTHONPATH=${IMPALA_HOME}:${IMPALA_HOME}/bin
|
||||
|
||||
# Generated Thrift files are used by tests and other scripts.
|
||||
if [ -n "${USE_THRIFT11_GEN_PY:-}" ]; then
|
||||
@@ -31,6 +33,8 @@ else
|
||||
PYTHONPATH=${PYTHONPATH}:${IMPALA_HOME}/shell/gen-py
|
||||
fi
|
||||
|
||||
PYTHONPATH=${PYTHONPATH}:${IMPALA_HOME}/infra/python/env/lib
|
||||
|
||||
# There should be just a single version of python that created the
|
||||
# site-packages directory. We find it by performing shell independent expansion
|
||||
# of the following pattern:
|
||||
|
||||
@@ -46,6 +46,7 @@ import tarfile
|
||||
import tempfile
|
||||
import textwrap
|
||||
import urllib
|
||||
from bootstrap_toolchain import ToolchainPackage
|
||||
|
||||
LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
|
||||
|
||||
@@ -83,7 +84,7 @@ def create_virtualenv():
|
||||
for member in file.getmembers():
|
||||
file.extract(member, build_dir)
|
||||
file.close()
|
||||
python_cmd = detect_python_cmd()
|
||||
python_cmd = download_toolchain_python()
|
||||
exec_cmd([python_cmd, find_file(build_dir, "virtualenv*", "virtualenv.py"), "--quiet",
|
||||
"--python", python_cmd, ENV_DIR])
|
||||
shutil.rmtree(build_dir)
|
||||
@@ -189,21 +190,23 @@ def find_file(*paths):
|
||||
return files[0]
|
||||
|
||||
|
||||
def detect_python_cmd():
  '''Locates a python interpreter of version 2.6 or greater on PATH and returns the
  full path of its executable.

  Command names are tried in the order listed below; for each name the PATH entries
  are scanned in order. A candidate is accepted when it exists, is executable and
  exits with status 0 when asked to verify its own version. Raises an Exception if no
  candidate qualifies.
  '''
  search_dirs = os.getenv("PATH").split(os.path.pathsep)
  candidate_names = ("python", "python27", "python2.7", "python-27", "python-2.7",
                     "python26", "python2.6", "python-26", "python-2.6")
  # Snippet run by each candidate: exits 1 if the interpreter is older than 2.6,
  # 0 otherwise.
  version_probe = textwrap.dedent("""
      import sys
      sys.exit(int(sys.version_info[:2] < (2, 6)))""")
  for name in candidate_names:
    for directory in search_dirs:
      candidate = os.path.join(directory, name)
      if os.path.exists(candidate) and os.access(candidate, os.X_OK):
        if subprocess.call([candidate, "-c", version_probe]) == 0:
          return candidate
  raise Exception("Could not find minimum required python version 2.6")
|
||||
def download_toolchain_python():
  '''Obtains the Python implementation shipped with the Impala toolchain, reusing the
  ToolchainPackage machinery from bin/bootstrap_toolchain.py, and returns the path of
  its interpreter ("bin/python" under the package directory).

  Raises an Exception if $IMPALA_TOOLCHAIN is unset or empty, or if the interpreter is
  not present after the package's download() call.
  '''
  if not os.environ.get("IMPALA_TOOLCHAIN"):
    raise Exception(
        "Impala environment not set up correctly, make sure $IMPALA_TOOLCHAIN is set.")

  python_package = ToolchainPackage("python")
  python_package.download()
  interpreter = os.path.join(python_package.pkg_directory(), "bin/python")
  if os.path.exists(interpreter):
    return interpreter
  raise Exception("Unexpected error bootstrapping python from toolchain: {0} does not "
                  "exist".format(interpreter))
|
||||
|
||||
|
||||
def install_deps():
|
||||
|
||||
@@ -26,6 +26,7 @@ import contextlib
|
||||
import logging
|
||||
import os
|
||||
import pytest
|
||||
import sys
|
||||
|
||||
import tests.common
|
||||
from impala_py_lib.helpers import find_all_files, is_core_dump
|
||||
@@ -609,6 +610,15 @@ def cluster_properties():
|
||||
yield cluster_properties
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True, scope='session')
def validate_python_version():
  """Session-scoped, automatically applied guard on the interpreter version.

  Fails with an assertion error when the running interpreter is older than Python 2.7.
  """
  # Only the (major, minor) pair matters for the minimum-version check.
  major_minor = sys.version_info[:2]
  assert major_minor >= (2, 7), "Tests only support Python 2.7+"
|
||||
|
||||
|
||||
@pytest.hookimpl(trylast=True)
|
||||
def pytest_collection_modifyitems(items, config, session):
|
||||
"""Hook to handle --shard_tests command line option.
|
||||
|
||||
Reference in New Issue
Block a user