IMPALA-11959: Add Python 3 virtualenv

This adds a Python 3 equivalent to the impala-python
virtualenv based on the toolchain Python 3.7.16.
This modifies bootstrap_virtualenv.py to support
the two different modes. This adds py2-requirements.txt
and py3-requirements.txt to allow some differences
between the Python 2 and Python 3 virtualenvs.

Here are some specific package changes:
 - allpairs is replaced with allpairspy, as allpairs did
   not support Python 3.
 - requests is upgraded slightly, because otherwise it has issues
   with idna==2.8.
 - pylint is limited to Python 3, because we are adding it
   and don't need it on both
 - flake8 is limited to Python 2, because it will take
   some work to switch to a version that works on Python 3
 - cm_api is limited to Python 2, because it doesn't support
   Python 3
 - pytest-random does not support Python 3 and it is unused,
   so it is removed
 - Bump the version of setuptools-scm to support Python 3

This adds impala-pylint, which can be used to do further
Python 3 checks via --py3k. This also adds a bin/check-pylint-py3k.sh
script to enforce specific py3k checks. The banned py3k warnings
are specified in the bin/banned_py3k_warnings.txt. This is currently
empty, but this can ratchet up the py3k strictness over time
to avoid regressions.

This pulls in a new toolchain with the fix for IMPALA-11956
to get Python 3.7.16.

Testing:
 - Hand tested that the allpairs libraries produce the
   same results
 - The python3 virtualenv has no influence on regular
   tests yet

Change-Id: Ica4853f440c9a46a79bd5fb8e0a66730b0b4efc0
Reviewed-on: http://gerrit.cloudera.org:8080/19567
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Tested-by: Joe McDonnell <joemcdonnell@cloudera.com>
This commit is contained in:
Joe McDonnell
2021-04-06 21:36:07 -07:00
parent 148888e3ed
commit 566df80891
21 changed files with 452 additions and 75 deletions

View File

@@ -514,6 +514,10 @@ add_custom_target(impala_python ALL
COMMAND "${CMAKE_SOURCE_DIR}/bin/init-impala-python.sh"
)
# Runs bin/init-impala-python.sh with the -python3 flag as part of the default
# (ALL) build. VERBATIM keeps the argument escaping identical across platforms
# and generators.
add_custom_target(impala_python3 ALL
  COMMAND "${CMAKE_SOURCE_DIR}/bin/init-impala-python.sh" "-python3"
  VERBATIM
)
set(IMPALA_PYTHON_INSTALLS "")
if (NOT $ENV{IMPALA_SYSTEM_PYTHON2} EQUAL "")
list(APPEND IMPALA_PYTHON_INSTALLS shell_python2_install)
@@ -524,7 +528,7 @@ endif()
add_custom_target(impala_shell_pypi ALL DEPENDS ${IMPALA_PYTHON_INSTALLS})
add_custom_target(notests_independent_targets DEPENDS
java cscope tarballs impala_python impala_shell_pypi
java cscope tarballs impala_python impala_python3 impala_shell_pypi
)
add_custom_target(notests_regular_targets DEPENDS
impalad statestored catalogd admissiond fesupport loggingsupport ImpalaUdf udasample udfsample impala-profile-tool

View File

View File

@@ -494,6 +494,9 @@ def get_toolchain_downloads():
"crcutil", "curl", "flatbuffers", "gdb", "gflags", "glog", "gperftools", "gtest",
"jwt-cpp", "libev", "libunwind", "lz4", "openldap", "openssl", "orc", "protobuf",
"python", "rapidjson", "re2", "snappy", "tpc-h", "tpc-ds", "zlib", "zstd"])
python3_package = ToolchainPackage(
"python", explicit_version=os.environ.get("IMPALA_PYTHON3_VERSION"))
toolchain_packages += [python3_package]
toolchain_packages += get_unique_toolchain_downloads(
["thrift:cpp", "thrift:java", "thrift:py"])
protobuf_package_clang = ToolchainPackage(

140
bin/check-pylint-py3k.sh Executable file
View File

@@ -0,0 +1,140 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Abort on a failing command, on use of an unset variable, and on a failure in
# any stage of a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory that contains this script.
BINDIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Individual pylint checks are banned incrementally: each banned warning is
# described by a grep expression, and the expressions live in
# bin/banned_py3k_warnings.txt.
BANNED_PY3K_WARNINGS="${BINDIR}/banned_py3k_warnings.txt"
# Prints the command line help for this script to stdout.
function print_usage {
  echo "check-pylint-py3k.sh : Checks eligible python files for pylint py3k compliance."
  echo "Fails if the python files have py3k warnings that match the patterns in "
  echo "bin/banned_py3k_warnings.txt."
  echo "[--error_output_file] : (optional) Also output the errors to a file"
  echo "[--warning_output_file] : (optional) Also output the warnings to a file"
}
# Optional destinations for copies of the error and warning reports. An empty
# value means that no copy is made.
ERROR_OUTPUT_FILE=""
WARNING_OUTPUT_FILE=""

# Consume the command line one option at a time. Options that take a value
# shift once inside their branch; the shift at the bottom consumes the option
# itself.
while [ -n "$*" ]
do
  case "$1" in
    --error_output_file)
      # Without this check, the trailing shift would fail under 'set -e' and the
      # script would exit without telling the user what went wrong.
      if [ $# -lt 2 ]; then
        >&2 echo "ERROR: $1 requires a file name"
        print_usage
        exit 1
      fi
      ERROR_OUTPUT_FILE="$2"
      shift;
      ;;
    --warning_output_file)
      if [ $# -lt 2 ]; then
        >&2 echo "ERROR: $1 requires a file name"
        print_usage
        exit 1
      fi
      WARNING_OUTPUT_FILE="$2"
      shift;
      ;;
    --help|*)
      # Unknown options are treated like --help: print the usage and fail.
      print_usage
      exit 1
      ;;
  esac
  shift
done
# Run from IMPALA_HOME; the previous directory is restored by the popd at the end
# of the script.
pushd "${IMPALA_HOME}" > /dev/null 2>&1

# Scratch directory for the raw pylint output and the generated reports.
OUTPUT_TMP_DIR=$(mktemp -d)
# Remove the scratch directory on every exit path, including aborts caused by
# 'set -e'.
trap 'rm -rf "${OUTPUT_TMP_DIR}"' EXIT

PYLINT_OUTPUT_FILE="${OUTPUT_TMP_DIR}/pylint_output.txt"
ERROR_OUTPUT_TMP_FILE="${OUTPUT_TMP_DIR}/error_output_tmp.txt"
WARNING_OUTPUT_TMP_FILE="${OUTPUT_TMP_DIR}/warning_output_tmp.txt"
# Make sure the pylint output file exists even when every file is skipped.
touch "${PYLINT_OUTPUT_FILE}"

RETCODE=0
# The file list is read line by line on file descriptor 3. This avoids word
# splitting and glob expansion of the paths, and it leaves stdin untouched for
# the commands run inside the loop.
while IFS= read -r -u 3 file; do
  # Skip the shell entirely (but cover tests/shell)
  if [[ "${file}" =~ "shell/" && ! "${file}" =~ "tests/shell" ]]; then
    continue
  fi

  # For the moment, the focus is on enforcing py3k checks on files that use the
  # impala-python virtualenv. Ignore executable python files that do not
  # use impala-python. In practice, this tends to be scripts used during the
  # build or various scripts for developers in bin.
  FIRST_LINE=$(head -n1 "${file}")
  # Only a first line that starts with "#!" is a shebang.
  if [[ "${FIRST_LINE}" == "#!"* ]]; then
    if [[ "${FIRST_LINE}" =~ "python3" ]]; then
      >&2 echo "SKIPPING: ${file} is already using python3: ${FIRST_LINE}"
      continue
    fi
    if [[ ! "${FIRST_LINE}" =~ "impala-python" ]]; then
      >&2 echo "SKIPPING: ${file} is not using impala-python: ${FIRST_LINE}"
      continue
    fi
  fi

  >&2 echo "PROCESSING: ${file}"
  # -s n (skip score for each file)
  # --exit-zero: don't fail
  impala-pylint -s n --exit-zero --py3k "${file}" >> "${PYLINT_OUTPUT_FILE}"
done 3< <(git ls-files '**/*.py')
# Start with empty report files so that the optional copies below always have a
# source file.
touch "${ERROR_OUTPUT_TMP_FILE}"
touch "${WARNING_OUTPUT_TMP_FILE}"

# Prints the pylint output lines that match a banned pattern.
function banned_py3k_lines {
  grep -f "${BANNED_PY3K_WARNINGS}" "${PYLINT_OUTPUT_FILE}"
}

# Prints the pylint output lines that match neither a banned pattern nor the
# "************* Module X" separator lines that pylint emits.
function unenforced_py3k_lines {
  grep -v -e '\*\*\*\*' -f "${BANNED_PY3K_WARNINGS}" "${PYLINT_OUTPUT_FILE}"
}

# Section 1: banned warnings. Any match makes the script exit with an error.
printf '\n\n'
if banned_py3k_lines > /dev/null 2>&1 ; then
  echo "ERROR: Some python files contain these banned pylint warnings:" | \
    tee "${ERROR_OUTPUT_TMP_FILE}"
  banned_py3k_lines | tee -a "${ERROR_OUTPUT_TMP_FILE}"
  RETCODE=1
else
  echo "No errors found" | tee "${ERROR_OUTPUT_TMP_FILE}"
fi

if [ -n "${ERROR_OUTPUT_FILE}" ]; then
  cp "${ERROR_OUTPUT_TMP_FILE}" "${ERROR_OUTPUT_FILE}"
fi

# Section 2: the remaining py3k warnings. They are reported but do not affect
# the exit code.
printf '\n\n'
if unenforced_py3k_lines > /dev/null 2>&1 ; then
  echo "WARNING: Some python files contain these unenforced pylint warnings:" | \
    tee "${WARNING_OUTPUT_TMP_FILE}"
  unenforced_py3k_lines | tee -a "${WARNING_OUTPUT_TMP_FILE}"

  # Count the occurrences of each warning: drop the header line, keep the text
  # from the fourth ':'-separated field onwards, and strip one leading space.
  echo "WARNING SUMMARY table:"
  grep -v "WARNING" "${WARNING_OUTPUT_TMP_FILE}" | cut -d: -f4- | \
    sed 's#^ ##' | sort | uniq -c
else
  echo "No warnings found" | tee "${WARNING_OUTPUT_TMP_FILE}"
fi

if [ -n "${WARNING_OUTPUT_FILE}" ]; then
  cp "${WARNING_OUTPUT_TMP_FILE}" "${WARNING_OUTPUT_FILE}"
fi

# Clean up the scratch directory, return to the caller's directory, and report
# the result.
rm -rf "${OUTPUT_TMP_DIR}"
popd > /dev/null 2>&1
exit "${RETCODE}"

View File

@@ -81,7 +81,7 @@ export USE_APACHE_HIVE=${USE_APACHE_HIVE-false}
# moving to a different build of the toolchain, e.g. when a version is bumped or a
# compile option is changed. The build id can be found in the output of the toolchain
# build jobs, it is constructed from the build number and toolchain git hash prefix.
export IMPALA_TOOLCHAIN_BUILD_ID=252-b144ba77b5
export IMPALA_TOOLCHAIN_BUILD_ID=258-821f1d91bd
# Versions of toolchain dependencies.
# -----------------------------------
export IMPALA_AVRO_VERSION=1.7.4-p5
@@ -159,6 +159,7 @@ export IMPALA_POSTGRES_JDBC_DRIVER_VERSION=42.5.1
unset IMPALA_POSTGRES_JDBC_DRIVER_URL
export IMPALA_PYTHON_VERSION=2.7.16
unset IMPALA_PYTHON_URL
export IMPALA_PYTHON3_VERSION=3.7.16
export IMPALA_RAPIDJSON_VERSION=1.1.0
unset IMPALA_RAPIDJSON_URL
export IMPALA_RE2_VERSION=20190301

21
bin/impala-pip3 Executable file
View File

@@ -0,0 +1,21 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Common Python 3 setup, loaded from the directory that holds this script.
. "$(dirname "$0")/impala-python3-common.sh"

# Replace this process with the virtualenv's python3 running the virtualenv's
# pip3 script, forwarding every argument unchanged.
exec "${PY_ENV_DIR}/bin/python3" "${PY_ENV_DIR}/bin/pip3" "$@"

21
bin/impala-pylint Executable file
View File

@@ -0,0 +1,21 @@
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Common Python 3 setup, loaded from the directory that holds this script.
. "$(dirname "$0")/impala-python3-common.sh"

# Replace this process with the virtualenv's pylint, forwarding every argument
# unchanged.
exec "${PY_ENV_DIR}/bin/pylint" "$@"

23
bin/impala-python3 Executable file
View File

@@ -0,0 +1,23 @@
#!/bin/bash
#
##############################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################################
# Common Python 3 setup, loaded from the directory that holds this script.
. "$(dirname "$0")/impala-python3-common.sh"

# Replace this process with the virtualenv's python3 interpreter, forwarding
# every argument unchanged.
exec "${PY_ENV_DIR}/bin/python3" "$@"

View File

@@ -0,0 +1,32 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This file is intended to be sourced to perform common setup for
# the Python 3 $IMPALA_HOME/bin/impala-py* executables.
# These options also apply to the script that sources this file.
set -euo pipefail
. "${IMPALA_HOME}/bin/report_build_error.sh"
setup_report_build_error

. "${IMPALA_HOME}/bin/set-pythonpath.sh"

# Assign first and export afterwards: 'export VAR="$(cmd)"' returns the status of
# 'export', which would hide a failure of the helper command from 'set -e'.
LD_LIBRARY_PATH="$(python "${IMPALA_HOME}/infra/python/bootstrap_virtualenv.py" \
  --print-ld-library-path)"
export LD_LIBRARY_PATH

# Locations of the bootstrap script and of the Python 3 virtualenv, relative to
# the directory of the executable that sourced this file.
PY_DIR="$(dirname "$0")/../infra/python"
PY_ENV_DIR="${PY_DIR}/env-gcc${IMPALA_GCC_VERSION}-py3"

# Run the virtualenv bootstrap script in its Python 3 mode.
python "${PY_DIR}/bootstrap_virtualenv.py" --python3

View File

@@ -27,5 +27,29 @@ bin=`dirname "$0"`
bin=`cd "$bin"; pwd`
. "$bin"/impala-config.sh
# Prints the command line help for this script to stdout.
function print_usage {
  cat <<'EOF'
init-impala-python.sh - Script called from CMake to init python venvs
[-python3] : Init the python3 virtualenv (default is python2)
EOF
}
# Selects which virtualenv to initialize; python2 unless -python3 is given.
IS_PYTHON3=false

# -python3 is the only accepted option. Anything else (including -help) prints
# the usage and exits with an error.
while [[ -n "$*" ]]; do
  if [[ "$1" == "-python3" ]]; then
    IS_PYTHON3=true
  else
    print_usage
    exit 1
  fi
  shift
done
cd $IMPALA_HOME
bin/impala-python -c 'print("Initialized impala-python")'
if $IS_PYTHON3 ; then
bin/impala-python3 -c 'print("Initialized impala-python3")'
else
bin/impala-python -c 'print("Initialized impala-python")'
fi

View File

@@ -27,6 +27,7 @@ shell/packaging/MANIFEST.in
shell/packaging/requirements.txt
testdata/cluster/node_templates/cdh7/etc/init.d/kms
testdata/authentication/*
bin/banned_py3k_warnings.txt
# See $IMPALA_HOME/LICENSE.txt
be/src/gutil/*

View File

@@ -55,7 +55,10 @@ SKIP_TOOLCHAIN_BOOTSTRAP = "SKIP_TOOLCHAIN_BOOTSTRAP"
GCC_VERSION = os.environ["IMPALA_GCC_VERSION"]
DEPS_DIR = os.path.join(os.path.dirname(__file__), "deps")
ENV_DIR = os.path.join(os.path.dirname(__file__), "env-gcc{0}".format(GCC_VERSION))
ENV_DIR_PY2 = os.path.join(os.path.dirname(__file__),
"env-gcc{0}".format(GCC_VERSION))
ENV_DIR_PY3 = os.path.join(os.path.dirname(__file__),
"env-gcc{0}-py3".format(GCC_VERSION))
# Setuptools requirements file. Setuptools is required during pip install for
# some packages. Newer setuptools dropped python 2 support, and some python
@@ -77,10 +80,16 @@ KUDU_REQS_PATH = os.path.join(DEPS_DIR, "kudu-requirements.txt")
# Interface) being installed by the requirements step.
ADLS_REQS_PATH = os.path.join(DEPS_DIR, "adls-requirements.txt")
# Extra packages specific to python 3
PY3_REQS_PATH = os.path.join(DEPS_DIR, "py3-requirements.txt")
def delete_virtualenv_if_exist():
if os.path.exists(ENV_DIR):
shutil.rmtree(ENV_DIR)
# Extra packages specific to python 2
PY2_REQS_PATH = os.path.join(DEPS_DIR, "py2-requirements.txt")
def delete_virtualenv_if_exist(venv_dir):
  '''Removes the directory tree at 'venv_dir'. Does nothing if the path does not
  exist.'''
  if not os.path.exists(venv_dir):
    return
  shutil.rmtree(venv_dir)
def detect_virtualenv_version():
@@ -99,8 +108,16 @@ def detect_virtualenv_version():
return None
def create_virtualenv():
LOG.info("Creating python virtualenv")
def create_virtualenv(venv_dir, is_py3):
  '''Creates a virtualenv in 'venv_dir' using the toolchain Python. If 'is_py3' is
  true, the virtualenv is created with the builtin venv module of the toolchain
  Python 3.'''
  if is_py3:
    # Python 3 is much simpler, because there is a builtin venv command
    LOG.info("Creating python3 virtualenv")
    python_cmd = download_toolchain_python(is_py3)
    # "-m" and "venv" are separate arguments. Without the comma the two literals
    # would be concatenated into the single argument "-mvenv".
    exec_cmd([python_cmd, "-m", "venv", venv_dir])
    return
# Python 2
LOG.info("Creating python2 virtualenv")
build_dir = tempfile.mkdtemp()
# Try to find the virtualenv version by parsing the requirements file
# Default to "*" if we can't figure it out.
@@ -114,9 +131,9 @@ def create_virtualenv():
for member in file.getmembers():
file.extract(member, build_dir)
file.close()
python_cmd = download_toolchain_python()
python_cmd = download_toolchain_python(is_py3)
exec_cmd([python_cmd, find_file(build_dir, "virtualenv*", "virtualenv.py"), "--quiet",
"--python", python_cmd, ENV_DIR])
"--python", python_cmd, venv_dir])
shutil.rmtree(build_dir)
@@ -147,7 +164,7 @@ def select_cc():
return cc
def exec_pip_install(args, cc="no-cc-available", env=None):
def exec_pip_install(venv_dir, is_py3, args, cc="no-cc-available", env=None):
'''Executes "pip install" with the provided command line arguments. If 'cc' is set,
it is used as the C compiler. Otherwise compilation of C/C++ code is disabled by
setting the CC environment variable to a bogus value.
@@ -169,8 +186,12 @@ def exec_pip_install(args, cc="no-cc-available", env=None):
# Don't call the virtualenv pip directly, it uses a hashbang to to call the python
# virtualenv using an absolute path. If the path to the virtualenv is very long, the
# hashbang won't work.
impala_pip_base_cmd = [os.path.join(ENV_DIR, "bin", "python"),
os.path.join(ENV_DIR, "bin", "pip"), "install", "-v"]
if is_py3:
impala_pip_base_cmd = [os.path.join(venv_dir, "bin", "python3"),
os.path.join(venv_dir, "bin", "pip3"), "install", "-v"]
else:
impala_pip_base_cmd = [os.path.join(venv_dir, "bin", "python"),
os.path.join(venv_dir, "bin", "pip"), "install", "-v"]
# Passes --no-binary for IMPALA-3767: without this, Cython (and
# several other packages) fail download.
@@ -181,7 +202,9 @@ def exec_pip_install(args, cc="no-cc-available", env=None):
impala_pip_base_cmd[:] + ["--no-binary", ":all:", "--no-cache-dir"]
# When using a custom mirror, we also must use the index of that mirror.
if "PYPI_MIRROR" in os.environ:
# The python 3 virtualenv has trouble with using --index-url with PYPI_MIRROR,
# so it falls back to --no-index, which works fine.
if "PYPI_MIRROR" in os.environ and not is_py3:
third_party_pkg_install_cmd.extend(["--index-url",
"%s/simple" % os.environ["PYPI_MIRROR"]])
else:
@@ -217,7 +240,7 @@ def find_file(*paths):
return files[0]
def download_toolchain_python():
def download_toolchain_python(is_py3):
'''Grabs the Python implementation from the Impala toolchain, using the machinery from
bin/bootstrap_toolchain.py.
Skip the download if SKIP_TOOLCHAIN_BOOTSTRAP=true in the environment. In that case
@@ -229,27 +252,35 @@ def download_toolchain_python():
raise Exception("Impala environment not set up correctly, make sure "
"$IMPALA_TOOLCHAIN_PACKAGES_HOME is set.")
package = ToolchainPackage("python")
if is_py3:
package = ToolchainPackage("python",
explicit_version=os.environ["IMPALA_PYTHON3_VERSION"])
else:
package = ToolchainPackage("python")
if package.needs_download() and \
not (os.environ.get(SKIP_TOOLCHAIN_BOOTSTRAP) == 'true'):
package.download()
python_cmd = os.path.join(package.pkg_directory(), "bin/python")
if is_py3:
python_cmd = os.path.join(package.pkg_directory(), "bin/python3")
else:
python_cmd = os.path.join(package.pkg_directory(), "bin/python")
if not os.path.exists(python_cmd):
raise Exception("Unexpected error bootstrapping python from toolchain: {0} does not "
"exist".format(python_cmd))
return python_cmd
def install_deps():
LOG.info("Installing setuptools into the virtualenv")
exec_pip_install(["-r", SETUPTOOLS_REQS_PATH])
def install_deps(venv_dir, is_py3):
py_str = "3" if is_py3 else "2"
LOG.info("Installing setuptools into the python{0} virtualenv".format(py_str))
exec_pip_install(venv_dir, is_py3, ["-r", SETUPTOOLS_REQS_PATH])
cc = select_cc()
if cc is None:
raise Exception("CC not available")
env = dict(os.environ)
LOG.info("Installing packages into the virtualenv")
exec_pip_install(["-r", REQS_PATH], cc=cc, env=env)
mark_reqs_installed(REQS_PATH)
LOG.info("Installing packages into the python{0} virtualenv".format(py_str))
exec_pip_install(venv_dir, is_py3, ["-r", REQS_PATH], cc=cc, env=env)
mark_reqs_installed(venv_dir, REQS_PATH)
def have_toolchain():
@@ -264,26 +295,44 @@ def toolchain_pkg_dir(pkg_name):
pkg_name + "-" + pkg_version)
def install_adls_deps():
def install_adls_deps(venv_dir, is_py3):
# The ADLS dependencies require that the OS is at least CentOS 6.7 or above,
# which is why we break this into a seperate step. If the target filesystem is
# ADLS, the expectation is that the dev environment is running at least CentOS 6.7.
if os.environ.get('TARGET_FILESYSTEM') == "adls":
if reqs_are_installed(ADLS_REQS_PATH):
if reqs_are_installed(venv_dir, ADLS_REQS_PATH):
LOG.debug("Skipping ADLS deps: matching adls-installed-requirements.txt found")
return True
cc = select_cc()
assert cc is not None
LOG.info("Installing ADLS packages into the virtualenv")
exec_pip_install(["-r", ADLS_REQS_PATH], cc=cc)
mark_reqs_installed(ADLS_REQS_PATH)
py_str = "3" if is_py3 else "2"
LOG.info("Installing ADLS packages into the python{0} virtualenv".format(py_str))
exec_pip_install(venv_dir, is_py3, ["-r", ADLS_REQS_PATH], cc=cc)
mark_reqs_installed(venv_dir, ADLS_REQS_PATH)
def install_kudu_client_if_possible():
def install_py_version_deps(venv_dir, is_py3):
  '''Installs the packages that are only needed for one Python major version into
  the virtualenv at 'venv_dir': py3-requirements.txt if 'is_py3' is true,
  py2-requirements.txt otherwise. Nothing is installed if the matching requirements
  file is already marked as installed.'''
  cc = select_cc()
  assert cc is not None
  if is_py3:
    # These are extra python3-only packages
    reqs_path = PY3_REQS_PATH
    log_msg = "Installing python3 packages into the virtualenv"
  else:
    # These are extra python2-only packages
    reqs_path = PY2_REQS_PATH
    log_msg = "Installing python2 packages into the virtualenv"
  if reqs_are_installed(venv_dir, reqs_path):
    return
  LOG.info(log_msg)
  exec_pip_install(venv_dir, is_py3, ["-r", reqs_path], cc=cc)
  mark_reqs_installed(venv_dir, reqs_path)
def install_kudu_client_if_possible(venv_dir, is_py3):
'''Installs the Kudu python module if possible, which depends on the toolchain and
the compiled requirements in requirements.txt. If the toolchain isn't
available, nothing will be done.'''
if reqs_are_installed(KUDU_REQS_PATH):
if reqs_are_installed(venv_dir, KUDU_REQS_PATH):
LOG.debug("Skipping Kudu: matching kudu-installed-requirements.txt found")
return
kudu_base_dir = os.environ["IMPALA_KUDU_HOME"]
@@ -291,11 +340,13 @@ def install_kudu_client_if_possible():
LOG.debug("Skipping Kudu: %s doesn't exist" % kudu_base_dir)
return
LOG.info("Installing Kudu into the virtualenv")
py_str = "3" if is_py3 else "2"
LOG.info("Installing Kudu into the python{0} virtualenv".format(py_str))
# The installation requires that KUDU_HOME/build/latest exists. An empty directory
# structure will be made to satisfy that. The Kudu client headers and lib will be made
# available through GCC environment variables.
fake_kudu_build_dir = os.path.join(tempfile.gettempdir(), "virtualenv-kudu")
fake_kudu_build_dir = os.path.join(tempfile.gettempdir(),
"virtualenv-kudu{0}".format(py_str))
try:
artifact_dir = os.path.join(fake_kudu_build_dir, "build", "latest")
if not os.path.exists(artifact_dir):
@@ -312,8 +363,8 @@ def install_kudu_client_if_possible():
env["CPLUS_INCLUDE_PATH"] = os.path.join(kudu_client_dir, "include")
env["LIBRARY_PATH"] = os.path.pathsep.join([os.path.join(kudu_client_dir, 'lib'),
os.path.join(kudu_client_dir, 'lib64')])
exec_pip_install(["-r", KUDU_REQS_PATH], cc=cc, env=env)
mark_reqs_installed(KUDU_REQS_PATH)
exec_pip_install(venv_dir, is_py3, ["-r", KUDU_REQS_PATH], cc=cc, env=env)
mark_reqs_installed(venv_dir, KUDU_REQS_PATH)
finally:
try:
shutil.rmtree(fake_kudu_build_dir)
@@ -353,17 +404,17 @@ def error_if_kudu_client_not_found(install_dir):
raise Exception("%s not found at %s" % (kudu_client_lib, lib_dir))
def mark_reqs_installed(reqs_path):
def mark_reqs_installed(venv_dir, reqs_path):
'''Mark that the requirements from the given file are installed by copying it into
the root directory of the virtualenv.'''
installed_reqs_path = os.path.join(ENV_DIR, os.path.basename(reqs_path))
installed_reqs_path = os.path.join(venv_dir, os.path.basename(reqs_path))
shutil.copyfile(reqs_path, installed_reqs_path)
def reqs_are_installed(reqs_path):
def reqs_are_installed(venv_dir, reqs_path):
'''Check if the requirements from the given file are installed in the virtualenv by
looking for a matching requirements file in the root directory of the virtualenv.'''
installed_reqs_path = os.path.join(ENV_DIR, os.path.basename(reqs_path))
installed_reqs_path = os.path.join(venv_dir, os.path.basename(reqs_path))
if not os.path.exists(installed_reqs_path):
return False
installed_reqs_file = open(installed_reqs_path)
@@ -381,11 +432,11 @@ def reqs_are_installed(reqs_path):
installed_reqs_file.close()
def setup_virtualenv_if_not_exists():
if not (reqs_are_installed(REQS_PATH)):
delete_virtualenv_if_exist()
create_virtualenv()
install_deps()
def setup_virtualenv_if_not_exists(venv_dir, is_py3):
if not (reqs_are_installed(venv_dir, REQS_PATH)):
delete_virtualenv_if_exist(venv_dir)
create_virtualenv(venv_dir, is_py3)
install_deps(venv_dir, is_py3)
LOG.debug("Virtualenv setup complete")
@@ -397,6 +448,8 @@ if __name__ == "__main__":
" the virtualenv even if it exists and appears to be completely up-to-date.")
parser.add_option("--print-ld-library-path", action="store_true", help="Print the"
" LD_LIBRARY_PATH that should be used when running python from the virtualenv.")
parser.add_option("--python3", action="store_true", help="Generate the python3"
" virtualenv")
options, args = parser.parse_args()
if options.print_ld_library_path:
@@ -411,10 +464,17 @@ if __name__ == "__main__":
sys.exit()
logging.basicConfig(level=getattr(logging, options.log_level))
if options.python3:
venv_dir = ENV_DIR_PY3
else:
venv_dir = ENV_DIR_PY2
if options.rebuild:
delete_virtualenv_if_exist()
delete_virtualenv_if_exist(venv_dir)
# Complete as many bootstrap steps as possible (see file comment for the steps).
setup_virtualenv_if_not_exists()
install_kudu_client_if_possible()
install_adls_deps()
setup_virtualenv_if_not_exists(venv_dir, options.python3)
install_kudu_client_if_possible(venv_dir, options.python3)
install_adls_deps(venv_dir, options.python3)
install_py_version_deps(venv_dir, options.python3)

View File

@@ -38,7 +38,8 @@ PYPI_MIRROR = os.environ.get('PYPI_MIRROR', 'https://pypi.python.org')
# The requirement files that list all of the required packages and versions.
REQUIREMENTS_FILES = ['requirements.txt', 'setuptools-requirements.txt',
'kudu-requirements.txt', 'adls-requirements.txt']
'kudu-requirements.txt', 'adls-requirements.txt',
'py2-requirements.txt', 'py3-requirements.txt']
def check_digest(filename, algorithm, expected_digest):

View File

@@ -0,0 +1,34 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Python2-only requirements
cm-api == 10.0.0
# Already available as part of python on Linux.
readline == 6.2.4.1; sys_platform == 'darwin'
flake8 == 3.9.2
mccabe == 0.6.1
pycodestyle == 2.7.0
pyflakes == 2.3.1
enum34 == 1.1.10
typing == 3.10.0.0
configparser == 4.0.2
functools32 == 3.2.3-2
importlib-metadata == 2.1.3
contextlib2 == 0.6.0
pathlib2 == 2.3.7.post1
zipp == 1.2.0

View File

@@ -0,0 +1,31 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Python3-only requirements
pylint == 2.10.2
astroid == 2.7.3
lazy-object-proxy == 1.6.0
wrapt == 1.12.1
typed-ast == 1.4.3
configparser == 4.0.2
isort == 4.3.21
futures == 3.3.0; python_version == "2.7"
singledispatch == 3.6.1
toml == 0.10.2
platformdirs == 2.4.1
typing-extensions == 3.10.0.2

View File

@@ -20,23 +20,8 @@
# Dependents are indented. Dependents that have multiple parents are not listed
# multiple times (though maybe they could be).
allpairs == 2.0.1
allpairspy == 2.5.0
argparse == 1.4.0
cm-api == 10.0.0
# Already available as part of python on Linux.
readline == 6.2.4.1; sys_platform == 'darwin'
flake8 == 3.9.2
mccabe == 0.6.1
pycodestyle == 2.7.0
pyflakes == 2.3.1
enum34 == 1.1.10
typing == 3.10.0.0
configparser == 4.0.2
functools32 == 3.2.3-2
importlib-metadata == 2.1.3
contextlib2 == 0.6.0
pathlib2 == 2.3.7.post1
zipp == 1.2.0
future == 0.18.3
gcovr == 4.2
Jinja2 == 2.11.3
@@ -61,14 +46,13 @@ pyparsing == 2.0.3
pytest == 2.9.2
py == 1.4.32
pytest-forked == 0.2
pytest-random == 0.02
pytest-runner == 4.2
pytest-xdist == 1.17.1
pytest-timeout == 1.2.1
python-magic == 0.4.11
pywebhdfs == 0.3.2
pbr == 3.1.1
requests == 2.20.0
requests == 2.21.0
chardet == 3.0.4
idna == 2.8
urllib3 == 1.24.2

View File

@@ -18,4 +18,4 @@
# Newer versions of setuptools don't support Python 2.7
setuptools == 44.1.1
wheel == 0.35.1
setuptools-scm == 4.1.2
setuptools-scm == 5.0.2

View File

@@ -48,8 +48,7 @@ import os
import sys
from itertools import product
from optparse import OptionParser
import metacomm.combinatorics.all_pairs2
all_pairs = metacomm.combinatorics.all_pairs2.all_pairs2
from allpairspy import AllPairs as all_pairs
parser = OptionParser()
parser.add_option("-w", "--workload", dest="workload",

View File

@@ -136,8 +136,8 @@ class ImpalaTestMatrix(object):
if self.is_valid(vec)]
def __generate_pairwise_combinations(self):
import metacomm.combinatorics.all_pairs2
all_pairs = metacomm.combinatorics.all_pairs2.all_pairs2
from allpairspy import AllPairs
all_pairs = AllPairs
# Pairwise fails if the number of inputs == 1. Use exhaustive in this case the
# results will be the same.

View File

@@ -1,5 +1,3 @@
#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information

View File

@@ -19,7 +19,7 @@
#
import pytest
from decimal import Decimal, getcontext, ROUND_DOWN, ROUND_HALF_UP
from metacomm.combinatorics.all_pairs2 import all_pairs2 as all_pairs
from allpairspy import AllPairs as all_pairs
from random import randint
from tests.common.impala_test_suite import ImpalaTestSuite