Files
impala/infra/python/bootstrap_virtualenv.py
Dan Hecht ffa7829b70 IMPALA-3918: Remove Cloudera copyrights and add ASF license header
For files that have a Cloudera copyright (and no other copyright
notice), make changes to follow the ASF source file header policy here:

http://www.apache.org/legal/src-headers.html#headers

Specifically:
1) Remove the Cloudera copyright.
2) Modify NOTICE.txt according to
   http://www.apache.org/legal/src-headers.html#notice
   to follow that format and add a line for Cloudera.
3) Replace or add the existing ASF license text with the one given
   on the website.

Much of this change was automatically generated via:

git grep -li 'Copyright.*Cloudera' > modified_files.txt
cat modified_files.txt | xargs perl -n -i -e 'print unless m#Copyright.*Cloudera#i;'
cat modified_files_txt | xargs fix_apache_license.py [1]

Some manual fixups were performed following those steps, especially when
license text was completely missing from the file.

[1] https://gist.github.com/anonymous/ff71292094362fc5c594 with minor
    modification to ORIG_LICENSE to match Impala's license text.

Change-Id: I2e0bd8420945b953e1b806041bea4d72a3943d86
Reviewed-on: http://gerrit.cloudera.org:8080/3779
Reviewed-by: Dan Hecht <dhecht@cloudera.com>
Tested-by: Internal Jenkins
2016-08-09 08:19:41 +00:00

289 lines
11 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# This module will create a python virtual env and install external dependencies. If
# the virtualenv already exists and the list of dependencies matches the list of
# installed dependencies, nothing will be done.
#
# This module can be run with python >= 2.4 but python >= 2.6 must be installed on the
# system. If the default 'python' command refers to < 2.6, python 2.6 will be used
# instead.
import glob
import logging
import optparse
import os
import shutil
import subprocess
import sys
import tarfile
import tempfile
import textwrap
import urllib
LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
DEPS_DIR = os.path.join(os.path.dirname(__file__), "deps")
ENV_DIR = os.path.join(os.path.dirname(__file__), "env")
# Generated using "pip install --download <DIR> -r requirements.txt"
REQS_PATH = os.path.join(DEPS_DIR, "requirements.txt")
# After installing, the requirements.txt will be copied into the virtualenv to
# record what was installed.
INSTALLED_REQS_PATH = os.path.join(ENV_DIR, "installed-requirements.txt")
def delete_virtualenv_if_exist():
if os.path.exists(ENV_DIR):
shutil.rmtree(ENV_DIR)
def create_virtualenv():
LOG.info("Creating python virtualenv")
build_dir = tempfile.mkdtemp()
file = tarfile.open(find_file(DEPS_DIR, "virtualenv*.tar.gz"), "r:gz")
for member in file.getmembers():
file.extract(member, build_dir)
file.close()
python_cmd = detect_python_cmd()
exec_cmd([python_cmd, find_file(build_dir, "virtualenv*", "virtualenv.py"), "--quiet",
"--python", python_cmd, ENV_DIR])
shutil.rmtree(build_dir)
def exec_cmd(args, **kwargs):
'''Executes a command and waits for it to finish, raises an exception if the return
status is not zero. The command output is returned.
'args' and 'kwargs' use the same format as subprocess.Popen().
'''
process = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
**kwargs)
output = process.communicate()[0]
if process.returncode != 0:
raise Exception("Command returned non-zero status\nCommand: %s\nOutput: %s"
% (args, output))
return output
def exec_pip_install(args, **popen_kwargs):
# Don't call the virtualenv pip directly, it uses a hashbang to to call the python
# virtualenv using an absolute path. If the path to the virtualenv is very long, the
# hashbang won't work.
#
# Passes --no-binary for IMPALA-3767: without this, Cython (and
# several other packages) fail download.
exec_cmd([os.path.join(ENV_DIR, "bin", "python"), os.path.join(ENV_DIR, "bin", "pip"),
"install", "--no-binary", "--no-index", "--find-links",
"file://%s" % urllib.pathname2url(os.path.abspath(DEPS_DIR))] + args, **popen_kwargs)
def find_file(*paths):
'''Returns the path specified by the glob 'paths', raises an exception if no file is
found.
Ex: find_file('/etc', 'h*sts') --> /etc/hosts
'''
path = os.path.join(*paths)
files = glob.glob(path)
if len(files) > 1:
raise Exception("Found too many files at %s: %s" % (path, files))
if len(files) == 0:
raise Exception("No file found at %s" % path)
return files[0]
def detect_python_cmd():
'''Returns the system command that provides python 2.6 or greater.'''
paths = os.getenv("PATH").split(os.path.pathsep)
for cmd in ("python", "python27", "python2.7", "python-27", "python-2.7", "python26",
"python2.6", "python-26", "python-2.6"):
for path in paths:
cmd_path = os.path.join(path, cmd)
if not os.path.exists(cmd_path) or not os.access(cmd_path, os.X_OK):
continue
exit = subprocess.call([cmd_path, "-c", textwrap.dedent("""
import sys
sys.exit(int(sys.version_info[:2] < (2, 6)))""")])
if exit == 0:
return cmd_path
raise Exception("Could not find minimum required python version 2.6")
def install_deps():
LOG.info("Installing packages into the virtualenv")
exec_pip_install(["-r", REQS_PATH])
shutil.copyfile(REQS_PATH, INSTALLED_REQS_PATH)
def install_kudu_client_if_possible():
"""Installs the Kudu python module if possible. The Kudu module is the only one that
requires the toolchain. If the toolchain isn't in use or hasn't been populated
yet, nothing will be done. Also nothing will be done if the Kudu client lib required
by the module isn't available (as determined by KUDU_IS_SUPPORTED).
"""
if os.environ["KUDU_IS_SUPPORTED"] != "true":
LOG.debug("Skipping Kudu: Kudu is not supported")
return
impala_toolchain_dir = os.environ.get("IMPALA_TOOLCHAIN")
if not impala_toolchain_dir:
LOG.debug("Skipping Kudu: IMPALA_TOOLCHAIN not set")
return
toolchain_kudu_dir = os.path.join(
impala_toolchain_dir, "kudu-" + os.environ["IMPALA_KUDU_VERSION"])
if not os.path.exists(toolchain_kudu_dir):
LOG.debug("Skipping Kudu: %s doesn't exist" % toolchain_kudu_dir)
return
# The "pip" command could be used to provide the version of Kudu installed (if any)
# but it's a little too slow. Running the virtualenv python to detect the installed
# version is faster.
actual_version_string = exec_cmd([os.path.join(ENV_DIR, "bin", "python"), "-c",
textwrap.dedent("""
try:
import kudu
print kudu.__version__
except ImportError:
pass""")]).strip()
actual_version = [int(v) for v in actual_version_string.split(".") if v]
reqs_file = open(REQS_PATH)
try:
for line in reqs_file:
if not line.startswith("# kudu-python=="):
continue
expected_version_string = line.split()[1].split("==")[1]
break
else:
raise Exception("Unable to find kudu-python version in requirements file")
finally:
reqs_file.close()
expected_version = [int(v) for v in expected_version_string.split(".")]
if actual_version and actual_version == expected_version:
LOG.debug("Skipping Kudu: Installed %s == required %s"
% (actual_version_string, expected_version_string))
return
LOG.debug("Kudu installation required. Actual version %s. Required version %s.",
actual_version, expected_version)
LOG.info("Installing Kudu into the virtualenv")
# The installation requires that KUDU_HOME/build/latest exists. An empty directory
# structure will be made to satisfy that. The Kudu client headers and lib will be made
# available through GCC environment variables.
fake_kudu_build_dir = os.path.join(tempfile.gettempdir(), "virtualenv-kudu")
try:
artifact_dir = os.path.join(fake_kudu_build_dir, "build", "latest")
if not os.path.exists(artifact_dir):
os.makedirs(artifact_dir)
env = dict(os.environ)
env["KUDU_HOME"] = fake_kudu_build_dir
kudu_client_dir = find_kudu_client_install_dir()
env["CPLUS_INCLUDE_PATH"] = os.path.join(kudu_client_dir, "include")
env["LIBRARY_PATH"] = os.path.pathsep.join([os.path.join(kudu_client_dir, 'lib'),
os.path.join(kudu_client_dir, 'lib64')])
exec_pip_install(["kudu-python==" + expected_version_string], env=env)
finally:
try:
shutil.rmtree(fake_kudu_build_dir)
except Exception:
LOG.debug("Error removing temp Kudu build dir", exc_info=True)
def find_kudu_client_install_dir():
custom_client_dir = os.environ["KUDU_CLIENT_DIR"]
if custom_client_dir:
install_dir = os.path.join(custom_client_dir, "usr", "local")
error_if_kudu_client_not_found(install_dir)
else:
# If the toolchain appears to have been setup already, then the Kudu client is
# required to exist. It's possible that the toolchain won't be setup yet though
# since the toolchain bootstrap script depends on the virtualenv.
kudu_base_dir = os.path.join(os.environ["IMPALA_TOOLCHAIN"],
"kudu-%s" % os.environ["IMPALA_KUDU_VERSION"])
install_dir = os.path.join(kudu_base_dir, "debug")
if os.path.exists(kudu_base_dir):
error_if_kudu_client_not_found(install_dir)
return install_dir
def error_if_kudu_client_not_found(install_dir):
header_path = os.path.join(install_dir, "include", "kudu", "client", "client.h")
if not os.path.exists(header_path):
raise Exception("Kudu client header not found at %s" % header_path)
kudu_client_lib = "libkudu_client.so"
lib_dir = os.path.join(install_dir, "lib64")
if not os.path.exists(lib_dir):
lib_dir = os.path.join(install_dir, "lib")
for _, _, files in os.walk(lib_dir):
for file in files:
if file == kudu_client_lib:
return
raise Exception("%s not found at %s" % (kudu_client_lib, lib_dir))
def deps_are_installed():
if not os.path.exists(INSTALLED_REQS_PATH):
return False
installed_reqs_file = open(INSTALLED_REQS_PATH)
try:
reqs_file = open(REQS_PATH)
try:
if reqs_file.read() == installed_reqs_file.read():
return True
else:
LOG.info("Virtualenv upgrade needed")
return False
finally:
reqs_file.close()
finally:
installed_reqs_file.close()
def setup_virtualenv_if_not_exists():
if not deps_are_installed():
delete_virtualenv_if_exist()
create_virtualenv()
install_deps()
LOG.info("Virtualenv setup complete")
if __name__ == "__main__":
parser = optparse.OptionParser()
parser.add_option("-l", "--log-level", default="INFO",
choices=("DEBUG", "INFO", "WARN", "ERROR"))
parser.add_option("-r", "--rebuild", action="store_true", help="Force a rebuild of"
" the virtualenv even if it exists and appears to be completely up-to-date.")
parser.add_option("--print-ld-library-path", action="store_true", help="Print the"
" LD_LIBRARY_PATH that should be used when running python from the virtualenv.")
options, args = parser.parse_args()
if options.print_ld_library_path:
kudu_client_dir = find_kudu_client_install_dir()
print os.path.pathsep.join([os.path.join(kudu_client_dir, 'lib'),
os.path.join(kudu_client_dir, 'lib64')])
sys.exit()
logging.basicConfig(level=getattr(logging, options.log_level))
if options.rebuild:
delete_virtualenv_if_exist()
setup_virtualenv_if_not_exists()
install_kudu_client_if_possible()