Automatically enable toolchain for development

This patch adds logic to automatically download the pre-built toolchain
packages to the local developer machine using the bootstrap_toolchain.py
script in case they are not present. No manual user intervention is
necessary to initiate the download process.

If desired, the script can be run manually at any time from a shell with a
correctly sourced Impala environment to re-download the dependencies.

Change-Id: I636160efeadfac4b5c1feb478da5ae5da0c9fd00
Reviewed-on: http://gerrit.cloudera.org:8080/1429
Reviewed-by: Martin Grund <mgrund@cloudera.com>
Tested-by: Internal Jenkins
This commit is contained in:
Martin Grund
2015-11-12 17:03:55 -08:00
committed by Internal Jenkins
parent adb131c6c1
commit 1720409545
5 changed files with 166 additions and 21 deletions

3
.gitignore vendored
View File

@@ -37,3 +37,6 @@ tests/test-hive-udfs/target/
cdh-*-hdfs-data/
avro_schemas/
cluster_logs/
# Impala toolchain directory
toolchain/

View File

@@ -14,6 +14,36 @@
cmake_minimum_required(VERSION 2.6)
# Decide whether to build against the pre-built Impala toolchain. The toolchain is used
# unless DISABLE_IMPALA_TOOLCHAIN holds a true value in the environment. Every
# expansion below is quoted so that an unset/empty variable (or a multi-word error
# string) stays a single if() argument instead of producing a malformed expression.
if (NOT "$ENV{DISABLE_IMPALA_TOOLCHAIN}")
  # Enable toolchain variable if the environment is setup
  set(IMPALA_TOOLCHAIN ON)
  message(STATUS "Toolchain build.")

  # If the toolchain directory does not yet exist, we assume that the dependencies
  # should be downloaded. If the download script is not available fail the
  # configuration.
  if (NOT IS_DIRECTORY "$ENV{IMPALA_TOOLCHAIN}")
    set(BOOTSTRAP_CMD "$ENV{IMPALA_HOME}/bin/bootstrap_toolchain.py")
    # Download and unpack the dependencies
    message(STATUS "Downloading and extracting dependencies.")
    execute_process(COMMAND "${BOOTSTRAP_CMD}" RESULT_VARIABLE BOOTSTRAP_RESULT)
    # BOOTSTRAP_RESULT is the exit code on a normal run, but it is a descriptive
    # string when the script could not be started at all; compare it as a quoted
    # string so that case reaches the FATAL_ERROR below.
    if ("${BOOTSTRAP_RESULT}" STREQUAL "0")
      message(STATUS "Toolchain bootstrap complete.")
    else()
      message(FATAL_ERROR "Toolchain bootstrap failed.")
    endif()
  else()
    message(STATUS "Impala toolchain picked up at $ENV{IMPALA_TOOLCHAIN}")
  endif()
else()
  set(IMPALA_TOOLCHAIN OFF)
  message(STATUS "Impala toolchain was explicitly disabled using DISABLE_IMPALA_TOOLCHAIN.")
  message(STATUS "Assuming system search path for dependencies.")
endif()
# Explicitly define project() to allow modifying the compiler before the project is
# initialized.
project(Impala)
if (NOT DEFINED BUILD_SHARED_LIBS)
set(BUILD_SHARED_LIBS OFF)
endif()
@@ -21,13 +51,6 @@ endif()
# Build compile commands database (compile_commands.json in the build directory).
# The variable name must be spelled exactly; a misspelled name is silently ignored.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Report which kind of build is being configured, based on IMPALA_TOOLCHAIN.
if (IMPALA_TOOLCHAIN)
  message(STATUS "Toolchain build.")
else()
  message(STATUS "Regular build.")
endif()
# generate CTest input files
enable_testing()
@@ -58,6 +81,7 @@ if (IMPALA_TOOLCHAIN)
# Define root path for all dependencies, this is in the form of
# set_dep_root(PACKAGE) ->
# PACKAGE_ROOT set to $ENV{IMPALA_TOOLCHAIN}/PACKAGE-$ENV{IMPALA_PACKAGE_VERSION}
set_dep_root(GCC)
set_dep_root(AVRO)
set_dep_root(BZIP2)
set_dep_root(CYRUS_SASL)

121
bin/bootstrap_toolchain.py Executable file
View File

@@ -0,0 +1,121 @@
#!/usr/bin/env impala-python
# Copyright (c) 2015, Cloudera, inc.
# Confidential Cloudera Information: Covered by NDA.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Bootstrapping the native toolchain with prebuilt binaries
#
# The purpose of this script is to download prebuilt artifacts of the native toolchain to
# satisfy the third-party dependencies for Impala. The script checks for the presence of
# IMPALA_HOME and IMPALA_TOOLCHAIN. IMPALA_HOME indicates that the environment is
# correctly setup and that we can deduce the version settings of the dependencies from the
# environment. IMPALA_TOOLCHAIN indicates the location where the prebuilt artifacts should
# be extracted to.
#
# The script is called as follows without any additional parameters:
#
# python bootstrap_toolchain.py
import sh
import os
import sys
import re
HOST = "https://native-toolchain.s3.amazonaws.com/build"
OS_MAPPING = {
"centos6" : "ec2-package-centos-6",
"centos5" : "ec2-package-centos-5",
"centos7" : "ec2-package-centos-7",
"debian6" : "ec2-package-debian-6",
"debian7" : "ec2-package-debian-7",
"suselinux11": "ec2-package-sles-11",
"ubuntu12.04" : "ec2-package-ubuntu-12-04",
"ubuntu14.04" : "ec2-package-ubuntu-14-04"
}
def get_release_label():
  """Gets the right package label from the OS version.

  The whitespace-separated tokens printed by `lsb_release -irs` are lower-cased and
  concatenated (giving a string of the same form as the OS_MAPPING keys, e.g.
  "ubuntu14.04"). The first OS_MAPPING key found inside that string selects the label.

  Returns:
    The package label (an OS_MAPPING value) for the current platform.

  Raises:
    Exception: if no OS_MAPPING key matches; a hint on building the toolchain from
      source is printed first.
  """
  release = "".join(part.lower() for part in sh.lsb_release("-irs").split())
  # items() works on both Python 2 and 3 (iteritems() is Python 2 only).
  for os_key, label in OS_MAPPING.items():
    # Keys are literal OS names; escape them so the "." in e.g. "ubuntu12.04" is not
    # interpreted as a regex wildcard.
    if re.search(re.escape(os_key), release):
      return label

  print("Pre-built toolchain archives not available for your platform.")
  print("Clone and build native toolchain from source using this repository:")
  print(" https://github.com/cloudera/native-toolchain")
  raise Exception("Could not find package label for OS version: {0}.".format(release))
def download_package(destination, product, version, compiler):
  """Downloads one pre-built package archive and extracts it into `destination`.

  The archive is named "<product>-<version>-<compiler>-<label>.tar.gz", where the label
  comes from get_release_label(), and is fetched from
  HOST + "/<product>/<version>-<compiler>/<archive name>".

  Args:
    destination: directory that receives the archive and its extracted contents.
    product: package name, e.g. "boost".
    version: package version string.
    compiler: compiler identifier used in the archive name, e.g. "gcc-4.9.2".

  The archive is deleted after extraction, also when extraction fails, so that a
  truncated or corrupt file is not kept and then skipped by --no-clobber on the next
  run.
  """
  label = get_release_label()
  file_name = "{0}-{1}-{2}-{3}.tar.gz".format(product, version, compiler, label)
  url_path = "/{0}/{1}-{2}/{3}".format(product, version, compiler, file_name)
  download_path = HOST + url_path
  archive = os.path.join(destination, file_name)

  # Single-argument print(...) behaves the same on Python 2 and Python 3.
  print("URL {0}".format(download_path))
  print("Downloading {0} to {1}".format(file_name, destination))
  # --no-clobber avoids downloading the file if a file with the name already exists
  sh.wget(download_path, directory_prefix=destination, no_clobber=True)
  try:
    print("Extracting {0}".format(file_name))
    sh.tar(z=True, x=True, f=archive, directory=destination)
  finally:
    # Remove the archive whether or not extraction succeeded; see docstring.
    sh.rm(archive)
def bootstrap(packages):
  """Validates the environment, then downloads and extracts every package.

  $IMPALA_HOME, $IMPALA_TOOLCHAIN and $IMPALA_GCC_VERSION must be set and non-empty;
  otherwise a message is printed and the process exits with status 1. The directory
  named by $IMPALA_TOOLCHAIN is created if it does not yet exist, and each entry of
  `packages` is downloaded and extracted into it.

  Args:
    packages: iterable of package definitions as accepted by
      unpack_name_and_version(): a package name, or a (name, version) pair.

  All inputs (environment variables and package versions) are resolved before the
  destination directory is created or anything is downloaded, so a misconfigured
  environment does not leave an empty or partially filled toolchain directory behind.
  """
  if not os.getenv("IMPALA_HOME"):
    print("Impala environment not set up correctly, make sure "
          "impala-config.sh is sourced.")
    sys.exit(1)

  destination = os.getenv("IMPALA_TOOLCHAIN")
  if not destination:
    print("Impala environment not set up correctly, make sure "
          "$IMPALA_TOOLCHAIN is present.")
    sys.exit(1)

  # Detect the compiler. Report a missing version the same way as the other
  # environment checks instead of failing with a bare KeyError.
  gcc_version = os.getenv("IMPALA_GCC_VERSION")
  if not gcc_version:
    print("Impala environment not set up correctly, make sure "
          "$IMPALA_GCC_VERSION is present.")
    sys.exit(1)
  compiler = "gcc-{0}".format(gcc_version)

  # Resolve every (name, version) pair up front so a missing version aborts the run
  # before any download starts.
  resolved = [unpack_name_and_version(p) for p in packages]

  # Create the destination directory if necessary
  if not os.path.exists(destination):
    os.makedirs(destination)

  for pkg_name, pkg_version in resolved:
    download_package(destination, pkg_name, pkg_version, compiler)
def unpack_name_and_version(package):
  """Resolves a package definition to a (name, version) pair.

  A package definition is either a string, in which case the version is fetched from
  the environment variable IMPALA_<NAME>_VERSION (name upper-cased, "-" replaced by
  "_"), or a tuple where the package name and the package version are fully specified.

  Args:
    package: package name string, or a (name, version) sequence.

  Returns:
    A (name, version) tuple.

  Raises:
    Exception: if `package` is a string and the matching environment variable is
      not set.
  """
  # basestring only exists on Python 2; fall back to str on Python 3.
  try:
    string_types = basestring
  except NameError:
    string_types = str

  if not isinstance(package, string_types):
    return package[0], package[1]

  env_var = "IMPALA_{0}_VERSION".format(package).replace("-", "_").upper()
  try:
    return package, os.environ[env_var]
  except KeyError:
    raise Exception("Could not find version for {0} in environment var {1}".format(
        package, env_var))
if __name__ == "__main__":
  # A plain string takes its version from the environment; a (name, version) tuple
  # pins the version explicitly (used here to fetch a second LLVM build).
  packages = [
      "avro",
      "boost",
      "bzip2",
      "cyrus-sasl",
      "gcc",
      "gflags",
      "glog",
      "gperftools",
      "gtest",
      "llvm",
      ("llvm", "3.7.0"),
      "lz4",
      "openldap",
      "rapidjson",
      "re2",
      "snappy",
      "thrift",
      "zlib",
  ]
  bootstrap(packages)

View File

@@ -20,20 +20,6 @@
# will be set by other scripts before sourcing this file. Those options are not set in
# this script because scripts outside this repository may need to be updated and that
# is not practical at this time.
# Setting up Impala binary toolchain. The default path is /opt/bin-toolchain but can be
# set to any path that contains the necessary dependencies in the format of
# /opt/bin-toolchain/package-X.Y.Z
: ${IMPALA_TOOLCHAIN=}
# If USE_SYSTEM_GCC is set to 1 the toolchain's GCC will not be used. This flag should
# only be set if the toolchain was built with the system GCC as well.
: ${USE_SYSTEM_GCC=0}
# Export both variables
export USE_SYSTEM_GCC
export IMPALA_TOOLCHAIN
export JAVA_HOME="${JAVA_HOME:-/usr/java/default}"
if [ ! -d "$JAVA_HOME" ] ; then
echo "JAVA_HOME must be set to the location of your JDK!"
@@ -53,6 +39,15 @@ if [ -z $IMPALA_HOME ]; then
fi
fi
# Setting up Impala binary toolchain.
# Each variable keeps whatever value is already present in the environment (even an
# empty one) and only falls back to the default shown here when it is unset:
#   DISABLE_IMPALA_TOOLCHAIN - defaults to 0
#   IMPALA_TOOLCHAIN         - defaults to $IMPALA_HOME/toolchain
#   USE_SYSTEM_GCC           - defaults to 0
export DISABLE_IMPALA_TOOLCHAIN="${DISABLE_IMPALA_TOOLCHAIN-0}"
export IMPALA_TOOLCHAIN="${IMPALA_TOOLCHAIN-$IMPALA_HOME/toolchain}"
export USE_SYSTEM_GCC="${USE_SYSTEM_GCC-0}"
export CDH_MAJOR_VERSION=5
export HADOOP_LZO=${HADOOP_LZO-$IMPALA_HOME/../hadoop-lzo}
export IMPALA_LZO=${IMPALA_LZO-$IMPALA_HOME/../Impala-lzo}

View File

@@ -18,6 +18,8 @@
set -euo pipefail
trap 'echo Error in $0 at line $LINENO: $(awk "NR == $LINENO" $0)' ERR
: ${IMPALA_TOOLCHAIN=}
BUILD_TESTS=1
CLEAN=0
TARGET_BUILD_TYPE=${TARGET_BUILD_TYPE:-""}