IMPALA-7499: build against CDH Kudu

This patch switches from pulling in Kudu (libkudu_client.so and the
minicluster tarballs) from the toolchain to pulling it in with the
other CDH components.

For OSes where the CDH binaries are not provided but the toolchain
binaries are (only Ubuntu 14), we set USE_CDH_KUDU to false to
continue to download the toolchain binaries. We also continue
to use the toolchain binaries to build the client stub for OSes
where KUDU_IS_SUPPORTED is false.

This patch also fixes an issue in bootstrap_toolchain.py where we were
using the wrong g++ to compile the Kudu stub.

Testing:
- Verified building and running Impala works as expected for supported
  combinations of KUDU_IS_SUPPORTED/USE_CDH_KUDU

Change-Id: If6e1048438b6d09a1b38c58371d6212bb6dcc06c
Reviewed-on: http://gerrit.cloudera.org:8080/11363
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Thomas Tauber-Marshall
2018-08-10 16:54:32 -07:00
committed by Impala Public Jenkins
parent ab6bd74ff7
commit 85f3bb0178
5 changed files with 151 additions and 102 deletions

View File

@@ -358,11 +358,10 @@ add_definitions(-DKUDU_HEADERS_USE_GLOG)
# Select the directory that holds the kuduClient CMake package files.
# A non-empty KUDU_CLIENT_DIR environment variable takes precedence; otherwise the
# debug or release tree under IMPALA_KUDU_HOME is used, matching CMAKE_BUILD_TYPE.
# The environment value is quoted and compared with STREQUAL: EQUAL is a numeric
# comparison, and an unquoted empty expansion would leave the operator without a
# left-hand operand.
if(NOT "$ENV{KUDU_CLIENT_DIR}" STREQUAL "")
  set(kuduClient_DIR "$ENV{KUDU_CLIENT_DIR}/usr/local/share/kuduClient/cmake")
elseif("${CMAKE_BUILD_TYPE}" STREQUAL "DEBUG")
  set(kuduClient_DIR "$ENV{IMPALA_KUDU_HOME}/debug/share/kuduClient/cmake")
else()
  set(kuduClient_DIR "$ENV{IMPALA_KUDU_HOME}/release/share/kuduClient/cmake")
endif()
# When KUDU_IS_SUPPORTED is false, the Kudu client is expected to be a non-functional

View File

@@ -21,10 +21,15 @@
# and IMPALA_TOOLCHAIN. IMPALA_HOME indicates that the environment is correctly setup and
# that we can deduce the version settings of the dependencies from the environment.
# IMPALA_TOOLCHAIN indicates the location where the prebuilt artifacts should be extracted
# to. If DOWNLOAD_CDH_COMPONENTS is set to true, this script will also download and extract
# the CDH components (i.e. Hadoop, Hive, HBase and Sentry) into
# to. If DOWNLOAD_CDH_COMPONENTS is set to true, this script will also download and
# extract the CDH components (i.e. Hadoop, Hive, HBase and Sentry) into
# CDH_COMPONENTS_HOME.
#
# Kudu can be downloaded either from the toolchain or as a CDH component, depending on the
# value of USE_CDH_KUDU. If KUDU_IS_SUPPORTED is false, we download the toolchain Kudu and
# use the symbols to compile a non-functional stub library so that Impala has something to
# link against.
#
# By default, packages are downloaded from an S3 bucket named native-toolchain.
# The exact URL is based on IMPALA_<PACKAGE>_VERSION environment variables
# (configured in impala-config.sh) as well as the OS version being built on.
@@ -46,27 +51,32 @@ import sys
import tempfile
import time
from collections import namedtuple
TOOLCHAIN_HOST = "https://native-toolchain.s3.amazonaws.com/build"
OS_MAPPING = {
"centos6" : "ec2-package-centos-6",
"centos5" : "ec2-package-centos-5",
"centos7" : "ec2-package-centos-7",
"redhatenterpriseserver5" : "ec2-package-centos-5",
"redhatenterpriseserver6" : "ec2-package-centos-6",
"redhatenterpriseserver7" : "ec2-package-centos-7",
"debian6" : "ec2-package-debian-6",
"debian7" : "ec2-package-debian-7",
"debian8" : "ec2-package-debian-8",
"suselinux11": "ec2-package-sles-11",
"suselinux12": "ec2-package-sles-12",
"suse12.2": "ec2-package-sles-12",
"ubuntu12.04" : "ec2-package-ubuntu-12-04",
"ubuntu14.04" : "ec2-package-ubuntu-14-04",
"ubuntu15.04" : "ec2-package-ubuntu-14-04",
"ubuntu15.10" : "ec2-package-ubuntu-14-04",
"ubuntu16.04" : "ec2-package-ubuntu-16-04",
}
# One entry per recognised OS. 'lsb_release' is the value matched against the output of
# 'lsb_release -irs'; 'toolchain' and 'cdh' are the package labels used for the
# toolchain and for the CDH components respectively. A label of None means that no
# package of that kind is listed for the OS.
OsMapping = namedtuple('OsMapping', ['lsb_release', 'toolchain', 'cdh'])

# Rows are (lsb_release, toolchain, cdh). The order of the rows is preserved in the
# resulting list.
OS_MAPPING = list(map(OsMapping._make, [
  ("centos5", "ec2-package-centos-5", None),
  ("centos6", "ec2-package-centos-6", "redhat6"),
  ("centos7", "ec2-package-centos-7", "redhat7"),
  ("redhatenterpriseserver5", "ec2-package-centos-5", None),
  ("redhatenterpriseserver6", "ec2-package-centos-6", "redhat6"),
  ("redhatenterpriseserver7", "ec2-package-centos-7", "redhat7"),
  ("debian6", "ec2-package-debian-6", None),
  ("debian7", "ec2-package-debian-7", None),
  ("debian8", "ec2-package-debian-8", "debian8"),
  ("suselinux11", "ec2-package-sles-11", None),
  ("suselinux12", "ec2-package-sles-12", "sles12"),
  ("suse12.2", "ec2-package-sles-12", "sles12"),
  ("ubuntu12.04", "ec2-package-ubuntu-12-04", None),
  ("ubuntu14.04", "ec2-package-ubuntu-14-04", None),
  ("ubuntu15.04", "ec2-package-ubuntu-14-04", None),
  ("ubuntu15.10", "ec2-package-ubuntu-14-04", None),
  ("ubuntu16.04", "ec2-package-ubuntu-16-04", "ubuntu1604"),
]))
class Package(object):
"""
@@ -90,17 +100,22 @@ class Package(object):
url_env_var = "IMPALA_{0}_URL".format(package_env_name)
self.url = os.environ.get(url_env_var)
def try_get_platform_release_label():
"""Gets the right package label from the OS version. Return None if not found."""
"""Gets the right package label from the OS version. Returns an OsMapping with both
'toolchain' and 'cdh' labels. Returns None if not found.
"""
try:
return get_platform_release_label()
except:
except Exception:
return None
# Cache "lsb_release -irs" to avoid excessive logging from sh, and
# to shave a little bit of time.
lsb_release_cache = None
def get_platform_release_label(release=None):
"""Gets the right package label from the OS version. Raise exception if not found.
'release' can be provided to override the underlying OS version.
@@ -117,9 +132,9 @@ def get_platform_release_label(release=None):
release = release.split('.')[0]
break
lsb_release_cache = release
for k, v in OS_MAPPING.iteritems():
if re.search(k, release):
return v
for mapping in OS_MAPPING:
if re.search(mapping.lsb_release, release):
return mapping
raise Exception("Could not find package label for OS version: {0}.".format(release))
@@ -148,7 +163,7 @@ def download_package(destination, package, compiler, platform_release=None):
remove_existing_package(destination, package.name, package.version)
toolchain_build_id = os.environ["IMPALA_TOOLCHAIN_BUILD_ID"]
label = get_platform_release_label(release=platform_release)
label = get_platform_release_label(release=platform_release).toolchain
format_params = {'product': package.name, 'version': package.version,
'compiler': compiler, 'label': label, 'toolchain_build_id': toolchain_build_id}
file_name = "{product}-{version}-{compiler}-{label}.tar.gz".format(**format_params)
@@ -166,7 +181,8 @@ def bootstrap(toolchain_root, packages):
"""Downloads and unpacks each package in the list `packages` into `toolchain_root` if it
doesn't exist already.
"""
if not try_get_platform_release_label():
if not try_get_platform_release_label() \
or not try_get_platform_release_label().toolchain:
check_custom_toolchain(toolchain_root, packages)
return
@@ -181,7 +197,7 @@ def bootstrap(toolchain_root, packages):
else:
build_kudu_stub(toolchain_root, p.version, compiler)
write_version_file(toolchain_root, p.name, p.version, compiler,
get_platform_release_label())
get_platform_release_label().toolchain)
execute_many(handle_package, packages)
def check_output(cmd_args):
@@ -229,7 +245,7 @@ def check_for_existing_package(toolchain_root, pkg_name, pkg_version, compiler):
if not os.path.exists(version_file):
return False
label = get_platform_release_label()
label = get_platform_release_label().toolchain
pkg_version_string = "{0}-{1}-{2}-{3}".format(pkg_name, pkg_version, compiler, label)
with open(version_file) as f:
return f.read().strip() == pkg_version_string
@@ -340,7 +356,9 @@ extern "C" void %s() {
# Compile the library.
stub_client_lib_path = os.path.join(stub_build_dir, "libkudu_client.so")
subprocess.check_call(["g++", stub_client_src_file.name, "-shared", "-fPIC",
gpp = os.path.join(
toolchain_root, "gcc-%s" % os.environ.get("IMPALA_GCC_VERSION"), "bin", "g++")
subprocess.check_call([gpp, stub_client_src_file.name, "-shared", "-fPIC",
"-Wl,-soname,%s" % so_name, "-o", stub_client_lib_path])
# Replace the real libs with the stub.
@@ -377,8 +395,14 @@ def download_cdh_components(toolchain_root, cdh_components, url_prefix):
if os.path.isdir(pkg_directory):
return
platform_label = ""
# Kudu is the only component that's platform dependent.
if component.name == "kudu":
platform_label = "-%s" % get_platform_release_label().cdh
# Download the package if it doesn't exist
file_name = "{0}-{1}.tar.gz".format(component.name, component.version)
file_name = "{0}-{1}{2}.tar.gz".format(
component.name, component.version, platform_label)
if component.url is None:
download_path = url_prefix + file_name
else:
@@ -422,9 +446,18 @@ if __name__ == "__main__":
if not os.path.exists(toolchain_root):
os.makedirs(toolchain_root)
use_cdh_kudu = os.getenv("USE_CDH_KUDU") == "true"
if os.environ["KUDU_IS_SUPPORTED"] != "true":
# We need gcc to build the Kudu stub, so download it first, and we also
# need the toolchain Kudu.
bootstrap(toolchain_root, [Package("gcc")])
use_cdh_kudu = False
# LLVM and Kudu are the largest packages. Sort them first so that
# their download starts as soon as possible.
packages = map(Package, ["llvm", "kudu",
packages = []
if not use_cdh_kudu: packages += [Package("kudu")]
packages += map(Package, ["llvm",
"avro", "binutils", "boost", "breakpad", "bzip2", "cctz", "cmake", "crcutil",
"flatbuffers", "gcc", "gdb", "gflags", "glog", "gperftools", "gtest", "libev",
"libunwind", "lz4", "openldap", "openssl", "orc", "protobuf",
@@ -444,6 +477,12 @@ if __name__ == "__main__":
cdh_build_number = os.environ.get("CDH_BUILD_NUMBER")
cdh_components = map(Package, ["hadoop", "hbase", "hive", "sentry"])
if use_cdh_kudu:
if not try_get_platform_release_label() or not try_get_platform_release_label().cdh:
logging.error("CDH Kudu is not supported on this platform. Set USE_CDH_KUDU=false "
"to use the toolchain Kudu.")
sys.exit(1)
cdh_components += [Package("kudu")]
download_path_prefix= \
"https://{0}/build/cdh_components/{1}/tarballs/".format(cdh_host,
cdh_build_number)

View File

@@ -155,10 +155,6 @@ if [[ $OSTYPE == "darwin"* ]]; then
unset IMPALA_OPENSSL_URL
fi
# Kudu version in the toolchain; provides libkudu_client.so and minicluster binaries.
export IMPALA_KUDU_VERSION=5211897
unset IMPALA_KUDU_URL
: ${CDH_DOWNLOAD_HOST:=native-toolchain.s3.amazonaws.com}
export CDH_DOWNLOAD_HOST
export CDH_MAJOR_VERSION=6
@@ -252,59 +248,6 @@ export DOWNLOAD_CDH_COMPONENTS=${DOWNLOAD_CDH_COMPONENTS-"$NO_THIRDPARTY"}
export IS_OSX="$(if [[ "$OSTYPE" == "darwin"* ]]; then echo true; else echo false; fi)"
# To use a local build of Kudu, set KUDU_BUILD_DIR to the path Kudu was built in and
# set KUDU_CLIENT_DIR to the path KUDU was installed in.
# Example:
# git clone https://github.com/cloudera/kudu.git
# ...build 3rd party etc...
# mkdir -p $KUDU_BUILD_DIR
# cd $KUDU_BUILD_DIR
# cmake <path to Kudu source dir>
# make
# DESTDIR=$KUDU_CLIENT_DIR make install
export KUDU_BUILD_DIR=${KUDU_BUILD_DIR-}
export KUDU_CLIENT_DIR=${KUDU_CLIENT_DIR-}
if [[ -n "$KUDU_BUILD_DIR" && -z "$KUDU_CLIENT_DIR" ]]; then
echo When KUDU_BUILD_DIR is set KUDU_CLIENT_DIR must also be set. 1>&2
return 1
fi
if [[ -z "$KUDU_BUILD_DIR" && -n "$KUDU_CLIENT_DIR" ]]; then
echo When KUDU_CLIENT_DIR is set KUDU_BUILD_DIR must also be set. 1>&2
return 1
fi
# Only applies when using Kudu from the toolchain
export USE_KUDU_DEBUG_BUILD=${USE_KUDU_DEBUG_BUILD-false}
# Kudu doesn't compile on some old Linux distros. KUDU_IS_SUPPORTED enables building Kudu
# into the backend. The frontend build is OS independent since it is Java.
if [[ -z "${KUDU_IS_SUPPORTED-}" ]]; then
if [[ -n "$KUDU_BUILD_DIR" ]]; then
KUDU_IS_SUPPORTED=true
else
KUDU_IS_SUPPORTED=false
if ! $IS_OSX; then
if ! which lsb_release &>/dev/null; then
echo Unable to find the 'lsb_release' command. \
Please ensure it is available in your PATH. 1>&2
return 1
fi
DISTRO_VERSION="$(lsb_release -sir 2>&1)"
if [[ $? -ne 0 ]]; then
echo lsb_release command failed, output was: "$DISTRO_VERSION" 1>&2
return 1
fi
# Remove spaces, trim minor versions, and convert to lowercase.
DISTRO_VERSION="$(tr -d ' \n' <<< "$DISTRO_VERSION" | cut -d. -f1 | tr "A-Z" "a-z")"
case "$DISTRO_VERSION" in
centos6 | centos7 | debian7 | debian8 | suselinux12 | suse12 | ubuntu* )
KUDU_IS_SUPPORTED=true;;
esac
fi
fi
fi
export KUDU_IS_SUPPORTED
export HADOOP_LZO="${HADOOP_LZO-$IMPALA_HOME/../hadoop-lzo}"
export IMPALA_LZO="${IMPALA_LZO-$IMPALA_HOME/../Impala-lzo}"
export IMPALA_AUX_TEST_HOME="${IMPALA_AUX_TEST_HOME-$IMPALA_HOME/../Impala-auxiliary-tests}"
@@ -561,6 +504,78 @@ export AUX_CLASSPATH="$AUX_CLASSPATH:$HBASE_HOME/lib/hbase-hadoop-compat-${IMPAL
export HBASE_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
# To use a local build of Kudu, set KUDU_BUILD_DIR to the path Kudu was built in and
# set KUDU_CLIENT_DIR to the path Kudu was installed in.
# Example:
#   git clone https://github.com/cloudera/kudu.git
#   ...build 3rd party etc...
#   mkdir -p $KUDU_BUILD_DIR
#   cd $KUDU_BUILD_DIR
#   cmake <path to Kudu source dir>
#   make
#   DESTDIR=$KUDU_CLIENT_DIR make install
export KUDU_BUILD_DIR=${KUDU_BUILD_DIR-}
export KUDU_CLIENT_DIR=${KUDU_CLIENT_DIR-}
# The two variables must be set together or not at all.
if [[ -n "$KUDU_BUILD_DIR" && -z "$KUDU_CLIENT_DIR" ]]; then
  echo When KUDU_BUILD_DIR is set KUDU_CLIENT_DIR must also be set. 1>&2
  return 1
fi
if [[ -z "$KUDU_BUILD_DIR" && -n "$KUDU_CLIENT_DIR" ]]; then
  echo When KUDU_CLIENT_DIR is set KUDU_BUILD_DIR must also be set. 1>&2
  return 1
fi

# Only applies to the minicluster Kudu (we always link against the libkudu_client for the
# overall build type) and does not apply when using a local Kudu build.
export USE_KUDU_DEBUG_BUILD=${USE_KUDU_DEBUG_BUILD-false}

# Kudu doesn't compile on some old Linux distros. KUDU_IS_SUPPORTED enables building Kudu
# into the backend. We prefer to pull Kudu in from CDH, but will fall back to using the
# toolchain Kudu for distros where the CDH tarballs are not provided by setting
# USE_CDH_KUDU to false.
# The frontend build is OS independent since it is Java.
export USE_CDH_KUDU=${USE_CDH_KUDU-true}
# Auto-detect only when the caller has not already chosen a value for KUDU_IS_SUPPORTED.
if [[ -z "${KUDU_IS_SUPPORTED-}" ]]; then
  if [[ -n "$KUDU_BUILD_DIR" ]]; then
    KUDU_IS_SUPPORTED=true
  else
    # Start from "unsupported"; only the distros listed below switch Kudu back on.
    KUDU_IS_SUPPORTED=false
    USE_CDH_KUDU=false
    if ! $IS_OSX; then
      if ! which lsb_release &>/dev/null; then
        echo Unable to find the 'lsb_release' command. \
            Please ensure it is available in your PATH. 1>&2
        return 1
      fi
      DISTRO_VERSION="$(lsb_release -sir 2>&1)"
      if [[ $? -ne 0 ]]; then
        echo lsb_release command failed, output was: "$DISTRO_VERSION" 1>&2
        return 1
      fi
      # Remove spaces, trim minor versions, and convert to lowercase.
      DISTRO_VERSION="$(tr -d ' \n' <<< "$DISTRO_VERSION" | cut -d. -f1 | tr "A-Z" "a-z")"
      case "$DISTRO_VERSION" in
        centos6 | centos7 | debian8 | suselinux12 | suse12 | ubuntu16 )
          USE_CDH_KUDU=true
          KUDU_IS_SUPPORTED=true;;
        ubuntu14 )
          USE_CDH_KUDU=false
          KUDU_IS_SUPPORTED=true;;
      esac
    fi
  fi
fi
export KUDU_IS_SUPPORTED

# The toolchain bootstrap script always uses the toolchain Kudu (to build the client
# stub) when KUDU_IS_SUPPORTED is not "true". Mirror that here so that a caller-provided
# KUDU_IS_SUPPORTED=false cannot leave USE_CDH_KUDU at its default of true and select
# the CDH version string for a toolchain download.
if [[ "$KUDU_IS_SUPPORTED" != "true" ]]; then
  USE_CDH_KUDU=false
fi

# Compare against the literal "true" rather than executing the value as a command, so
# that any other caller-supplied value simply selects the toolchain Kudu.
if [[ "$USE_CDH_KUDU" == "true" ]]; then
  export IMPALA_KUDU_VERSION=1.8.0-cdh6.x-SNAPSHOT
  export IMPALA_KUDU_HOME=${CDH_COMPONENTS_HOME}/kudu-$IMPALA_KUDU_VERSION
else
  export IMPALA_KUDU_VERSION=5211897
  export IMPALA_KUDU_HOME=${IMPALA_TOOLCHAIN}/kudu-$IMPALA_KUDU_VERSION
fi
unset IMPALA_KUDU_URL
# Set $THRIFT_HOME to the Thrift directory in toolchain.
export THRIFT_HOME="${IMPALA_TOOLCHAIN}/thrift-${IMPALA_THRIFT_VERSION}"

View File

@@ -273,12 +273,9 @@ def install_kudu_client_if_possible():
if os.environ["KUDU_IS_SUPPORTED"] != "true":
LOG.debug("Skipping Kudu: Kudu is not supported")
return
if not have_toolchain():
LOG.debug("Skipping Kudu: IMPALA_TOOLCHAIN not set")
return
toolchain_kudu_dir = toolchain_pkg_dir("kudu")
if not os.path.exists(toolchain_kudu_dir):
LOG.debug("Skipping Kudu: %s doesn't exist" % toolchain_kudu_dir)
kudu_base_dir = os.environ["IMPALA_KUDU_HOME"]
if not os.path.exists(kudu_base_dir):
LOG.debug("Skipping Kudu: %s doesn't exist" % kudu_base_dir)
return
LOG.info("Installing Kudu into the virtualenv")
@@ -316,8 +313,7 @@ def find_kudu_client_install_dir():
# If the toolchain appears to have been setup already, then the Kudu client is
# required to exist. It's possible that the toolchain won't be setup yet though
# since the toolchain bootstrap script depends on the virtualenv.
kudu_base_dir = os.path.join(os.environ["IMPALA_TOOLCHAIN"],
"kudu-%s" % os.environ["IMPALA_KUDU_VERSION"])
kudu_base_dir = os.environ["IMPALA_KUDU_HOME"]
install_dir = os.path.join(kudu_base_dir, "debug")
if os.path.exists(kudu_base_dir):
error_if_kudu_client_not_found(install_dir)

View File

@@ -27,7 +27,7 @@ if [[ -n "$KUDU_BUILD_DIR" ]]; then
KUDU_BIN_DIR="$KUDU_BUILD_DIR/bin"
KUDU_WWW_DIR="$KUDU_HOME/www"
else
KUDU_BIN_DIR="$IMPALA_TOOLCHAIN/kudu-$IMPALA_KUDU_VERSION"
KUDU_BIN_DIR="$IMPALA_KUDU_HOME"
if $USE_KUDU_DEBUG_BUILD; then
KUDU_BIN_DIR+=/debug/bin
else