IMPALA-11369: Separate thrift compiler for different component

Impala used to have one thrift compiler version to compile C++, Java,
and Python code.

Most Thrift serialization/deserialization between minor versions are
compatible with each other. So it is possible to have different thrift
compiler versions for different target codes. It is beneficial to do so
because it will allow Impala to upgrade separate components
independently.

This patch implements the infrastructure change required to do so. It
replace most of the 'THRIFT_*' environment variable and CMake variable
with 'THRFIT_CPP_*', 'THRFIT_JAVA_*', and 'THRFIT_PY_*' to compile C++,
Java, and Python code accordingly. All three still refer to the same
thrift version (thrift-0.11.0-p5).

Testing:
- Build Impala and pass core tests.

Change-Id: I56479dc69b79024d1a4d09211bbe88a61fa0c6a4
Reviewed-on: http://gerrit.cloudera.org:8080/18636
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Riza Suminto
2022-06-16 15:58:34 -07:00
committed by Impala Public Jenkins
parent 15e28b48a4
commit 06b1db4675
15 changed files with 314 additions and 139 deletions

View File

@@ -87,6 +87,18 @@ function(set_dep_root NAME)
PARENT_SCOPE) PARENT_SCOPE)
endfunction() endfunction()
# Helper function that, given a package name and target component, constructs the
# package_target_ROOT variable based on the version number extracted from the
# environment. Mainly used for thrift resolution.
function(set_dep_root_for_target NAME TARGET)
string(TOLOWER ${NAME} NAME_LOWER)
string(TOLOWER ${TARGET} TARGET_LOWER)
string(REPLACE "_" "-" NAME_LOWER ${NAME_LOWER})
set(VAL_NAME "IMPALA_${NAME}_${TARGET}_VERSION")
set(${NAME}_${TARGET}_ROOT $ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}/${NAME_LOWER}-$ENV{${VAL_NAME}}
PARENT_SCOPE)
endfunction()
# Define root path for all dependencies, this is in the form of # Define root path for all dependencies, this is in the form of
# set_dep_root(PACKAGE) -> # set_dep_root(PACKAGE) ->
# PACKAGE_ROOT set to $ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}/PACKAGE-$ENV{IMPALA_PACKAGE_VERSION} # PACKAGE_ROOT set to $ENV{IMPALA_TOOLCHAIN_PACKAGES_HOME}/PACKAGE-$ENV{IMPALA_PACKAGE_VERSION}
@@ -117,7 +129,9 @@ set(PROTOBUF_CLANG_ROOT
set_dep_root(RE2) set_dep_root(RE2)
set_dep_root(RAPIDJSON) set_dep_root(RAPIDJSON)
set_dep_root(SNAPPY) set_dep_root(SNAPPY)
set_dep_root(THRIFT) set_dep_root_for_target(THRIFT CPP)
set_dep_root_for_target(THRIFT JAVA)
set_dep_root_for_target(THRIFT PY)
set_dep_root(ZLIB) set_dep_root(ZLIB)
set_dep_root(CCTZ) set_dep_root(CCTZ)
set_dep_root(CURL) set_dep_root(CURL)
@@ -258,19 +272,39 @@ find_package(Ldap REQUIRED)
IMPALA_ADD_THIRDPARTY_LIB(ldap ${LDAP_INCLUDE_DIR} ${LDAP_STATIC_LIBRARY} "") IMPALA_ADD_THIRDPARTY_LIB(ldap ${LDAP_INCLUDE_DIR} ${LDAP_STATIC_LIBRARY} "")
IMPALA_ADD_THIRDPARTY_LIB(lber "" ${LBER_STATIC_LIBRARY} "") IMPALA_ADD_THIRDPARTY_LIB(lber "" ${LBER_STATIC_LIBRARY} "")
# The environment variable $THRIFT_HOME is set in impala-config.sh # The environment variable $THRIFT_CPP_HOME is set in impala-config.sh
# Make sure it's consistent with $THRIFT_ROOT. # Make sure it's consistent with $THRIFT_CPP_ROOT.
if (NOT ($ENV{THRIFT_HOME} STREQUAL ${THRIFT_ROOT})) if (NOT ($ENV{THRIFT_CPP_HOME} STREQUAL ${THRIFT_CPP_ROOT}))
message(FATAL_ERROR "THRIFT_ROOT (${THRIFT_ROOT}) differs from environment " message(FATAL_ERROR "THRIFT_CPP_ROOT (${THRIFT_CPP_ROOT}) differs from environment "
"variable THRIFT_HOME ($ENV{THRIFT_HOME}).") "variable THRIFT_CPP_HOME ($ENV{THRIFT_CPP_HOME}).")
endif() endif()
# find thrift headers and libs # find thrift headers and libs
set(THRIFT_FIND_QUIETLY TRUE) set(THRIFT_CPP_FIND_QUIETLY TRUE)
find_package(Thrift REQUIRED) find_package(ThriftCpp REQUIRED)
IMPALA_ADD_THIRDPARTY_LIB(thrift ${THRIFT_INCLUDE_DIR} ${THRIFT_STATIC_LIB} "") IMPALA_ADD_THIRDPARTY_LIB(thrift ${THRIFT_CPP_INCLUDE_DIR} ${THRIFT_CPP_STATIC_LIB} "")
message(STATUS "Thrift version: ${THRIFT_VERSION}") message(STATUS "Thrift CPP version: ${THRIFT_CPP_VERSION}")
message(STATUS "Thrift contrib dir: ${THRIFT_CONTRIB_DIR}") message(STATUS "Thrift CPP contrib dir: ${THRIFT_CPP_CONTRIB_DIR}")
message(STATUS "Thrift compiler: ${THRIFT_COMPILER}") message(STATUS "Thrift CPP compiler: ${THRIFT_CPP_COMPILER}")
# The environment variable $THRIFT_JAVA_HOME is set in impala-config.sh
# Make sure it's consistent with $THRIFT_JAVA_ROOT.
if (NOT ($ENV{THRIFT_JAVA_HOME} STREQUAL ${THRIFT_JAVA_ROOT}))
message(FATAL_ERROR "THRIFT_JAVA_ROOT (${THRIFT_JAVA_ROOT}) differs from environment "
"variable THRIFT_JAVA_HOME ($ENV{THRIFT_JAVA_HOME}).")
endif()
find_package(ThriftJava REQUIRED)
message(STATUS "Thrift JAVA version: ${THRIFT_JAVA_VERSION}")
message(STATUS "Thrift JAVA compiler: ${THRIFT_JAVA_COMPILER}")
# The environment variable $THRIFT_PY_HOME is set in impala-config.sh
# Make sure it's consistent with $THRIFT_PY_ROOT.
if (NOT ($ENV{THRIFT_PY_HOME} STREQUAL ${THRIFT_PY_ROOT}))
message(FATAL_ERROR "THRIFT_PY_ROOT (${THRIFT_PY_ROOT}) differs from environment "
"variable THRIFT_PY_HOME ($ENV{THRIFT_PY_HOME}).")
endif()
find_package(ThriftPython REQUIRED)
message(STATUS "Thrift PY version: ${THRIFT_PY_VERSION}")
message(STATUS "Thrift PY compiler: ${THRIFT_PY_COMPILER}")
# find flatbuffers headers, lib and compiler # find flatbuffers headers, lib and compiler
find_package(FlatBuffers REQUIRED) find_package(FlatBuffers REQUIRED)

View File

@@ -35,7 +35,7 @@ can do so through the environment variables and scripts listed below.
| JAVA_VERSION | "java-7-oracle-amd64" | Can override to set a local Java version. | | JAVA_VERSION | "java-7-oracle-amd64" | Can override to set a local Java version. |
| JAVA | "${JAVA_HOME}/bin/java" | Java binary location. | | JAVA | "${JAVA_HOME}/bin/java" | Java binary location. |
| CLASSPATH | | See bin/set-classpath.sh for details. | | CLASSPATH | | See bin/set-classpath.sh for details. |
| PYTHONPATH | Will be changed to include: "${IMPALA_HOME}/shell/gen-py" "${IMPALA_HOME}/testdata" "${THRIFT_HOME}/python/lib/python2.7/site-packages" "${HIVE_HOME}/lib/py" | | PYTHONPATH | Will be changed to include: "${IMPALA_HOME}/shell/gen-py" "${IMPALA_HOME}/testdata" "${THRIFT_PY_HOME}/python/lib/python2.7/site-packages" "${HIVE_HOME}/lib/py" |
## Source Directories for Impala ## Source Directories for Impala
@@ -64,7 +64,9 @@ can do so through the environment variables and scripts listed below.
| HADOOP_LIB_DIR | "${HADOOP_HOME}/lib" | For 'libhdfs.a' or 'libhdfs.so' | | HADOOP_LIB_DIR | "${HADOOP_HOME}/lib" | For 'libhdfs.a' or 'libhdfs.so' |
| HIVE_HOME | "${CDP_COMPONENTS_HOME}/{hive-${IMPALA_HIVE_VERSION}/" | | | HIVE_HOME | "${CDP_COMPONENTS_HOME}/{hive-${IMPALA_HIVE_VERSION}/" | |
| HBASE_HOME | "${CDP_COMPONENTS_HOME}/hbase-${IMPALA_HBASE_VERSION}/" | | | HBASE_HOME | "${CDP_COMPONENTS_HOME}/hbase-${IMPALA_HBASE_VERSION}/" | |
| THRIFT_HOME | "${IMPALA_TOOLCHAIN}/thrift-${IMPALA_THRIFT_VERSION}" | | | THRIFT_CPP_HOME | "${IMPALA_TOOLCHAIN}/thrift-${IMPALA_THRIFT_CPP_VERSION}" | |
| THRIFT_JAVA_HOME | "${IMPALA_TOOLCHAIN}/thrift-${IMPALA_THRIFT_JAVA_VERSION}" | |
| THRIFT_PY_HOME | "${IMPALA_TOOLCHAIN}/thrift-${IMPALA_THRIFT_PY_VERSION}" | |
## Hive Dependency Overrides ## Hive Dependency Overrides
Typically used together to specify a local build of Apache Hive. Care should be taken Typically used together to specify a local build of Apache Hive. Care should be taken

View File

@@ -349,7 +349,7 @@ set(CLANG_INCLUDE_FLAGS
${CLANG_INCLUDE_FLAGS} ${CLANG_INCLUDE_FLAGS}
"-I${CMAKE_CURRENT_SOURCE_DIR}/src" "-I${CMAKE_CURRENT_SOURCE_DIR}/src"
"-I${CMAKE_CURRENT_SOURCE_DIR}/generated-sources" "-I${CMAKE_CURRENT_SOURCE_DIR}/generated-sources"
"-I${THRIFT_INCLUDE_DIR}" "-I${THRIFT_CPP_INCLUDE_DIR}"
"-I${SQUEASEL_INCLUDE_DIR}" "-I${SQUEASEL_INCLUDE_DIR}"
"-I${GLOG_INCLUDE_DIR}" "-I${GLOG_INCLUDE_DIR}"
"-I${GFLAGS_INCLUDE_DIR}" "-I${GFLAGS_INCLUDE_DIR}"

View File

@@ -212,10 +212,11 @@ class TemplatedDownloadUnpackTarball(DownloadUnpackTarball):
class EnvVersionedPackage(TemplatedDownloadUnpackTarball): class EnvVersionedPackage(TemplatedDownloadUnpackTarball):
def __init__(self, name, url_prefix_tmpl, destination_basedir, explicit_version=None, def __init__(self, name, url_prefix_tmpl, destination_basedir, explicit_version=None,
archive_basename_tmpl=None, unpack_directory_tmpl=None, makedir=False, archive_basename_tmpl=None, unpack_directory_tmpl=None, makedir=False,
template_subs_in={}): template_subs_in={}, target_comp=None):
template_subs = template_subs_in template_subs = template_subs_in
template_subs["name"] = name template_subs["name"] = name
template_subs["version"] = self.__compute_version(name, explicit_version) template_subs["version"] = self.__compute_version(name, explicit_version,
target_comp)
# The common case is that X.tar.gz unpacks to X directory. archive_basename_tmpl # The common case is that X.tar.gz unpacks to X directory. archive_basename_tmpl
# allows overriding the value of X (which defaults to ${name}-${version}). # allows overriding the value of X (which defaults to ${name}-${version}).
# If X.tar.gz unpacks to Y directory, then unpack_directory_tmpl allows overriding Y. # If X.tar.gz unpacks to Y directory, then unpack_directory_tmpl allows overriding Y.
@@ -224,17 +225,19 @@ class EnvVersionedPackage(TemplatedDownloadUnpackTarball):
archive_name_tmpl = archive_basename_tmpl + ".tar.gz" archive_name_tmpl = archive_basename_tmpl + ".tar.gz"
if unpack_directory_tmpl is None: if unpack_directory_tmpl is None:
unpack_directory_tmpl = archive_basename_tmpl unpack_directory_tmpl = archive_basename_tmpl
url_tmpl = self.__compute_url(name, archive_name_tmpl, url_prefix_tmpl) url_tmpl = self.__compute_url(name, archive_name_tmpl, url_prefix_tmpl, target_comp)
super(EnvVersionedPackage, self).__init__(url_tmpl, archive_name_tmpl, super(EnvVersionedPackage, self).__init__(url_tmpl, archive_name_tmpl,
destination_basedir, unpack_directory_tmpl, makedir, template_subs) destination_basedir, unpack_directory_tmpl, makedir, template_subs)
def __compute_version(self, name, explicit_version): def __compute_version(self, name, explicit_version, target_comp=None):
if explicit_version is not None: if explicit_version is not None:
return explicit_version return explicit_version
else: else:
# When getting the version from the environment, we need to standardize the name # When getting the version from the environment, we need to standardize the name
# to match expected environment variables. # to match expected environment variables.
std_env_name = name.replace("-", "_").upper() std_env_name = name.replace("-", "_").upper()
if target_comp:
std_env_name += '_' + target_comp.upper()
version_env_var = "IMPALA_{0}_VERSION".format(std_env_name) version_env_var = "IMPALA_{0}_VERSION".format(std_env_name)
env_version = os.environ.get(version_env_var) env_version = os.environ.get(version_env_var)
if not env_version: if not env_version:
@@ -242,10 +245,12 @@ class EnvVersionedPackage(TemplatedDownloadUnpackTarball):
name, version_env_var)) name, version_env_var))
return env_version return env_version
def __compute_url(self, name, archive_name_tmpl, url_prefix_tmpl): def __compute_url(self, name, archive_name_tmpl, url_prefix_tmpl, target_comp=None):
# The URL defined in the environment (IMPALA_*_URL) takes precedence. If that is # The URL defined in the environment (IMPALA_*_URL) takes precedence. If that is
# not defined, use the standard URL (url_prefix + archive_name) # not defined, use the standard URL (url_prefix + archive_name)
std_env_name = name.replace("-", "_").upper() std_env_name = name.replace("-", "_").upper()
if target_comp:
std_env_name += '_' + target_comp.upper()
url_env_var = "IMPALA_{0}_URL".format(std_env_name) url_env_var = "IMPALA_{0}_URL".format(std_env_name)
url_tmpl = os.environ.get(url_env_var) url_tmpl = os.environ.get(url_env_var)
if not url_tmpl: if not url_tmpl:
@@ -260,6 +265,11 @@ class ToolchainPackage(EnvVersionedPackage):
logging.error("Impala environment not set up correctly, make sure " logging.error("Impala environment not set up correctly, make sure "
"$IMPALA_TOOLCHAIN_PACKAGES_HOME is set.") "$IMPALA_TOOLCHAIN_PACKAGES_HOME is set.")
sys.exit(1) sys.exit(1)
target_comp = None
if ":" in name:
parts = name.split(':')
name = parts[0]
target_comp = parts[1]
compiler = get_toolchain_compiler() compiler = get_toolchain_compiler()
label = get_platform_release_label(release=platform_release).toolchain label = get_platform_release_label(release=platform_release).toolchain
toolchain_build_id = os.environ["IMPALA_TOOLCHAIN_BUILD_ID"] toolchain_build_id = os.environ["IMPALA_TOOLCHAIN_BUILD_ID"]
@@ -276,7 +286,8 @@ class ToolchainPackage(EnvVersionedPackage):
explicit_version=explicit_version, explicit_version=explicit_version,
archive_basename_tmpl=archive_basename_tmpl, archive_basename_tmpl=archive_basename_tmpl,
unpack_directory_tmpl=unpack_directory_tmpl, unpack_directory_tmpl=unpack_directory_tmpl,
template_subs_in=template_subs) template_subs_in=template_subs,
target_comp=target_comp)
def needs_download(self): def needs_download(self):
# If the directory doesn't exist, we need the download # If the directory doesn't exist, we need the download
@@ -451,6 +462,17 @@ def create_directory_from_env_var(env_var):
os.makedirs(dir_name) os.makedirs(dir_name)
def get_unique_toolchain_downloads(packages):
toolchain_packages = map(ToolchainPackage, packages)
unique_pkg_directories = set()
unique_packages = []
for p in toolchain_packages:
if p.pkg_directory() not in unique_pkg_directories:
unique_packages.append(p)
unique_pkg_directories.add(p.pkg_directory())
return unique_packages
def get_toolchain_downloads(): def get_toolchain_downloads():
toolchain_packages = [] toolchain_packages = []
# The LLVM and GCC packages are the largest packages in the toolchain (Kudu is handled # The LLVM and GCC packages are the largest packages in the toolchain (Kudu is handled
@@ -464,8 +486,9 @@ def get_toolchain_downloads():
["avro", "binutils", "boost", "breakpad", "bzip2", "calloncehack", "cctz", "cmake", ["avro", "binutils", "boost", "breakpad", "bzip2", "calloncehack", "cctz", "cmake",
"crcutil", "curl", "flatbuffers", "gdb", "gflags", "glog", "gperftools", "gtest", "crcutil", "curl", "flatbuffers", "gdb", "gflags", "glog", "gperftools", "gtest",
"jwt-cpp", "libev", "libunwind", "lz4", "openldap", "openssl", "orc", "protobuf", "jwt-cpp", "libev", "libunwind", "lz4", "openldap", "openssl", "orc", "protobuf",
"python", "rapidjson", "re2", "snappy", "thrift", "tpc-h", "tpc-ds", "zlib", "python", "rapidjson", "re2", "snappy", "tpc-h", "tpc-ds", "zlib", "zstd"])
"zstd"]) toolchain_packages += get_unique_toolchain_downloads(
["thrift:cpp", "thrift:java", "thrift:py"])
protobuf_package_clang = ToolchainPackage( protobuf_package_clang = ToolchainPackage(
"protobuf", explicit_version=os.environ.get("IMPALA_PROTOBUF_CLANG_VERSION")) "protobuf", explicit_version=os.environ.get("IMPALA_PROTOBUF_CLANG_VERSION"))
toolchain_packages += [protobuf_package_clang] toolchain_packages += [protobuf_package_clang]

View File

@@ -168,12 +168,22 @@ export IMPALA_TPC_DS_VERSION=2.1.0
unset IMPALA_TPC_DS_URL unset IMPALA_TPC_DS_URL
export IMPALA_TPC_H_VERSION=2.17.0 export IMPALA_TPC_H_VERSION=2.17.0
unset IMPALA_TPC_H_URL unset IMPALA_TPC_H_URL
export IMPALA_THRIFT_VERSION=0.11.0-p5
unset IMPALA_THRIFT_URL
export IMPALA_ZLIB_VERSION=1.2.11 export IMPALA_ZLIB_VERSION=1.2.11
unset IMPALA_ZLIB_URL unset IMPALA_ZLIB_URL
export IMPALA_CALLONCEHACK_VERSION=1.0.0 export IMPALA_CALLONCEHACK_VERSION=1.0.0
unset IMPALA_CALLONCEHACK_URL unset IMPALA_CALLONCEHACK_URL
# Thrift related environment variables.
# IMPALA_THRIFT_POM_VERSION is used to populate IMPALA_THRIFT_JAVA_VERSION and
# thrift.version in java/pom.xml.
# If upgrading IMPALA_THRIFT_PY_VERSION, remember to also upgrade thrift version in
# shell/packaging/requirements.txt
export IMPALA_THRIFT_CPP_VERSION=0.11.0-p5
unset IMPALA_THRIFT_CPP_URL
export IMPALA_THRIFT_POM_VERSION=0.11.0
export IMPALA_THRIFT_JAVA_VERSION=${IMPALA_THRIFT_POM_VERSION}-p5
unset IMPALA_THRIFT_JAVA_URL
export IMPALA_THRIFT_PY_VERSION=0.11.0-p5
unset IMPALA_THRIFT_PY_URL
if [[ $OSTYPE == "darwin"* ]]; then if [[ $OSTYPE == "darwin"* ]]; then
IMPALA_CYRUS_SASL_VERSION=2.1.26 IMPALA_CYRUS_SASL_VERSION=2.1.26
@@ -748,8 +758,10 @@ export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY=\
"file://${IMPALA_KUDU_JAVA_HOME}/repository" "file://${IMPALA_KUDU_JAVA_HOME}/repository"
export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED=true export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED=true
# Set $THRIFT_HOME to the Thrift directory in toolchain. # Set $THRIFT_XXX_HOME to the Thrift directory in toolchain.
export THRIFT_HOME="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_VERSION}" export THRIFT_CPP_HOME="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_CPP_VERSION}"
export THRIFT_JAVA_HOME="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_JAVA_VERSION}"
export THRIFT_PY_HOME="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_PY_VERSION}"
# ASAN needs a matching version of llvm-symbolizer to symbolize stack traces. # ASAN needs a matching version of llvm-symbolizer to symbolize stack traces.
export ASAN_SYMBOLIZER_PATH="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/llvm-${IMPALA_LLVM_ASAN_VERSION}/bin/llvm-symbolizer" export ASAN_SYMBOLIZER_PATH="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/llvm-${IMPALA_LLVM_ASAN_VERSION}/bin/llvm-symbolizer"
@@ -801,7 +813,9 @@ echo "HBASE_HOME = $HBASE_HOME"
echo "HBASE_CONF_DIR = $HBASE_CONF_DIR" echo "HBASE_CONF_DIR = $HBASE_CONF_DIR"
echo "RANGER_HOME = $RANGER_HOME" echo "RANGER_HOME = $RANGER_HOME"
echo "RANGER_CONF_DIR = $RANGER_CONF_DIR " echo "RANGER_CONF_DIR = $RANGER_CONF_DIR "
echo "THRIFT_HOME = $THRIFT_HOME" echo "THRIFT_CPP_HOME = $THRIFT_CPP_HOME"
echo "THRIFT_JAVA_HOME = $THRIFT_JAVA_HOME"
echo "THRIFT_PY_HOME = $THRIFT_PY_HOME"
echo "CLASSPATH = $CLASSPATH" echo "CLASSPATH = $CLASSPATH"
echo "LIBHDFS_OPTS = $LIBHDFS_OPTS" echo "LIBHDFS_OPTS = $LIBHDFS_OPTS"
echo "JAVA_HOME = $JAVA_HOME" echo "JAVA_HOME = $JAVA_HOME"

View File

@@ -28,7 +28,6 @@ PYTHONPATH=${PYTHONPATH}:${IMPALA_HOME}/bin
# Default version of thrift for the impala-shell is thrift >= 0.11.0. # Default version of thrift for the impala-shell is thrift >= 0.11.0.
PYTHONPATH=${PYTHONPATH}:${SHELL_HOME}/gen-py PYTHONPATH=${PYTHONPATH}:${SHELL_HOME}/gen-py
IMPALA_THRIFT_PY_VERSION=${IMPALA_THRIFT_VERSION}
THRIFT_PY_ROOT="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_PY_VERSION}" THRIFT_PY_ROOT="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_PY_VERSION}"

View File

@@ -32,11 +32,11 @@ PYTHONPATH=${PYTHONPATH}:${IMPALA_HOME}/infra/python/env-gcc${IMPALA_GCC_VERSION
# There should be just a single version of python that created the # There should be just a single version of python that created the
# site-packages directory. We find it by performing shell independent expansion # site-packages directory. We find it by performing shell independent expansion
# of the following pattern: # of the following pattern:
# ${THRIFT_HOME}/python/lib{64,}/python*/site-packages # ${THRIFT_PY_HOME}/python/lib{64,}/python*/site-packages
# Note: this could go wrong if we have used two different versions of # Note: this could go wrong if we have used two different versions of
# Python to build Thrift on this machine, and the first version is not # Python to build Thrift on this machine, and the first version is not
# compatible with the second. # compatible with the second.
for PYTHON_DIR in ${THRIFT_HOME}/python/lib{64,}; do for PYTHON_DIR in ${THRIFT_PY_HOME}/python/lib{64,}; do
[[ -d ${PYTHON_DIR} ]] || continue [[ -d ${PYTHON_DIR} ]] || continue
for PKG_DIR in ${PYTHON_DIR}/python*/site-packages; do for PKG_DIR in ${PYTHON_DIR}/python*/site-packages; do
PYTHONPATH=${PYTHONPATH}:${PKG_DIR}/ PYTHONPATH=${PYTHONPATH}:${PKG_DIR}/

View File

@@ -1,92 +0,0 @@
##############################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################################
# - Find Thrift (a cross platform RPC lib/tool)
# THRIFT_ROOT hints the location
#
# This module defines
# THRIFT_VERSION, version string of ant if found
# THRIFT_INCLUDE_DIR, where to find THRIFT headers
# THRIFT_CONTRIB_DIR, where contrib thrift files (e.g. fb303.thrift) are installed
# THRIFT_LIBS, THRIFT libraries
# thriftstatic - imported static library
# prefer the thrift version supplied in THRIFT_HOME
if (NOT THRIFT_FIND_QUIETLY)
message(STATUS "THRIFT_HOME: $ENV{THRIFT_HOME}")
endif()
find_path(THRIFT_INCLUDE_DIR thrift/Thrift.h HINTS
${THRIFT_ROOT}/include
$ENV{THRIFT_HOME}/include/
/usr/local/include/
/opt/local/include/
)
find_path(THRIFT_CONTRIB_DIR share/fb303/if/fb303.thrift HINTS
${THRIFT_ROOT}/include
$ENV{THRIFT_HOME}
/usr/local/
)
set(THRIFT_LIB_PATHS
${THRIFT_ROOT}/lib
$ENV{THRIFT_HOME}/lib
/usr/local/lib
/opt/local/lib)
find_path(THRIFT_STATIC_LIB_PATH libthrift.a PATHS ${THRIFT_LIB_PATHS})
# prefer the thrift version supplied in THRIFT_HOME
find_library(THRIFT_LIB NAMES thrift HINTS ${THRIFT_LIB_PATHS})
find_program(THRIFT_COMPILER thrift
${THRIFT_ROOT}/bin
$ENV{THRIFT_HOME}/bin
/usr/local/bin
/usr/bin
NO_DEFAULT_PATH
)
if (THRIFT_LIB)
set(THRIFT_LIBS ${THRIFT_LIB})
set(THRIFT_STATIC_LIB ${THRIFT_STATIC_LIB_PATH}/libthrift.a)
exec_program(${THRIFT_COMPILER}
ARGS -version OUTPUT_VARIABLE THRIFT_VERSION RETURN_VALUE THRIFT_RETURN)
if (NOT THRIFT_FIND_QUIETLY)
message(STATUS "Thrift version: ${THRIFT_VERSION}")
endif ()
set(THRIFT_FOUND TRUE)
# for static linking with Thrift, THRIFT_STATIC_LIB is set in FindThrift.cmake
add_library(thriftstatic STATIC IMPORTED)
set_target_properties(thriftstatic PROPERTIES IMPORTED_LOCATION ${THRIFT_STATIC_LIB})
else ()
set(THRIFT_FOUND FALSE)
message(FATAL_ERROR "Thrift compiler/libraries NOT found. "
"Thrift support will be disabled (${THRIFT_RETURN}, "
"${THRIFT_INCLUDE_DIR}, ${THRIFT_LIB})")
endif ()
mark_as_advanced(
THRIFT_LIB
THRIFT_COMPILER
THRIFT_INCLUDE_DIR
thriftstatic
)

View File

@@ -0,0 +1,92 @@
##############################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################################
# - Find Thrift (a cross platform RPC lib/tool)
# THRIFT_CPP_ROOT hints the location
#
# This module defines
# THRIFT_CPP_INCLUDE_DIR, where to find THRIFT headers
# THRIFT_CPP_CONTRIB_DIR, where contrib thrift files (e.g. fb303.thrift) are installed
# THRIFT_CPP_LIBS, THRIFT libraries
# THRIFT_CPP_COMPILER, where to find thrift compiler
# thriftstatic_cpp - imported static library
# prefer the thrift version supplied in THRIFT_CPP_HOME
if (NOT THRIFT_CPP_FIND_QUIETLY)
message(STATUS "THRIFT_CPP_HOME: $ENV{THRIFT_CPP_HOME}")
endif()
find_path(THRIFT_CPP_INCLUDE_DIR thrift/Thrift.h HINTS
${THRIFT_CPP_ROOT}/include
$ENV{THRIFT_CPP_HOME}/include/
/usr/local/include/
/opt/local/include/
)
find_path(THRIFT_CPP_CONTRIB_DIR share/fb303/if/fb303.thrift HINTS
${THRIFT_CPP_ROOT}/include
$ENV{THRIFT_CPP_HOME}
/usr/local/
)
set(THRIFT_CPP_LIB_PATHS
${THRIFT_CPP_ROOT}/lib
$ENV{THRIFT_CPP_HOME}/lib
/usr/local/lib
/opt/local/lib)
find_path(THRIFT_CPP_STATIC_LIB_PATH libthrift.a PATHS ${THRIFT_CPP_LIB_PATHS})
# prefer the thrift version supplied in THRIFT_CPP_HOME
find_library(THRIFT_CPP_LIB NAMES thrift HINTS ${THRIFT_CPP_LIB_PATHS})
find_program(THRIFT_CPP_COMPILER thrift
${THRIFT_CPP_ROOT}/bin
$ENV{THRIFT_CPP_HOME}/bin
/usr/local/bin
/usr/bin
NO_DEFAULT_PATH
)
if (THRIFT_CPP_LIB)
set(THRIFT_CPP_LIBS ${THRIFT_CPP_LIB})
set(THRIFT_CPP_STATIC_LIB ${THRIFT_CPP_STATIC_LIB_PATH}/libthrift.a)
exec_program(${THRIFT_CPP_COMPILER}
ARGS -version OUTPUT_VARIABLE THRIFT_CPP_VERSION RETURN_VALUE THRIFT_CPP_RETURN)
if (NOT THRIFT_CPP_FIND_QUIETLY)
message(STATUS "Thrift version: ${THRIFT_CPP_VERSION}")
endif ()
set(THRIFT_CPP_FOUND TRUE)
# for static linking with Thrift, THRIFT_CPP_STATIC_LIB is set in FindThrift.cmake
add_library(thriftstatic_cpp STATIC IMPORTED)
set_target_properties(thriftstatic_cpp PROPERTIES IMPORTED_LOCATION ${THRIFT_CPP_STATIC_LIB})
else ()
set(THRIFT_CPP_FOUND FALSE)
message(FATAL_ERROR "Thrift compiler/libraries NOT found. "
"Thrift support will be disabled (${THRIFT_CPP_RETURN}, "
"${THRIFT_CPP_INCLUDE_DIR}, ${THRIFT_CPP_LIB})")
endif ()
mark_as_advanced(
THRIFT_CPP_LIB
THRIFT_CPP_COMPILER
THRIFT_CPP_INCLUDE_DIR
thriftstatic_cpp
)

View File

@@ -0,0 +1,49 @@
##############################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################################
# - Find Thrift (a cross platform RPC lib/tool)
# THRIFT_JAVA_ROOT hints the location
#
# This module defines
# THRIFT_JAVA_CONTRIB_DIR, where contrib thrift files (e.g. fb303.thrift) are installed
# THRIFT_JAVA_COMPILER, where to find thrift compiler
# prefer the thrift version supplied in THRIFT_JAVA_HOME
if (NOT THRIFT_JAVA_FIND_QUIETLY)
message(STATUS "THRIFT_JAVA_HOME: $ENV{THRIFT_JAVA_HOME}")
endif()
find_path(THRIFT_JAVA_CONTRIB_DIR share/fb303/if/fb303.thrift HINTS
${THRIFT_JAVA_ROOT}/include
$ENV{THRIFT_JAVA_HOME}
/usr/local/
)
find_program(THRIFT_JAVA_COMPILER thrift
${THRIFT_JAVA_ROOT}/bin
$ENV{THRIFT_JAVA_HOME}/bin
/usr/local/bin
/usr/bin
NO_DEFAULT_PATH
)
mark_as_advanced(
THRIFT_JAVA_COMPILER
)

View File

@@ -0,0 +1,49 @@
##############################################################################
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################################
# - Find Thrift (a cross platform RPC lib/tool)
# THRIFT_PY_ROOT hints the location
#
# This module defines
# THRIFT_PY_CONTRIB_DIR, where contrib thrift files (e.g. fb303.thrift) are installed
# THRIFT_PY_COMPILER, where to find thrift compiler
# prefer the thrift version supplied in THRIFT_PY_HOME
if (NOT THRIFT_PY_FIND_QUIETLY)
message(STATUS "THRIFT_PY_HOME: $ENV{THRIFT_PY_HOME}")
endif()
find_path(THRIFT_PY_CONTRIB_DIR share/fb303/if/fb303.thrift HINTS
${THRIFT_PY_ROOT}/include
$ENV{THRIFT_PY_HOME}
/usr/local/
)
find_program(THRIFT_PY_COMPILER thrift
${THRIFT_PY_ROOT}/bin
$ENV{THRIFT_PY_HOME}/bin
/usr/local/bin
/usr/bin
NO_DEFAULT_PATH
)
mark_as_advanced(
THRIFT_PY_COMPILER
)

View File

@@ -58,7 +58,7 @@ function(THRIFT_GEN VAR)
# It depends on hive_meta_store, which in turn depends on fb303. # It depends on hive_meta_store, which in turn depends on fb303.
# The java dependency is handled by maven. # The java dependency is handled by maven.
# We need to generate C++ src file for the parent dependencies using the "-r" option. # We need to generate C++ src file for the parent dependencies using the "-r" option.
set(CPP_ARGS ${THRIFT_INCLUDE_DIR_OPTION} set(CPP_ARGS ${THRIFT_CPP_INCLUDE_DIR_OPTION}
--gen cpp:moveable_types,no_default_operators,templates -o ${BE_OUTPUT_DIR}) --gen cpp:moveable_types,no_default_operators,templates -o ${BE_OUTPUT_DIR})
IF (THRIFT_FILE STREQUAL "beeswax.thrift") IF (THRIFT_FILE STREQUAL "beeswax.thrift")
set(CPP_ARGS -r ${CPP_ARGS}) set(CPP_ARGS -r ${CPP_ARGS})
@@ -69,8 +69,8 @@ function(THRIFT_GEN VAR)
# from Hive or Parquet. # from Hive or Parquet.
add_custom_command( add_custom_command(
OUTPUT ${OUTPUT_BE_FILE} OUTPUT ${OUTPUT_BE_FILE}
COMMAND ${THRIFT_COMPILER} ${CPP_ARGS} ${THRIFT_FILE} COMMAND ${THRIFT_CPP_COMPILER} ${CPP_ARGS} ${THRIFT_FILE}
COMMAND ${THRIFT_COMPILER} ${PYTHON_ARGS} ${THRIFT_FILE} COMMAND ${THRIFT_PY_COMPILER} ${PYTHON_ARGS} ${THRIFT_FILE}
DEPENDS ${ABS_THRIFT_FILE} DEPENDS ${ABS_THRIFT_FILE}
COMMENT "Running thrift compiler on ${THRIFT_FILE}" COMMENT "Running thrift compiler on ${THRIFT_FILE}"
VERBATIM VERBATIM
@@ -78,9 +78,9 @@ function(THRIFT_GEN VAR)
ELSE (THRIFT_FILE STREQUAL ${TCLI_SERVICE_THRIFT} OR THRIFT_FILE STREQUAL "parquet.thrift") ELSE (THRIFT_FILE STREQUAL ${TCLI_SERVICE_THRIFT} OR THRIFT_FILE STREQUAL "parquet.thrift")
add_custom_command( add_custom_command(
OUTPUT ${OUTPUT_BE_FILE} OUTPUT ${OUTPUT_BE_FILE}
COMMAND ${THRIFT_COMPILER} ${CPP_ARGS} ${THRIFT_FILE} COMMAND ${THRIFT_CPP_COMPILER} ${CPP_ARGS} ${THRIFT_FILE}
COMMAND ${THRIFT_COMPILER} ${JAVA_FE_ARGS} ${THRIFT_FILE} COMMAND ${THRIFT_JAVA_COMPILER} ${JAVA_FE_ARGS} ${THRIFT_FILE}
COMMAND ${THRIFT_COMPILER} ${PYTHON_ARGS} ${THRIFT_FILE} COMMAND ${THRIFT_PY_COMPILER} ${PYTHON_ARGS} ${THRIFT_FILE}
DEPENDS ${ABS_THRIFT_FILE} DEPENDS ${ABS_THRIFT_FILE}
COMMENT "Running thrift compiler on ${THRIFT_FILE}" COMMENT "Running thrift compiler on ${THRIFT_FILE}"
VERBATIM VERBATIM
@@ -113,7 +113,7 @@ function(THRIFT_GEN_DS VAR)
list(APPEND ${VAR} ${OUTPUT_FILE}) list(APPEND ${VAR} ${OUTPUT_FILE})
add_custom_command( add_custom_command(
OUTPUT ${OUTPUT_FILE} OUTPUT ${OUTPUT_FILE}
COMMAND ${THRIFT_COMPILER} ${JAVA_EXT_DS_ARGS} ${THRIFT_FILE} && COMMAND ${THRIFT_JAVA_COMPILER} ${JAVA_EXT_DS_ARGS} ${THRIFT_FILE} &&
mkdir -p ${OUTPUT_DIR} && echo ${OUTPUT_FILE_MESSAGE} > ${OUTPUT_FILE} mkdir -p ${OUTPUT_DIR} && echo ${OUTPUT_FILE_MESSAGE} > ${OUTPUT_FILE}
DEPENDS ${ABS_THRIFT_FILE} DEPENDS ${ABS_THRIFT_FILE}
COMMENT "Running thrift compiler for ext-data-source on ${THRIFT_FILE}" COMMENT "Running thrift compiler for ext-data-source on ${THRIFT_FILE}"
@@ -125,9 +125,15 @@ endfunction(THRIFT_GEN_DS)
set(HIVE_THRIFT_SOURCE_DIR "hive-$ENV{IMPALA_HIVE_MAJOR_VERSION}-api") set(HIVE_THRIFT_SOURCE_DIR "hive-$ENV{IMPALA_HIVE_MAJOR_VERSION}-api")
set(TCLI_SERVICE_THRIFT "${HIVE_THRIFT_SOURCE_DIR}/TCLIService.thrift") set(TCLI_SERVICE_THRIFT "${HIVE_THRIFT_SOURCE_DIR}/TCLIService.thrift")
message("Using Thrift compiler: ${THRIFT_COMPILER}") message("Using Thrift CPP compiler: ${THRIFT_CPP_COMPILER}")
set(THRIFT_INCLUDE_DIR_OPTION -I ${THRIFT_CONTRIB_DIR} -I $ENV{HIVE_METASTORE_THRIFT_DIR} message("Using Thrift JAVA compiler: ${THRIFT_JAVA_COMPILER}")
-I ${HIVE_THRIFT_SOURCE_DIR}) message("Using Thrift PY compiler: ${THRIFT_PY_COMPILER}")
set(THRIFT_CPP_INCLUDE_DIR_OPTION -I ${THRIFT_CPP_CONTRIB_DIR}
-I $ENV{HIVE_METASTORE_THRIFT_DIR} -I ${HIVE_THRIFT_SOURCE_DIR})
set(THRIFT_JAVA_INCLUDE_DIR_OPTION -I ${THRIFT_JAVA_CONTRIB_DIR}
-I $ENV{HIVE_METASTORE_THRIFT_DIR} -I ${HIVE_THRIFT_SOURCE_DIR})
set(THRIFT_PY_INCLUDE_DIR_OPTION -I ${THRIFT_PY_CONTRIB_DIR}
-I $ENV{HIVE_METASTORE_THRIFT_DIR} -I ${HIVE_THRIFT_SOURCE_DIR})
set(BE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/be/generated-sources) set(BE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/be/generated-sources)
set(FE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/fe/generated-sources) set(FE_OUTPUT_DIR ${CMAKE_SOURCE_DIR}/fe/generated-sources)
# TODO: avoid duplicating generated java classes # TODO: avoid duplicating generated java classes
@@ -141,9 +147,9 @@ file(MAKE_DIRECTORY ${PYTHON_OUTPUT_DIR})
file(MAKE_DIRECTORY ${HIVE_THRIFT_SOURCE_DIR}) file(MAKE_DIRECTORY ${HIVE_THRIFT_SOURCE_DIR})
# Args passed to thrift for Java gen # Args passed to thrift for Java gen
set(JAVA_FE_ARGS ${THRIFT_INCLUDE_DIR_OPTION} --gen java -o ${FE_OUTPUT_DIR}) set(JAVA_FE_ARGS ${THRIFT_JAVA_INCLUDE_DIR_OPTION} --gen java -o ${FE_OUTPUT_DIR})
set(JAVA_EXT_DS_ARGS ${THRIFT_INCLUDE_DIR_OPTION} --gen java -o ${EXT_DS_OUTPUT_DIR}) set(JAVA_EXT_DS_ARGS ${THRIFT_JAVA_INCLUDE_DIR_OPTION} --gen java -o ${EXT_DS_OUTPUT_DIR})
set(PYTHON_ARGS ${THRIFT_INCLUDE_DIR_OPTION} -r --gen py:no_utf8strings -o set(PYTHON_ARGS ${THRIFT_PY_INCLUDE_DIR_OPTION} -r --gen py:no_utf8strings -o
${PYTHON_OUTPUT_DIR}) ${PYTHON_OUTPUT_DIR})
set (EXT_DATA_SRC_FILES set (EXT_DATA_SRC_FILES

View File

@@ -47,7 +47,7 @@ under the License.
<knox.version>${env.IMPALA_KNOX_VERSION}</knox.version> <knox.version>${env.IMPALA_KNOX_VERSION}</knox.version>
<gcs.version>${env.IMPALA_GCS_VERSION}</gcs.version> <gcs.version>${env.IMPALA_GCS_VERSION}</gcs.version>
<cos.version>${env.IMPALA_COS_VERSION}</cos.version> <cos.version>${env.IMPALA_COS_VERSION}</cos.version>
<thrift.version>0.11.0</thrift.version> <thrift.version>${env.IMPALA_THRIFT_POM_VERSION}</thrift.version>
<impala.extdatasrc.api.version>${project.version}</impala.extdatasrc.api.version> <impala.extdatasrc.api.version>${project.version}</impala.extdatasrc.api.version>
<impala.query.event.hook.api.version>${project.version}</impala.query.event.hook.api.version> <impala.query.event.hook.api.version>${project.version}</impala.query.event.hook.api.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>

View File

@@ -61,7 +61,6 @@ SHELL_HOME=${IMPALA_HOME}/shell
BUILD_DIR=${SHELL_HOME}/build BUILD_DIR=${SHELL_HOME}/build
TARBALL_ROOT=${BUILD_DIR}/impala-shell-${VERSION} TARBALL_ROOT=${BUILD_DIR}/impala-shell-${VERSION}
IMPALA_THRIFT_PY_VERSION="${IMPALA_THRIFT_VERSION}"
THRIFT_GEN_PY_DIR="${SHELL_HOME}/gen-py" THRIFT_GEN_PY_DIR="${SHELL_HOME}/gen-py"
echo "Deleting all files in ${TARBALL_ROOT}/{gen-py,lib,ext-py}" echo "Deleting all files in ${TARBALL_ROOT}/{gen-py,lib,ext-py}"

View File

@@ -43,7 +43,7 @@ MODULE_LIB_DIR="${PACKAGE_DIR}"/impala_shell
NO_CLEAN_DIST="${NO_CLEAN_DIST:-}" NO_CLEAN_DIST="${NO_CLEAN_DIST:-}"
THRIFT_GEN_PY_DIR=${SHELL_HOME}/gen-py THRIFT_GEN_PY_DIR=${SHELL_HOME}/gen-py
THRIFT_PY_ROOT="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_VERSION}" THRIFT_PY_ROOT="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_PY_VERSION}"
assemble_package_files() { assemble_package_files() {
mkdir -p "${MODULE_LIB_DIR}" mkdir -p "${MODULE_LIB_DIR}"