mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-9838: Switch to GCC 7.5.0
This upgrades GCC and libstdc++ to version 7.5.0. There
have been ABI changes since 4.9.2, so this means that
the native-toolchain produced with the new compiler is
not interoperable with one produced by the old compiler.
To allow that transition, IMPALA_TOOLCHAIN_PACKAGES_HOME
is now a subdirectory of IMPALA_TOOLCHAIN
(toolchain-packages-gcc${IMPALA_GCC_VERSION}) to distinguish
it from the old packages.
Some Python packages in the impala-python virtualenv are
compiled using the toolchain GCC and now use the new ABI.
This leads to two changes:
1. When constructing the LD_LIBRARY_PATH for impala-python,
we include the GCC libstdc++ libraries. Otherwise, certain
Python packages that use C++ fail on older OSes like CentOS 7.
This fixes IMPALA-9804.
2. Since developers work on various branches, this changes
the virtualenv's directory location to a directory with
the GCC version in the name. This allows the virtualenv
built with GCC 7 to coexist with the current virtualenv
built with GCC 4.9.2. The location for the old virtualenv is
${IMPALA_HOME}/infra/python/env. The new location is
${IMPALA_HOME}/infra/python/env-gcc${IMPALA_GCC_VERSION}. This
required updating several impala-python scripts.
There are various odds-and-ends related to the transition:
1. Due to the small string optimization, the size of std::string
changed, which means that various data structures also changed
in size. This required updating some static asserts.
2. There is a bug in clang-tidy that reports a use-after-free
for some code using std::shared_ptr. Clang is not modeling
the shared_ptr correctly, so it is a false positive. As a workaround,
this disables the clang-analyzer-cplusplus.NewDelete diagnostic.
3. Various small compilation fixes (includes, etc.).
Performance testing:
- Ran single-node performance tests on TPC-H for the following
configurations:
- TPC-H Parquet scale 30 with normal configurations
- TPC-H Parquet scale 30 with codegen disabled
- TPC-H Kudu scale 10
None showed any significant regressions. Full results are
posted on the JIRA.
- Ran single-node performance tests on targeted-perf scale 10.
No significant regressions.
- The size of binaries (impalad, etc) is slightly smaller with the new GCC:
GCC 4.9.2 release impalad binary: 545664
GCC 7.5.0 release impalad binary: 539900
- Compilation in DEBUG mode is roughly 15-25% faster
Functional testing:
- Ran core jobs, exhaustive release jobs, UBSAN
Change-Id: Ia0beb2b618ba669c9699f8dbc0c52d1203d004e4
Reviewed-on: http://gerrit.cloudera.org:8080/16045
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
@@ -24,6 +24,7 @@ Checks: "-*,clang*,\
|
||||
-clang-analyzer-core.uninitialized.ArraySubscript,\
|
||||
-clang-analyzer-core.uninitialized.Assign,\
|
||||
-clang-analyzer-core.uninitialized.Branch,\
|
||||
-clang-analyzer-cplusplus.NewDelete,\
|
||||
-clang-analyzer-cplusplus.NewDeleteLeaks,\
|
||||
-clang-analyzer-deadcode.DeadStores,\
|
||||
-clang-analyzer-optin.performance.Padding,\
|
||||
|
||||
@@ -21,6 +21,8 @@
|
||||
|
||||
#include "sorter.h"
|
||||
|
||||
#include <random>
|
||||
|
||||
namespace impala {
|
||||
|
||||
/// Wrapper around BufferPool::PageHandle that tracks additional info about the page.
|
||||
|
||||
@@ -18,8 +18,6 @@
|
||||
#include "runtime/sorter-internal.h"
|
||||
|
||||
#include <boost/bind.hpp>
|
||||
#include <boost/random/mersenne_twister.hpp>
|
||||
#include <boost/random/uniform_int.hpp>
|
||||
#include <gutil/strings/substitute.h>
|
||||
|
||||
#include "exprs/scalar-expr-evaluator.h"
|
||||
@@ -34,8 +32,6 @@
|
||||
|
||||
#include "common/names.h"
|
||||
|
||||
using boost::uniform_int;
|
||||
using boost::mt19937_64;
|
||||
using namespace strings;
|
||||
|
||||
namespace impala {
|
||||
|
||||
@@ -17,6 +17,7 @@
|
||||
|
||||
#include "runtime/thread-resource-mgr.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
|
||||
|
||||
@@ -78,7 +78,7 @@ inline bool operator<(const UniqueIdPB& lhs, const UniqueIdPB& rhs) {
|
||||
}
|
||||
|
||||
// TNetworkAddress
|
||||
STATIC_ASSERT_SIZE(TNetworkAddress, 24);
|
||||
STATIC_ASSERT_SIZE(TNetworkAddress, 48);
|
||||
|
||||
inline bool operator==(const TNetworkAddress& lhs, const TNetworkAddress& rhs) {
|
||||
return std::tie(lhs.hostname, lhs.port) == std::tie(rhs.hostname, rhs.port);
|
||||
@@ -109,7 +109,7 @@ inline bool operator==(const TStatus& lhs, const TStatus& rhs) {
|
||||
}
|
||||
|
||||
// TCounter
|
||||
STATIC_ASSERT_SIZE(TCounter, 32);
|
||||
STATIC_ASSERT_SIZE(TCounter, 56);
|
||||
|
||||
inline bool operator==(const TCounter& lhs, const TCounter& rhs) {
|
||||
return std::tie(lhs.name, lhs.unit, lhs.value)
|
||||
|
||||
@@ -68,12 +68,12 @@ fi
|
||||
# moving to a different build of the toolchain, e.g. when a version is bumped or a
|
||||
# compile option is changed. The build id can be found in the output of the toolchain
|
||||
# build jobs, it is constructed from the build number and toolchain git hash prefix.
|
||||
export IMPALA_TOOLCHAIN_BUILD_ID=29-34813f22eb
|
||||
export IMPALA_TOOLCHAIN_BUILD_ID=30-5570b0cd64
|
||||
# Versions of toolchain dependencies.
|
||||
# -----------------------------------
|
||||
export IMPALA_AVRO_VERSION=1.7.4-p5
|
||||
unset IMPALA_AVRO_URL
|
||||
export IMPALA_BINUTILS_VERSION=2.26.1
|
||||
export IMPALA_BINUTILS_VERSION=2.28
|
||||
unset IMPALA_BINUTILS_URL
|
||||
export IMPALA_BOOST_VERSION=1.61.0-p2
|
||||
unset IMPALA_BOOST_URL
|
||||
@@ -91,7 +91,7 @@ export IMPALA_CYRUS_SASL_VERSION=2.1.23
|
||||
unset IMPALA_CYRUS_SASL_URL
|
||||
export IMPALA_FLATBUFFERS_VERSION=1.6.0
|
||||
unset IMPALA_FLATBUFFERS_URL
|
||||
export IMPALA_GCC_VERSION=4.9.2
|
||||
export IMPALA_GCC_VERSION=7.5.0
|
||||
unset IMPALA_GCC_URL
|
||||
export IMPALA_GDB_VERSION=7.9.1-p1
|
||||
unset IMPALA_GDB_URL
|
||||
@@ -212,10 +212,10 @@ if [ -f "$IMPALA_HOME/bin/impala-config-local.sh" ]; then
|
||||
fi
|
||||
|
||||
# IMPALA_TOOLCHAIN_PACKAGES_HOME is the location inside IMPALA_TOOLCHAIN where native
|
||||
# toolchain packages are placed. This is currently the same as IMPALA_TOOLCHAIN, but
|
||||
# in future, this will be a subdirectory under IMPALA_TOOLCHAIN to allow different
|
||||
# compiler versions.
|
||||
export IMPALA_TOOLCHAIN_PACKAGES_HOME=${IMPALA_TOOLCHAIN}
|
||||
# toolchain packages are placed. This uses a subdirectory that contains the information
|
||||
# about the compiler to allow using different compiler versions.
|
||||
export IMPALA_TOOLCHAIN_PACKAGES_HOME=\
|
||||
${IMPALA_TOOLCHAIN}/toolchain-packages-gcc${IMPALA_GCC_VERSION}
|
||||
|
||||
export CDP_HADOOP_URL=${CDP_HADOOP_URL-}
|
||||
export CDP_HBASE_URL=${CDP_HBASE_URL-}
|
||||
|
||||
@@ -18,4 +18,4 @@
|
||||
# under the License.
|
||||
|
||||
source "$(dirname "$0")/impala-python-common.sh"
|
||||
exec "$PY_DIR/env/bin/flake8" "$@"
|
||||
exec "$PY_ENV_DIR/bin/flake8" "$@"
|
||||
|
||||
@@ -18,4 +18,4 @@
|
||||
# under the License.
|
||||
|
||||
source "$(dirname "$0")/impala-python-common.sh"
|
||||
exec "$PY_DIR/env/bin/gcovr" "$@"
|
||||
exec "$PY_ENV_DIR/bin/gcovr" "$@"
|
||||
|
||||
@@ -20,4 +20,4 @@
|
||||
##############################################################################
|
||||
|
||||
source $(dirname "$0")/impala-python-common.sh
|
||||
exec "$PY_DIR/env/bin/ipython" "$@"
|
||||
exec "$PY_ENV_DIR/bin/ipython" "$@"
|
||||
|
||||
@@ -18,4 +18,4 @@
|
||||
# under the License.
|
||||
|
||||
source "$(dirname "$0")/impala-python-common.sh"
|
||||
exec "$PY_DIR/env/bin/python" "$PY_DIR/env/bin/pip" "$@"
|
||||
exec "$PY_ENV_DIR/bin/python" "$PY_ENV_DIR/bin/pip" "$@"
|
||||
|
||||
@@ -20,4 +20,4 @@
|
||||
##############################################################################
|
||||
|
||||
source $(dirname "$0")/impala-python-common.sh
|
||||
exec "$PY_DIR/env/bin/py.test" "$@"
|
||||
exec "$PY_ENV_DIR/bin/py.test" "$@"
|
||||
|
||||
@@ -20,4 +20,4 @@
|
||||
##############################################################################
|
||||
|
||||
source "$(dirname "$0")/impala-python-common.sh"
|
||||
exec "$PY_DIR/env/bin/python" "$@"
|
||||
exec "$PY_ENV_DIR/bin/python" "$@"
|
||||
|
||||
@@ -28,4 +28,5 @@ export LD_LIBRARY_PATH="$(python "$IMPALA_HOME/infra/python/bootstrap_virtualenv
|
||||
--print-ld-library-path)"
|
||||
|
||||
PY_DIR="$(dirname "$0")/../infra/python"
|
||||
PY_ENV_DIR="${PY_DIR}/env-gcc${IMPALA_GCC_VERSION}"
|
||||
python "$PY_DIR/bootstrap_virtualenv.py"
|
||||
|
||||
@@ -32,12 +32,13 @@ IMPALA_THRIFT_PY_VERSION=${IMPALA_THRIFT11_VERSION}
|
||||
|
||||
THRIFT_PY_ROOT="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_PY_VERSION}"
|
||||
|
||||
LD_LIBRARY_PATH+=":$(PYTHONPATH=${PYTHONPATH} \
|
||||
export LD_LIBRARY_PATH=":$(PYTHONPATH=${PYTHONPATH} \
|
||||
python "$IMPALA_HOME/infra/python/bootstrap_virtualenv.py" \
|
||||
--print-ld-library-path)"
|
||||
|
||||
IMPALA_PY_DIR="$(dirname "$0")/../infra/python"
|
||||
IMPALA_PYTHON_EXECUTABLE="${IMPALA_PY_DIR}/env/bin/python"
|
||||
IMPALA_PY_ENV_DIR="${IMPALA_PY_DIR}/env-gcc${IMPALA_GCC_VERSION}"
|
||||
IMPALA_PYTHON_EXECUTABLE="${IMPALA_PY_ENV_DIR}/bin/python"
|
||||
|
||||
for PYTHON_LIB_DIR in ${THRIFT_PY_ROOT}/python/lib{64,}; do
|
||||
[[ -d ${PYTHON_LIB_DIR} ]] || continue
|
||||
|
||||
@@ -16,7 +16,7 @@
|
||||
# under the License.
|
||||
|
||||
# Sets up the python path for impala-python. This is needed because tests and other
|
||||
# utility scripts depend on some modules external to infra/python/env.
|
||||
# utility scripts depend on some modules external to infra/python/env-*.
|
||||
# TODO: we should try to reduce our reliance on PYTHONPATH if possible.
|
||||
#
|
||||
# Setting USE_THRIFT11_GEN_PY will add Thrift 11 Python generated code rather than the
|
||||
@@ -34,7 +34,7 @@ else
|
||||
PYTHONPATH=${PYTHONPATH}:${IMPALA_HOME}/shell/gen-py
|
||||
fi
|
||||
|
||||
PYTHONPATH=${PYTHONPATH}:${IMPALA_HOME}/infra/python/env/lib
|
||||
PYTHONPATH=${PYTHONPATH}:${IMPALA_HOME}/infra/python/env-gcc${IMPALA_GCC_VERSION}/lib
|
||||
|
||||
# There should be just a single version of python that created the
|
||||
# site-packages directory. We find it by performing shell independent expansion
|
||||
|
||||
@@ -52,8 +52,10 @@ LOG = logging.getLogger(os.path.splitext(os.path.basename(__file__))[0])
|
||||
|
||||
SKIP_TOOLCHAIN_BOOTSTRAP = "SKIP_TOOLCHAIN_BOOTSTRAP"
|
||||
|
||||
GCC_VERSION = os.environ["IMPALA_GCC_VERSION"]
|
||||
|
||||
DEPS_DIR = os.path.join(os.path.dirname(__file__), "deps")
|
||||
ENV_DIR = os.path.join(os.path.dirname(__file__), "env")
|
||||
ENV_DIR = os.path.join(os.path.dirname(__file__), "env-gcc{0}".format(GCC_VERSION))
|
||||
|
||||
# Requirements file with packages we need for our build and tests.
|
||||
REQS_PATH = os.path.join(DEPS_DIR, "requirements.txt")
|
||||
@@ -390,9 +392,14 @@ if __name__ == "__main__":
|
||||
options, args = parser.parse_args()
|
||||
|
||||
if options.print_ld_library_path:
|
||||
# Some python packages have native code that is compiled with the toolchain
|
||||
# compiler, so that code needs to dynamically link against matching library
|
||||
# versions.
|
||||
ld_library_dirs = [os.path.join(toolchain_pkg_dir("gcc"), 'lib64')]
|
||||
kudu_client_dir = find_kudu_client_install_dir()
|
||||
print(os.path.pathsep.join([os.path.join(kudu_client_dir, 'lib'),
|
||||
os.path.join(kudu_client_dir, 'lib64')]))
|
||||
ld_library_dirs.append(os.path.join(kudu_client_dir, 'lib'))
|
||||
ld_library_dirs.append(os.path.join(kudu_client_dir, 'lib64'))
|
||||
print(os.path.pathsep.join(ld_library_dirs))
|
||||
sys.exit()
|
||||
|
||||
logging.basicConfig(level=getattr(logging, options.log_level))
|
||||
|
||||
@@ -70,7 +70,7 @@ libaio1
|
||||
4. Run these commands to install cx_Oracle into the impala-python
|
||||
virtual environment:
|
||||
|
||||
$ source "${IMPALA_HOME}"/infra/python/env/bin/activate
|
||||
$ source "${IMPALA_HOME}"/infra/python/env-gcc${IMPALA_GCC_VERSION}/bin/activate
|
||||
(env)$ pip install cx_Oracle==5.2.1
|
||||
(env)$ deactivate
|
||||
|
||||
|
||||
Reference in New Issue
Block a user