This reverts commit 52b87fcefd.

The original commit caused an issue when Impala is deployed together
with Apache Atlas: the coordinator failed to start with the error message
  java.lang.NoClassDefFoundError: org/apache/logging/log4j/core/Layout

Resolved a minor conflict in impala-config.sh caused by IMPALA-14478
being applied after IMPALA-14454.

Change-Id: I77127db8d833c675c18c30eb3d6542ca906cd2a9
Reviewed-on: http://gerrit.cloudera.org:8080/23788
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
1325 lines
58 KiB
Bash
Executable File
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

# Source this file from the $IMPALA_HOME directory to
# set up your environment. If $IMPALA_HOME is undefined
# this script will set it to the current working directory.
#
# Some config variables can be overridden. All overridable variables can be overridden
# by impala-config-branch.sh, which in turn can be overridden by impala-config-local.sh.
# Some config variables in the second part of this file (e.g. locations of dependencies,
# secret keys) can also be overridden by setting environment variables before sourcing
# this file. We don't support this for variables that change between branches and
# versions, e.g. version numbers, because it creates a "sticky config variable" problem
# where an old value stays in effect when switching between branches or rebasing until
# the developer opens a new shell. We also do not support overriding of some variables
# that are computed based on the values of other variables.
#
# This file must be kept compatible with bash options "set -euo pipefail". Those options
# will be set by other scripts before sourcing this file. Those options are not set in
# this script because scripts outside this repository may need to be updated and that
# is not practical at this time.
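#
# Example (illustrative; the values are hypothetical): a developer's
# bin/impala-config-local.sh might contain overrides such as:
#   export IMPALA_CLUSTER_MAX_MEM_GB=16
#   export IMPALA_MAKE_CMD=ninja
#   export TARGET_FILESYSTEM=s3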

if ! [[ "'$IMPALA_HOME'" =~ [[:blank:]] ]]; then
  if [ -z "$IMPALA_HOME" ]; then
    if [[ ! -z "$ZSH_NAME" ]]; then
      export IMPALA_HOME=$(dirname "$(cd $(dirname ${(%):-%x}) >/dev/null && pwd)")
    else
      export IMPALA_HOME=$(dirname "$(cd $(dirname "${BASH_SOURCE[0]}") >/dev/null && pwd)")
    fi
  fi
fi

if [[ "'$IMPALA_HOME'" =~ [[:blank:]] ]]; then
  echo "IMPALA_HOME cannot have spaces in the path"
  exit 1
fi

export IMPALA_TOOLCHAIN=${IMPALA_TOOLCHAIN-"$IMPALA_HOME/toolchain"}
if [ -z "$IMPALA_TOOLCHAIN" ]; then
  echo "IMPALA_TOOLCHAIN must be specified. Please set it to a valid directory or"\
       "leave it unset."
  return 1
fi

#######################################################################################
# Variables that can be overridden by impala-config-*.sh but not by environment vars. #
# All component versions and other variables that get updated periodically or between #
# branches go here to avoid the "sticky variable" problem (IMPALA-4653) where the     #
# variable from a previously-sourced impala-config.sh overrides the new value.        #
#######################################################################################

# The current Impala version that will be embedded in the Impala binary. This is
# also used to find the Impala frontend jar files, so the version must match
# the version in our Maven pom.xml files. This is validated via
# bin/validate-java-pom-version.sh during the build.
# WARNING: If changing this value, also run these commands:
#   cd ${IMPALA_HOME}/java
#   mvn versions:set -DnewVersion=YOUR_NEW_VERSION
export IMPALA_VERSION=5.0.0-SNAPSHOT

# Whether to build the backend on the Avro C++ library or the C one.
# This is added temporarily to help transitioning from the Avro C to the C++ library.
export USE_AVRO_CPP=${USE_AVRO_CPP:=false}

# The unique build id of the toolchain to use if bootstrapping. This is generated by the
# native-toolchain build when publishing its build artifacts. This should be changed when
# moving to a different build of the toolchain, e.g. when a version is bumped or a
# compile option is changed. The build id can be found in the output of the toolchain
# build jobs; it is constructed from the build number and toolchain git hash prefix.
export IMPALA_TOOLCHAIN_BUILD_ID_AARCH64=159-1fade8203e
export IMPALA_TOOLCHAIN_BUILD_ID_X86_64=612-1fade8203e
export IMPALA_TOOLCHAIN_REPO=\
${IMPALA_TOOLCHAIN_REPO:-https://github.com/cloudera/native-toolchain.git}
export IMPALA_TOOLCHAIN_BRANCH=${IMPALA_TOOLCHAIN_BRANCH:-master}
export IMPALA_TOOLCHAIN_COMMIT_HASH=\
${IMPALA_TOOLCHAIN_COMMIT_HASH-1fade8203e3df234dc5bab35cd32ff1f40d1db17}
# Compare the build ref in build IDs by removing everything up-to-and-including the
# first hyphen.
if [ "${IMPALA_TOOLCHAIN_BUILD_ID_AARCH64#*-}" \
     != "${IMPALA_TOOLCHAIN_BUILD_ID_X86_64#*-}" ]; then
  echo "IMPALA_TOOLCHAIN_BUILD_ID_AARCH64 and IMPALA_TOOLCHAIN_BUILD_ID_X86_64 must" \
       "come from the same commit hash."
  exit 1
fi

# Ensure the IMPALA_TOOLCHAIN_COMMIT_HASH matches the hash in the toolchain build ids.
TOOLCHAIN_SHORT_HASH="${IMPALA_TOOLCHAIN_BUILD_ID_X86_64#*-}"
if [[ ! "$IMPALA_TOOLCHAIN_COMMIT_HASH" == "$TOOLCHAIN_SHORT_HASH"* ]]; then
  echo "ERROR: IMPALA_TOOLCHAIN_COMMIT_HASH '$IMPALA_TOOLCHAIN_COMMIT_HASH' does not" \
       "start with TOOLCHAIN_SHORT_HASH '$TOOLCHAIN_SHORT_HASH'"
  exit 1
fi
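
# For reference: "${VAR#*-}" strips everything through the first hyphen, so a
# build id like "612-1fade8203e" yields the short hash "1fade8203e", which is
# what the checks above compare against IMPALA_TOOLCHAIN_COMMIT_HASH.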

export ARCH_NAME=$(uname -p)

# Versions of toolchain dependencies.
# -----------------------------------
if $USE_AVRO_CPP; then
  export IMPALA_AVRO_VERSION=1.11.1-p1
else
  export IMPALA_AVRO_VERSION=1.7.4-p5
fi
unset IMPALA_AVRO_URL
export IMPALA_BINUTILS_VERSION=2.42
unset IMPALA_BINUTILS_URL
export IMPALA_BOOST_VERSION=1.74.0-p1
unset IMPALA_BOOST_URL
export IMPALA_BREAKPAD_VERSION=e09741c609dcd5f5274d40182c5e2cc9a002d5ba-p3
unset IMPALA_BREAKPAD_URL
export IMPALA_BZIP2_VERSION=1.0.8-p2
unset IMPALA_BZIP2_URL
export IMPALA_CCTZ_VERSION=2.2
unset IMPALA_CCTZ_URL
export IMPALA_CMAKE_VERSION=3.22.2
unset IMPALA_CMAKE_URL
export IMPALA_CRCUTIL_VERSION=2903870057d2f1f109b245650be29e856dc8b646
unset IMPALA_CRCUTIL_URL
export IMPALA_CURL_VERSION=8.14.1
unset IMPALA_CURL_URL
export IMPALA_CYRUS_SASL_VERSION=2.1.23
unset IMPALA_CYRUS_SASL_URL
export IMPALA_FLATBUFFERS_VERSION=1.9.0-p1
unset IMPALA_FLATBUFFERS_URL
export IMPALA_GCC_VERSION=10.4.0
unset IMPALA_GCC_URL
export IMPALA_GDB_VERSION=12.1-p1
unset IMPALA_GDB_URL
export IMPALA_GFLAGS_VERSION=2.2.0-p2
unset IMPALA_GFLAGS_URL
export IMPALA_GLOG_VERSION=0.6.0-p2
unset IMPALA_GLOG_URL
export IMPALA_GPERFTOOLS_VERSION=2.10-p1
unset IMPALA_GPERFTOOLS_URL
export IMPALA_GTEST_VERSION=1.14.0
unset IMPALA_GTEST_URL
export IMPALA_JWT_CPP_VERSION=0.5.0
unset IMPALA_JWT_CPP_URL
export IMPALA_LIBEV_VERSION=4.20-p1
unset IMPALA_LIBEV_URL
export IMPALA_LIBUNWIND_VERSION=1.7.2-p1
unset IMPALA_LIBUNWIND_URL
export IMPALA_LLVM_VERSION=5.0.1-p8
unset IMPALA_LLVM_URL
export IMPALA_LLVM_ASAN_VERSION=5.0.1-p8
unset IMPALA_LLVM_ASAN_URL
export IMPALA_OPENTELEMETRY_CPP_VERSION=1.20.0
unset IMPALA_OPENTELEMETRY_CPP_URL

# To limit maximum memory available for the mini-cluster and CDH cluster, add the
# following in $IMPALA_HOME/bin/impala-config-local.sh
#   export IMPALA_CLUSTER_MAX_MEM_GB=<value>

# LLVM stores some files in subdirectories that are named after what
# version it thinks it is. We might think it is 5.0.1-p1, based on a
# patch we have applied, but LLVM thinks its version is 5.0.1.
export IMPALA_LLVM_UBSAN_BASE_VERSION=5.0.1

# Debug builds should use the release+asserts build to get additional coverage.
# Don't use the LLVM debug build because the binaries are too large to distribute.
export IMPALA_LLVM_DEBUG_VERSION=5.0.1-asserts-p8
unset IMPALA_LLVM_DEBUG_URL
export IMPALA_LZ4_VERSION=1.9.3
unset IMPALA_LZ4_URL
export IMPALA_ZSTD_VERSION=1.5.2
unset IMPALA_ZSTD_URL
export IMPALA_OPENLDAP_VERSION=2.4.47
unset IMPALA_OPENLDAP_URL
export IMPALA_ORC_VERSION=1.7.9-p11
unset IMPALA_ORC_URL
export IMPALA_PROTOBUF_VERSION=3.14.0
unset IMPALA_PROTOBUF_URL
export IMPALA_PROTOBUF_CLANG_VERSION=3.14.0-clangcompat-p2
unset IMPALA_PROTOBUF_CLANG_URL
export IMPALA_POSTGRES_JDBC_DRIVER_VERSION=42.5.6
unset IMPALA_POSTGRES_JDBC_DRIVER_URL
export IMPALA_MYSQL_JDBC_DRIVER_VERSION=8.2.0
unset IMPALA_MYSQL_JDBC_DRIVER_URL
export IMPALA_PYTHON_VERSION=2.7.16
unset IMPALA_PYTHON_URL
export IMPALA_PYTHON3_VERSION=3.8.18
export IMPALA_RAPIDJSON_VERSION=1.1.0-p1
unset IMPALA_RAPIDJSON_URL
export IMPALA_RE2_VERSION=2023-03-01
unset IMPALA_RE2_URL
export IMPALA_SNAPPY_VERSION=1.1.8
unset IMPALA_SNAPPY_URL
export IMPALA_SQUEASEL_VERSION=3.3
unset IMPALA_SQUEASEL_URL
# TPC utilities used for test/benchmark data generation.
export IMPALA_TPC_DS_VERSION=2.1.0-p1
unset IMPALA_TPC_DS_URL
export IMPALA_TPC_H_VERSION=2.17.0
unset IMPALA_TPC_H_URL
export IMPALA_ZLIB_VERSION=1.3.1
unset IMPALA_ZLIB_URL
export IMPALA_ARROW_VERSION=15.0.0
unset IMPALA_ARROW_URL
export IMPALA_CLOUDFLAREZLIB_VERSION=7aa510344e
unset IMPALA_CLOUDFLAREZLIB_URL
export IMPALA_CALLONCEHACK_VERSION=1.0.0
unset IMPALA_CALLONCEHACK_URL
if [[ $ARCH_NAME == 'aarch64' ]]; then
  export IMPALA_HADOOP_CLIENT_VERSION=3.3.6-p1
  unset IMPALA_HADOOP_CLIENT_URL
fi
export IMPALA_MOLD_VERSION=2.40.4
unset IMPALA_MOLD_URL

# Impala JDBC driver for testing.
export IMPALA_SIMBA_JDBC_DRIVER_VERSION=42-2.6.32.1041

# Find system python versions for testing
# IMPALA-14606: Stop building impala_python (Python 2) by default.
export IMPALA_SYSTEM_PYTHON2="${IMPALA_SYSTEM_PYTHON2_OVERRIDE-}"
export IMPALA_SYSTEM_PYTHON3="${IMPALA_SYSTEM_PYTHON3_OVERRIDE-$(command -v python3)}"
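# Example (illustrative; the path is hypothetical): pin the interpreter used for
# tests by setting, before sourcing this file or in impala-config-local.sh:
#   export IMPALA_SYSTEM_PYTHON3_OVERRIDE=/usr/bin/python3.8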

# Additional Python versions to use when building the impala-shell prebuilt tarball
# via make_shell_tarball.sh. That tarball includes precompiled packages, so it can be
# used without additional system dependencies needed for pip install.
#   export IMPALA_EXTRA_PACKAGE_PYTHONS=python3.6;python3.10

if [[ $OSTYPE == "darwin"* ]]; then
  IMPALA_CYRUS_SASL_VERSION=2.1.26
  unset IMPALA_CYRUS_SASL_URL
  IMPALA_GPERFTOOLS_VERSION=2.3
  unset IMPALA_GPERFTOOLS_URL
fi

: ${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com}
export IMPALA_TOOLCHAIN_HOST

export CDP_BUILD_NUMBER=71942734
export CDP_MAVEN_REPOSITORY=\
"https://${IMPALA_TOOLCHAIN_HOST}/build/cdp_components/${CDP_BUILD_NUMBER}/maven"
export CDP_AVRO_JAVA_VERSION=1.11.1.7.3.1.500-182
export CDP_HADOOP_VERSION=3.1.1.7.3.1.500-182
export CDP_HBASE_VERSION=2.4.17.7.3.1.500-182
export CDP_HIVE_VERSION=3.1.3000.7.3.1.500-182
export CDP_ICEBERG_VERSION=1.5.2.7.3.1.500-182
export CDP_KNOX_VERSION=2.0.0.7.3.1.500-182
export CDP_ORC_JAVA_VERSION=1.8.3.7.3.1.500-182
export CDP_OZONE_VERSION=1.4.0.7.3.1.500-182
export CDP_PARQUET_VERSION=1.12.3.7.3.1.500-182
export CDP_RANGER_VERSION=2.4.0.7.3.1.500-182
export CDP_TEZ_VERSION=0.9.1.7.3.1.500-182

# Ref: https://infra.apache.org/release-download-pages.html#closer
: ${APACHE_MIRROR:="https://www.apache.org/dyn/closer.cgi"}
export APACHE_MIRROR
export APACHE_AVRO_JAVA_VERSION=1.11.1
export APACHE_HADOOP_VERSION=3.4.1
export APACHE_HBASE_VERSION=2.6.0
export APACHE_ICEBERG_VERSION=1.5.2
export APACHE_KNOX_VERSION=2.0.0
export APACHE_ORC_JAVA_VERSION=1.8.3
export APACHE_PARQUET_VERSION=1.12.3
export APACHE_RANGER_VERSION=2.4.0
export APACHE_TEZ_VERSION=0.10.2
export APACHE_HIVE_3_VERSION=3.1.3
export APACHE_HIVE_3_STORAGE_API_VERSION=2.7.0
export APACHE_OZONE_VERSION=1.4.0

# Java dependencies that are not also runtime components. Declaring versions here allows
# other branches to override them in impala-config-branch.sh for cleaner patches.
export IMPALA_BOUNCY_CASTLE_VERSION=1.79
export IMPALA_COMMONS_LANG3_VERSION=3.18.0
export IMPALA_COS_VERSION=3.1.0-8.0.8
export IMPALA_DERBY_VERSION=10.14.2.0
export IMPALA_GUAVA_VERSION=32.1.2-jre
export IMPALA_HUDI_VERSION=0.5.0-incubating
export IMPALA_HTTP_CORE_VERSION=4.4.14
export IMPALA_JACKSON_VERSION=2.18.1
export IMPALA_JACKSON_DATABIND_VERSION=2.15.3
export IMPALA_JSON_SMART_VERSION=2.4.11
export IMPALA_JUNIT_VERSION=4.12
export IMPALA_KITE_VERSION=1.1.0
export IMPALA_LOG4J2_VERSION=2.18.0
export IMPALA_PAC4J_VERSION=4.5.5
export IMPALA_RELOAD4j_VERSION=1.2.22
export IMPALA_SLF4J_VERSION=2.0.13
export IMPALA_SPRINGFRAMEWORK_VERSION=5.3.39
export IMPALA_XMLSEC_VERSION=2.2.6
export IMPALA_VELOCITY_ENGINE_CORE_VERSION=2.4.1
export IMPALA_OBS_VERSION=3.1.1-hw-42
export IMPALA_DBCP2_VERSION=2.12.0
export IMPALA_DROPWIZARD_METRICS_VERSION=4.2.26
export IMPALA_AIRCOMPRESSOR_VERSION=0.27
export IMPALA_DATASKETCHES_VERSION=6.0.0
export IMPALA_PAIMON_VERSION=1.3.1
# When Impala is building docker images on Redhat-based distributions,
# it is useful to be able to customize the base image. Some users will
# want to use open source / free distributions like Centos/Rocky/Alma/etc.
# Some users will want to produce images on top of official Redhat UBI
# images (which have certain guarantees about maintenance, CVEs, etc).
# These environment variables control the base images. They default to
# free distributions, but Redhat UBI images are known to work.
export IMPALA_REDHAT7_DOCKER_BASE=${IMPALA_REDHAT7_DOCKER_BASE:-"centos:centos7.9.2009"}
export IMPALA_REDHAT8_DOCKER_BASE=${IMPALA_REDHAT8_DOCKER_BASE:-"rockylinux:8.5"}
export IMPALA_REDHAT9_DOCKER_BASE=${IMPALA_REDHAT9_DOCKER_BASE:-"rockylinux:9.2"}
# Some users may want to use special, hardened base images for increased security.
# These images are usually not related to the OS where the build is running.
# The following environment variables allow a specific base image to be specified
# directly, without relying on the implicit build platform identification in
# CMakeLists.txt.
# Images published by Chainguard and the Wolfi project are known to be used, so the
# publicly available Wolfi base image is used as a default example.
export IMPALA_CUSTOM_DOCKER_BASE=${IMPALA_CUSTOM_DOCKER_BASE:-"cgr.dev/chainguard/wolfi-base:latest"}
export USE_CUSTOM_IMPALA_BASE_IMAGE=${USE_CUSTOM_IMPALA_BASE_IMAGE:-false}
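# Example (illustrative; the image tag is hypothetical): to build on a Redhat UBI
# base instead of the free default, one might set in impala-config-local.sh:
#   export IMPALA_REDHAT8_DOCKER_BASE=registry.access.redhat.com/ubi8/ubi:8.9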

# Selects the version of Java to use when start-impala-cluster.py starts with container
# images (created via e.g. 'make docker_debug_java11_images'). The Java version used in
# these images is independent of the Java version used to compile Impala.
# Accepts 8, 11, 17.
export IMPALA_DOCKER_JAVA=${IMPALA_DOCKER_JAVA:-"8"}
if [ "${IMPALA_DOCKER_USE_JAVA11:-}" = "true" ]; then
  export IMPALA_DOCKER_JAVA=11
fi

# There are multiple compatible implementations of zlib. Cloudflare Zlib is an
# implementation with optimizations to use platform-specific CPU features that are not
# in the standard Zlib implementation. When set to true, this builds and links against
# Cloudflare Zlib. When false, the build uses the regular Madler Zlib. This defaults
# to true due to the large performance benefits.
export IMPALA_USE_CLOUDFLARE_ZLIB=${IMPALA_USE_CLOUDFLARE_ZLIB:-"true"}

# When IMPALA_(CDP_COMPONENT)_URL are overridden, they may contain '$(platform_label)'
# which will be substituted for the CDP platform label in bootstrap_toolchain.py
unset IMPALA_HADOOP_URL
unset IMPALA_HBASE_URL
unset IMPALA_HIVE_URL
unset IMPALA_OZONE_URL
unset IMPALA_KUDU_URL
unset IMPALA_KUDU_VERSION

export IMPALA_KERBERIZE=false

unset IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY
unset IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED

export IMPALA_USE_PYTHON3_TESTS=${IMPALA_USE_PYTHON3_TESTS:-true}

# Source the branch and local config override files here to override any
# variables above or any variables below that allow overriding via environment
# variable.
. "$IMPALA_HOME/bin/impala-config-branch.sh"
if [ -f "$IMPALA_HOME/bin/impala-config-local.sh" ]; then
  . "$IMPALA_HOME/bin/impala-config-local.sh"
fi

# IMPALA_TOOLCHAIN_PACKAGES_HOME is the location inside IMPALA_TOOLCHAIN where native
# toolchain packages are placed. This uses a subdirectory that contains the information
# about the compiler to allow using different compiler versions.
IMPALA_TOOLCHAIN_PACKAGES_HOME=\
${IMPALA_TOOLCHAIN}/toolchain-packages-gcc${IMPALA_GCC_VERSION}
if ! [ -z ${NATIVE_TOOLCHAIN_HOME-} ]; then
  IMPALA_TOOLCHAIN_PACKAGES_HOME=$(realpath ${NATIVE_TOOLCHAIN_HOME})/build
  export SKIP_TOOLCHAIN_BOOTSTRAP=true
fi
export IMPALA_TOOLCHAIN_PACKAGES_HOME
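# For reference: with the default IMPALA_GCC_VERSION=10.4.0 above, this resolves
# to "$IMPALA_HOME/toolchain/toolchain-packages-gcc10.4.0".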

export CDP_HADOOP_URL=${CDP_HADOOP_URL-}
export CDP_HBASE_URL=${CDP_HBASE_URL-}
export CDP_HIVE_URL=${CDP_HIVE_URL-}
export CDP_HIVE_SOURCE_URL=${CDP_HIVE_SOURCE_URL-}
export CDP_OZONE_URL=${CDP_OZONE_URL-}
export CDP_ICEBERG_URL=${CDP_ICEBERG_URL-}
export CDP_RANGER_URL=${CDP_RANGER_URL-}
export CDP_TEZ_URL=${CDP_TEZ_URL-}

export APACHE_HIVE_3_URL=${APACHE_HIVE_3_URL-}
export APACHE_HIVE_3_SOURCE_URL=${APACHE_HIVE_3_SOURCE_URL-}
export APACHE_OZONE_URL=${APACHE_OZONE_URL-}

export CDP_COMPONENTS_HOME="$IMPALA_TOOLCHAIN/cdp_components-$CDP_BUILD_NUMBER"
export CDH_MAJOR_VERSION=7
if ${USE_APACHE_COMPONENTS:=false}; then
  export IMPALA_AVRO_JAVA_VERSION=${APACHE_AVRO_JAVA_VERSION}
  export IMPALA_HADOOP_VERSION=${APACHE_HADOOP_VERSION}
  export IMPALA_HBASE_VERSION=${APACHE_HBASE_VERSION}
  export IMPALA_ICEBERG_VERSION=${APACHE_ICEBERG_VERSION}
  export IMPALA_KNOX_VERSION=${APACHE_KNOX_VERSION}
  export IMPALA_ORC_JAVA_VERSION=${APACHE_ORC_JAVA_VERSION}
  export IMPALA_OZONE_VERSION=${APACHE_OZONE_VERSION}
  export IMPALA_PARQUET_VERSION=${APACHE_PARQUET_VERSION}
  export IMPALA_RANGER_VERSION=${RANGER_VERSION_OVERRIDE:-"$APACHE_RANGER_VERSION"}
  export IMPALA_TEZ_VERSION=${APACHE_TEZ_VERSION}
  export USE_APACHE_HADOOP=true
  export USE_APACHE_HBASE=true
  export USE_APACHE_HIVE_3=true
  export USE_APACHE_TEZ=true
  export USE_APACHE_RANGER=true
  export USE_APACHE_OZONE=true
else
  export IMPALA_AVRO_JAVA_VERSION=${CDP_AVRO_JAVA_VERSION}
  export IMPALA_HADOOP_VERSION=${CDP_HADOOP_VERSION}
  export IMPALA_HADOOP_URL=${CDP_HADOOP_URL-}
  export IMPALA_HBASE_VERSION=${CDP_HBASE_VERSION}
  export IMPALA_HBASE_URL=${CDP_HBASE_URL-}
  export IMPALA_ICEBERG_VERSION=${CDP_ICEBERG_VERSION}
  export IMPALA_ICEBERG_URL=${CDP_ICEBERG_URL-}
  export IMPALA_KNOX_VERSION=${CDP_KNOX_VERSION}
  export IMPALA_ORC_JAVA_VERSION=${CDP_ORC_JAVA_VERSION}
  export IMPALA_OZONE_VERSION=${CDP_OZONE_VERSION}
  export IMPALA_PARQUET_VERSION=${CDP_PARQUET_VERSION}
  export IMPALA_RANGER_VERSION=${RANGER_VERSION_OVERRIDE:-"$CDP_RANGER_VERSION"}
  export IMPALA_RANGER_URL=${CDP_RANGER_URL-}
  export IMPALA_TEZ_VERSION=${CDP_TEZ_VERSION}
  export IMPALA_TEZ_URL=${CDP_TEZ_URL-}
  export USE_APACHE_HADOOP=${USE_APACHE_HADOOP:=false}
  export USE_APACHE_HBASE=${USE_APACHE_HBASE:=false}
  export USE_APACHE_HIVE_3=${USE_APACHE_HIVE_3:=false}
  export USE_APACHE_TEZ=${USE_APACHE_TEZ:=false}
  export USE_APACHE_RANGER=${USE_APACHE_RANGER:=false}
  export USE_APACHE_OZONE=${USE_APACHE_OZONE:=false}
fi

export APACHE_COMPONENTS_HOME="$IMPALA_TOOLCHAIN/apache_components"

if $USE_APACHE_HADOOP; then
  export HADOOP_HOME="$APACHE_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}"
else
  export HADOOP_HOME="$CDP_COMPONENTS_HOME/hadoop-${IMPALA_HADOOP_VERSION}"
fi

if $USE_APACHE_HIVE_3; then
  # When USE_APACHE_HIVE_3 is set, we use the Apache Hive version to build as well
  # as deploy in the minicluster.
  export IMPALA_HIVE_DIST_TYPE="apache-hive-3"
  export IMPALA_HIVE_VERSION=${APACHE_HIVE_3_VERSION}
  export IMPALA_HIVE_URL=${APACHE_HIVE_3_URL-}
  export IMPALA_HIVE_SOURCE_URL=${APACHE_HIVE_3_SOURCE_URL-}
  export IMPALA_HIVE_STORAGE_API_VERSION=${APACHE_HIVE_3_STORAGE_API_VERSION}
else
  # The CDP Hive version is used to build and deploy in the minicluster when
  # USE_APACHE_HIVE_3 is false.
  export IMPALA_HIVE_DIST_TYPE="hive-3"
  export IMPALA_HIVE_VERSION=${HIVE_VERSION_OVERRIDE:-"$CDP_HIVE_VERSION"}
  export IMPALA_HIVE_URL=${CDP_HIVE_URL-}
  export IMPALA_HIVE_SOURCE_URL=${CDP_HIVE_SOURCE_URL-}
  export IMPALA_HIVE_STORAGE_API_VERSION=${HIVE_STORAGE_API_VERSION_OVERRIDE:-\
"2.3.0.$IMPALA_HIVE_VERSION"}
fi
# Thrift related environment variables.
# IMPALA_THRIFT_POM_VERSION is used to populate IMPALA_THRIFT_JAVA_VERSION and
# thrift.version in java/pom.xml.
# IMPALA_THRIFT_PY_VERSION is used to find the thrift compiler to produce
# the generated Python code. The code that uses the generated Python code gets
# the corresponding Thrift runtime library by pip installing thrift (and does not
# respect this version). If upgrading IMPALA_THRIFT_PY_VERSION, also upgrade the
# thrift version in shell/packaging/requirements.txt and
# infra/python/deps/requirements.txt.
export IMPALA_THRIFT_CPP_VERSION=0.16.0-p7
unset IMPALA_THRIFT_CPP_URL
if $USE_APACHE_HIVE_3; then
  # Apache Hive 3 clients can't run on thrift versions >= 0.14 (IMPALA-11801)
  export IMPALA_THRIFT_POM_VERSION=0.11.0
  export IMPALA_THRIFT_JAVA_VERSION=${IMPALA_THRIFT_POM_VERSION}-p5
else
  export IMPALA_THRIFT_POM_VERSION=0.16.0
  export IMPALA_THRIFT_JAVA_VERSION=${IMPALA_THRIFT_POM_VERSION}-p7
fi
unset IMPALA_THRIFT_JAVA_URL
export IMPALA_THRIFT_PY_VERSION=0.16.0-p7
unset IMPALA_THRIFT_PY_URL

# Extract the first component of the hive version.
# Allow overriding of Hive source location in case we want to build Impala without
# a complete Hive build. This is used by various tests and scripts to enable and
# disable tests and functionality.
export IMPALA_HIVE_MAJOR_VERSION=$(echo "$IMPALA_HIVE_VERSION" | cut -d . -f 1)

# Hive 1 and 2 are no longer supported.
if [[ "${IMPALA_HIVE_MAJOR_VERSION}" == "1" ||
      "${IMPALA_HIVE_MAJOR_VERSION}" == "2" ]]; then
  echo "Hive 1 and 2 are no longer supported"
  return 1
fi

if $USE_APACHE_OZONE; then
  export IMPALA_OZONE_VERSION=${APACHE_OZONE_VERSION}
  export IMPALA_OZONE_URL=${APACHE_OZONE_URL-}
else
  export IMPALA_OZONE_VERSION=${CDP_OZONE_VERSION}
  export IMPALA_OZONE_URL=${CDP_OZONE_URL-}
fi

# It is important to have a coherent view of the JAVA_HOME and JAVA executable.
# The JAVA_HOME should be determined first, then the JAVA executable should be
# derived from JAVA_HOME. For development, it is useful to be able to specify
# the JDK version as part of bin/impala-config-local.sh

# Decision tree:
# if IMPALA_JDK_VERSION is set, look for that version based on known locations
# else if JAVA_HOME is set, use it
# else look for system JDK
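#
# Example (illustrative): pin a specific JDK in bin/impala-config-local.sh with
#   export IMPALA_JDK_VERSION=11
# and the detection below looks it up under the distribution's standard JVM paths.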

# Set package variables for Docker builds and OS-specific detection.
. "$IMPALA_HOME/bin/impala-config-java.sh"

DETECTED_JAVA_HOME=${JAVA_HOME:-}
if [[ -z "${IMPALA_JDK_VERSION:-}" ]]; then
  # IMPALA_JDK_VERSION is empty or unset. Use JAVA_HOME or detect system default.
  if [[ -z "${DETECTED_JAVA_HOME:-}" ]]; then
    # Try to detect the system's JAVA_HOME
    # If javac exists, then the system has a Java SDK (JRE does not have javac).
    # Follow the symbolic links and use this to determine the system's JAVA_HOME.
    DETECTED_JAVA_HOME="/usr/java/default"
    if [ -n "$(which javac)" ]; then
      DETECTED_JAVA_HOME=$(dirname $(dirname $(readlink -f $(which javac))))
    fi
  fi
else
  # Now, we are looking for a specific version, and that will depend on the
  # distribution. Currently, this is implemented for Redhat and Ubuntu.
  DISTRIBUTION=Unknown
  if [[ -f /etc/redhat-release ]]; then
    echo "Identified Redhat image."
    DISTRIBUTION=Redhat
  else
    source /etc/lsb-release
    if [[ $DISTRIB_ID == Ubuntu ]]; then
      echo "Identified Ubuntu image."
      DISTRIBUTION=Ubuntu
    fi
  fi
  if [[ "${DISTRIBUTION}" == "Unknown" ]]; then
    echo "ERROR: auto-detection of JAVA_HOME only supported for Ubuntu and RedHat."
    echo "Set JAVA_HOME to use a specific location."
    return 1
  fi

  JVMS_PATH=/usr/lib/jvm
  if [[ "${DISTRIBUTION}" == "Ubuntu" ]]; then
    JAVA_PACKAGE_NAME="java-${UBUNTU_JAVA_VERSION}-openjdk-${UBUNTU_PACKAGE_ARCH}"
    DETECTED_JAVA_HOME="${JVMS_PATH}/${JAVA_PACKAGE_NAME}"
  elif [[ "${DISTRIBUTION}" == "Redhat" ]]; then
    DETECTED_JAVA_HOME="${JVMS_PATH}/java-${REDHAT_JAVA_VERSION}"
  fi

  if [[ ! -d "${DETECTED_JAVA_HOME}" ]]; then
    echo "ERROR: Could not detect Java ${IMPALA_JDK_VERSION}."\
         "${DETECTED_JAVA_HOME} is not a directory."
    return 1
  fi
fi

# Update JAVA_HOME to the detected JAVA_HOME if it exists.
if [ ! -d "${DETECTED_JAVA_HOME}" ]; then
  echo "JAVA_HOME must be set to the location of your JDK!"
  return 1
fi
export JAVA_HOME="${DETECTED_JAVA_HOME}"
export JAVA="$JAVA_HOME/bin/java"
if [[ ! -e "$JAVA" ]]; then
  echo "Could not find java binary at $JAVA" >&2
  return 1
fi
# Target the Java version matching the JDK.
export IMPALA_JAVA_TARGET=$("$JAVA" -version 2>&1 | awk -F'[\".]' '/version/ {print $2}')
if [[ $IMPALA_JAVA_TARGET -eq 1 ]]; then
  # Capture x from 1.x, i.e. Java 1.8 -> 8.
  IMPALA_JAVA_TARGET=$("$JAVA" -version 2>&1 | awk -F'[\".]' '/version/ {print $3}')
fi
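
# For reference: the awk above splits the version line on quotes and dots, e.g.
#   openjdk version "11.0.20" -> $2 = 11
#   java version "1.8.0_292"  -> $2 = 1, so the 1.x branch takes $3 = 8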

# Java libraries required by executables and java tests.
export LIB_JAVA=$(find "${JAVA_HOME}/" -name libjava.so | head -1)
export LIB_JSIG=$(find "${JAVA_HOME}/" -name libjsig.so | head -1)
export LIB_JVM=$(find "${JAVA_HOME}/" -name libjvm.so | head -1)

# Default to make, but allow overriding to e.g. ninja.
export IMPALA_MAKE_CMD=${IMPALA_MAKE_CMD:-make}

#########################################################################################
# Below here are variables that can be overridden by impala-config-*.sh and environment #
# vars, variables computed based on other variables, and variables that cannot be       #
# overridden.                                                                           #
#########################################################################################

# If true, will not call $IMPALA_HOME/bin/bootstrap_toolchain.py.
export SKIP_TOOLCHAIN_BOOTSTRAP=${SKIP_TOOLCHAIN_BOOTSTRAP-false}

# If true, will not download python dependencies.
export SKIP_PYTHON_DOWNLOAD=${SKIP_PYTHON_DOWNLOAD-false}

# Provide isolated python egg location and ensure it's only writable by the user to
# avoid Python warnings during testing.
export PYTHON_EGG_CACHE="${IMPALA_HOME}/shell/build/.python-eggs"
mkdir -p "${PYTHON_EGG_CACHE}"
chmod 755 "${PYTHON_EGG_CACHE}"

# This flag is used in $IMPALA_HOME/cmake_modules/toolchain.cmake.
# If it's 0, Impala will be built with the compiler in the toolchain directory.
export USE_SYSTEM_GCC=${USE_SYSTEM_GCC-0}

# Allow the linker to be set to gold, mold, or regular ld. Gold is the default
# as it has been for a long time. Mold is a new linker that is faster than gold.
# Note: This is validated in the CMake code.
# TODO: Add support for lld as well
export IMPALA_LINKER=${IMPALA_LINKER-gold}

# Limit mold to a single job to avoid excessive memory consumption while fully utilizing
# available CPUs.
export MOLD_JOBS=${IMPALA_MOLD_JOBS-1}

# Override the default compiler by setting a path to the new compiler. The default
# compiler depends on USE_SYSTEM_GCC and IMPALA_GCC_VERSION. The intended use case
# is to set the compiler to distcc, in which case the user would also set
# IMPALA_BUILD_THREADS to increase parallelism.
export IMPALA_CXX_COMPILER=${IMPALA_CXX_COMPILER-default}

# Add options to 'mvn'; useful for configuring a settings file (-s).
export IMPALA_MAVEN_OPTIONS=${IMPALA_MAVEN_OPTIONS-}

# If enabled, debug symbols are added to cross-compiled IR.
export ENABLE_IMPALA_IR_DEBUG_INFO=${ENABLE_IMPALA_IR_DEBUG_INFO-false}

# Impala has dozens of binaries that link in all the Impala libraries.
# Each binary is hundreds of megabytes, and they end up taking 10s of GBs of
# disk space for a developer environment. A large amount of the binary
# size is due to debug information.
#
# These are a few options for reducing the binary size and disk space
# usage.
# - IMPALA_MINIMAL_DEBUG_INFO=true changes the build to produce only
#   minimal debuginfo (i.e. -g1). This has line tables and can do backtraces,
#   but it doesn't include variable information and limits further
#   debuggability. This option reduces the size of binaries by 60+%.
# - IMPALA_COMPRESSED_DEBUG_INFO=true changes the build to compress the
#   debug info with gzip. This significantly reduces the size of the
#   binary without changing the quantity of debug information. The catch
#   is that tools need to support it. gdb is known to support it and
#   the Breakpad scripts have been modified to handle it, but there may
#   be other tools that do not know how to use it. This reduces the size
#   of binaries by 50+%.
# - IMPALA_SPLIT_DEBUG_INFO=true changes the build to put debug info in
#   separate .dwo files for each C++ file. Executables contain metadata
#   pointing to these .dwo files without needing to incorporate the debug
#   information. This allows executables to share a single copy of
#   the debug information. It also reduces link time, as the linker does
#   not need to process the debug info. Tools (including gdb) mostly know
#   how to handle this split debug information.
#
# Due to the major reduction in binary size and broad support in debugging
# tools, compressed debug information is enabled by default.
export IMPALA_MINIMAL_DEBUG_INFO=${IMPALA_MINIMAL_DEBUG_INFO-false}
export IMPALA_COMPRESSED_DEBUG_INFO=${IMPALA_COMPRESSED_DEBUG_INFO-true}
export IMPALA_SPLIT_DEBUG_INFO=${IMPALA_SPLIT_DEBUG_INFO-false}

# Download and use the CDH components from S3. It can be useful to set this to false if
# building against a custom local build using HIVE_SRC_DIR_OVERRIDE,
# HADOOP_INCLUDE_DIR_OVERRIDE, and HADOOP_LIB_DIR_OVERRIDE.
export DOWNLOAD_CDH_COMPONENTS=${DOWNLOAD_CDH_COMPONENTS-true}
export DOWNLOAD_APACHE_COMPONENTS=${DOWNLOAD_APACHE_COMPONENTS-true}

export IS_OSX="$(if [[ "$OSTYPE" == "darwin"* ]]; then echo true; else echo false; fi)"

export IMPALA_AUX_TEST_HOME="${IMPALA_AUX_TEST_HOME-$IMPALA_HOME/../Impala-auxiliary-tests}"
export TARGET_FILESYSTEM="${TARGET_FILESYSTEM-hdfs}"
export ERASURE_CODING="${ERASURE_CODING-false}"
export FILESYSTEM_PREFIX="${FILESYSTEM_PREFIX-}"
export S3_BUCKET="${S3_BUCKET-}"
export S3GUARD_ENABLED="${S3GUARD_ENABLED-false}"
export S3GUARD_DYNAMODB_TABLE="${S3GUARD_DYNAMODB_TABLE-}"
export S3GUARD_DYNAMODB_REGION="${S3GUARD_DYNAMODB_REGION-}"
export azure_tenant_id="${azure_tenant_id-DummyAdlsTenantId}"
export azure_client_id="${azure_client_id-DummyAdlsClientId}"
export azure_client_secret="${azure_client_secret-DummyAdlsClientSecret}"
export azure_data_lake_store_name="${azure_data_lake_store_name-}"
export azure_storage_account_name="${azure_storage_account_name-}"
export azure_storage_container_name="${azure_storage_container_name-}"
export GOOGLE_CLOUD_PROJECT_ID="${GOOGLE_CLOUD_PROJECT_ID-}"
export GOOGLE_CLOUD_SERVICE_ACCOUNT="${GOOGLE_CLOUD_SERVICE_ACCOUNT-}"
export GOOGLE_APPLICATION_CREDENTIALS="${GOOGLE_APPLICATION_CREDENTIALS-}"
export GCS_BUCKET="${GCS_BUCKET-}"
export COS_SECRET_ID="${COS_SECRET_ID-}"
export COS_SECRET_KEY="${COS_SECRET_KEY-}"
export COS_REGION="${COS_REGION-}"
export COS_BUCKET="${COS_BUCKET-}"
export OSS_ACCESS_KEY_ID="${OSS_ACCESS_KEY_ID-}"
export OSS_SECRET_ACCESS_KEY="${OSS_SECRET_ACCESS_KEY-}"
export OSS_ACCESS_ENDPOINT="${OSS_ACCESS_ENDPOINT-}"
export OSS_BUCKET="${OSS_BUCKET-}"
export HDFS_REPLICATION="${HDFS_REPLICATION-3}"
export ISILON_NAMENODE="${ISILON_NAMENODE-}"
# Internal and external interfaces that test cluster services will listen on. The
# internal interface is used for ports that should not be accessed from outside the
# host that the cluster is running on. The external interface is used for ports
# that may need to be accessed from outside, e.g. web UIs.
export INTERNAL_LISTEN_HOST="${INTERNAL_LISTEN_HOST-localhost}"
export EXTERNAL_LISTEN_HOST="${EXTERNAL_LISTEN_HOST-0.0.0.0}"
export DEFAULT_FS="${DEFAULT_FS-hdfs://${INTERNAL_LISTEN_HOST}:20500}"
export WAREHOUSE_LOCATION_PREFIX="${WAREHOUSE_LOCATION_PREFIX-}"
export LOCAL_FS="file:${WAREHOUSE_LOCATION_PREFIX}"
# Use different node directories for each filesystem so we don't need to recreate them
# from scratch when switching.
UNIQUE_FS_LABEL=
if [[ "${TARGET_FILESYSTEM}" != "hdfs" ]]; then
  UNIQUE_FS_LABEL="${UNIQUE_FS_LABEL}-${TARGET_FILESYSTEM}"
fi
if [[ "${ERASURE_CODING}" = true ]]; then
  UNIQUE_FS_LABEL="${UNIQUE_FS_LABEL}-ec"
fi
DEFAULT_NODES_DIR="$IMPALA_HOME/testdata/cluster/cdh$CDH_MAJOR_VERSION$UNIQUE_FS_LABEL"
export IMPALA_CLUSTER_NODES_DIR="${IMPALA_CLUSTER_NODES_DIR-$DEFAULT_NODES_DIR}"
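# For reference: with TARGET_FILESYSTEM=s3 and ERASURE_CODING=true the label is
# "-s3-ec", giving a nodes dir like testdata/cluster/cdh7-s3-ec.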

ESCAPED_DB_UID=$(sed "s/[^0-9a-zA-Z]/_/g" <<< "$UNIQUE_FS_LABEL$IMPALA_HOME")
if $USE_APACHE_HIVE_3; then
  export HIVE_HOME="$APACHE_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin"
  export HIVE_SRC_DIR="$APACHE_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-src"
  # If Apache Hive is being used, change the metastore db name so we don't have to
  # format the metastore db every time we switch between Hive versions.
  export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_DB_UID)_apache"}
else
  export HIVE_HOME=${HIVE_HOME_OVERRIDE:-\
"$CDP_COMPONENTS_HOME/apache-hive-${IMPALA_HIVE_VERSION}-bin"}
  export HIVE_SRC_DIR=${HIVE_SRC_DIR_OVERRIDE:-\
"${CDP_COMPONENTS_HOME}/hive-${IMPALA_HIVE_VERSION}"}
  # Previously, there were multiple configurations and the "_cdp" included below
  # allowed the two to be distinct. We keep this "_cdp" for historical reasons.
  export METASTORE_DB=${METASTORE_DB-"$(cut -c-59 <<< HMS$ESCAPED_DB_UID)_cdp"}
fi
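
# For reference (path illustrative): with IMPALA_HOME=/home/dev/Impala and an empty
# UNIQUE_FS_LABEL, ESCAPED_DB_UID becomes "_home_dev_Impala", so the default
# METASTORE_DB is HMS_home_dev_Impala_cdp (the name is truncated to 59 chars
# before the suffix is appended).
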
# Set the path to the hive_metastore.thrift which is used to build thrift code
export HIVE_METASTORE_THRIFT_DIR=${HIVE_METASTORE_THRIFT_DIR_OVERRIDE:-\
"$HIVE_SRC_DIR/standalone-metastore/src/main/thrift"}
if $USE_APACHE_TEZ; then
  export TEZ_HOME="$APACHE_COMPONENTS_HOME/apache-tez-${IMPALA_TEZ_VERSION}-bin"
else
  export TEZ_HOME="$CDP_COMPONENTS_HOME/tez-${IMPALA_TEZ_VERSION}-minimal"
fi

if $USE_APACHE_HBASE; then
  export HBASE_HOME="$APACHE_COMPONENTS_HOME/hbase-${IMPALA_HBASE_VERSION}-hadoop3/"
else
  export HBASE_HOME="$CDP_COMPONENTS_HOME/hbase-${IMPALA_HBASE_VERSION}/"
fi
if $USE_APACHE_OZONE; then
  export OZONE_HOME="$APACHE_COMPONENTS_HOME/ozone-${IMPALA_OZONE_VERSION}/"
else
  export OZONE_HOME="$CDP_COMPONENTS_HOME/ozone-${IMPALA_OZONE_VERSION}/"
fi
# Set the Hive binaries in the path
export PATH="$HIVE_HOME/bin:$HBASE_HOME/bin:$OZONE_HOME/bin:$PATH"

RANGER_POLICY_DB=${RANGER_POLICY_DB-$(cut -c-63 <<< ranger$ESCAPED_DB_UID)}
# The DB script in Ranger expects the database name to be in lower case.
export RANGER_POLICY_DB=$(echo ${RANGER_POLICY_DB} | tr '[:upper:]' '[:lower:]')

# Environment variables carrying AWS security credentials are prepared
# according to the following rules:
#
#     Instance:          Running outside EC2 ||   Running in EC2   |
# --------------------+----------+----------++----------+---------+
#  TARGET_FILESYSTEM  |    S3    |  not S3  ||    S3    |  not S3 |
# --------------------+----------+----------++----------+---------+
#  AWS_*    empty     |  unset   |  dummy   ||  unset   |  unset  |
#  env     -----------+----------+----------++----------+---------+
#  var      not empty |  export  |  export  ||  export  |  export |
# --------------------+----------+----------++----------+---------+
#
# Legend: unset:  the variable is unset
#         export: the variable is exported with its current value
#         dummy:  the variable is set to a constant dummy value and exported
#
# Running on an EC2 VM is indicated by setting RUNNING_IN_EC2 to "true" and
# exporting it from a script running before this one.

# Checks are performed in a subshell to avoid leaking secrets to log files.
if (set +x; [[ -n ${AWS_ACCESS_KEY_ID-} ]]); then
  export AWS_ACCESS_KEY_ID
else
  if [[ "${TARGET_FILESYSTEM}" == "s3" || "${RUNNING_IN_EC2:-false}" == "true" ]]; then
    unset AWS_ACCESS_KEY_ID
  else
    export AWS_ACCESS_KEY_ID=DummyAccessKeyId
  fi
fi

if (set +x; [[ -n ${AWS_SECRET_ACCESS_KEY-} ]]); then
  export AWS_SECRET_ACCESS_KEY
else
  if [[ "${TARGET_FILESYSTEM}" == "s3" || "${RUNNING_IN_EC2:-false}" == "true" ]]; then
    unset AWS_SECRET_ACCESS_KEY
  else
    export AWS_SECRET_ACCESS_KEY=DummySecretAccessKey
  fi
fi

# AWS_SESSION_TOKEN is not set to a dummy value, as it is not needed by the FE tests.
if (set +x; [[ -n ${AWS_SESSION_TOKEN-} ]]); then
  export AWS_SESSION_TOKEN
else
  unset AWS_SESSION_TOKEN
fi

if [ "${TARGET_FILESYSTEM}" = "s3" ]; then
  # We guard the S3 access check with a variable. This check hits a rate-limited
  # endpoint on AWS, and multiple inclusions of this file can exceed the limit,
  # causing the check to fail.
  S3_ACCESS_VALIDATED="${S3_ACCESS_VALIDATED-0}"
  if [[ "${S3_ACCESS_VALIDATED}" -ne 1 ]]; then
    if ${IMPALA_HOME}/bin/check-s3-access.sh; then
      export S3_ACCESS_VALIDATED=1
      export DEFAULT_FS="s3a://${S3_BUCKET}"
    else
      return 1
    fi
  else
    echo "S3 access already validated"
  fi
  # If using s3guard, verify that the dynamodb table and region are set
  if [[ "${S3GUARD_ENABLED}" = "true" ]]; then
    if [[ -z "${S3GUARD_DYNAMODB_TABLE}" || -z "${S3GUARD_DYNAMODB_REGION}" ]]; then
      echo "When S3GUARD_ENABLED=true, S3GUARD_DYNAMODB_TABLE and
        S3GUARD_DYNAMODB_REGION must be set"
      echo "S3GUARD_DYNAMODB_TABLE: ${S3GUARD_DYNAMODB_TABLE}"
      echo "S3GUARD_DYNAMODB_REGION: ${S3GUARD_DYNAMODB_REGION}"
      return 1
    fi
  fi
elif [ "${TARGET_FILESYSTEM}" = "adls" ]; then
  # Basic error checking
  if [[ "${azure_client_id}" = "DummyAdlsClientId" ||\
        "${azure_tenant_id}" = "DummyAdlsTenantId" ||\
        "${azure_client_secret}" = "DummyAdlsClientSecret" ]]; then
    echo "All 3 of the following need to be assigned valid values and belong
      to the owner of the ADLS store in order to access the filesystem:
      azure_client_id, azure_tenant_id, azure_client_secret."
    return 1
  fi
  if [[ "${azure_data_lake_store_name}" = "" ]]; then
    echo "azure_data_lake_store_name cannot be an empty string for ADLS"
    return 1
  fi
  DEFAULT_FS="adl://${azure_data_lake_store_name}.azuredatalakestore.net"
  export DEFAULT_FS
elif [ "${TARGET_FILESYSTEM}" = "abfs" ]; then
  # ABFS is also known as ADLS Gen2, and they can share credentials
  # Basic error checking
  if [[ "${azure_client_id}" = "DummyAdlsClientId" ||\
        "${azure_tenant_id}" = "DummyAdlsTenantId" ||\
        "${azure_client_secret}" = "DummyAdlsClientSecret" ]]; then
    echo "All 3 of the following need to be assigned valid values and belong
      to the owner of the Azure storage account in order to access the
      filesystem: azure_client_id, azure_tenant_id, azure_client_secret."
    return 1
  fi
  if [[ "${azure_storage_account_name}" = "" ]]; then
    echo "azure_storage_account_name cannot be an empty string for ABFS"
    return 1
  fi
  if [[ "${azure_storage_container_name}" = "" ]]; then
    echo "azure_storage_container_name cannot be an empty string for ABFS"
    return 1
  fi
  domain="${azure_storage_account_name}.dfs.core.windows.net"
  DEFAULT_FS="abfss://${azure_storage_container_name}@${domain}"
  export DEFAULT_FS
elif [ "${TARGET_FILESYSTEM}" = "gs" ]; then
  # Basic error checking
  if [[ "${GOOGLE_APPLICATION_CREDENTIALS}" = "" ]]; then
    echo "GOOGLE_APPLICATION_CREDENTIALS should be set to the JSON file that contains
      your service account key."
    return 1
  fi
  DEFAULT_FS="gs://${GCS_BUCKET}"
  export DEFAULT_FS
elif [ "${TARGET_FILESYSTEM}" = "cosn" ]; then
  # Basic error checking
  if [[ "${COS_SECRET_ID}" = "" ]]; then
    echo "COS_SECRET_ID cannot be an empty string for COS"
    return 1
  fi
  if [[ "${COS_SECRET_KEY}" = "" ]]; then
    echo "COS_SECRET_KEY cannot be an empty string for COS"
    return 1
  fi
  if [[ "${COS_REGION}" = "" ]]; then
    echo "COS_REGION cannot be an empty string for COS"
    return 1
  fi
  if [[ "${COS_BUCKET}" = "" ]]; then
    echo "COS_BUCKET cannot be an empty string for COS"
    return 1
  fi
  DEFAULT_FS="cosn://${COS_BUCKET}"
  export DEFAULT_FS
elif [ "${TARGET_FILESYSTEM}" = "oss" ]; then
  # Basic error checking
  if [[ "${OSS_ACCESS_KEY_ID}" = "" ]]; then
    echo "OSS_ACCESS_KEY_ID cannot be an empty string for OSS"
    return 1
  fi
  if [[ "${OSS_SECRET_ACCESS_KEY}" = "" ]]; then
    echo "OSS_SECRET_ACCESS_KEY cannot be an empty string for OSS"
    return 1
  fi
  if [[ "${OSS_ACCESS_ENDPOINT}" = "" ]]; then
    echo "OSS_ACCESS_ENDPOINT cannot be an empty string for OSS"
    return 1
  fi
  if [[ "${OSS_BUCKET}" = "" ]]; then
    echo "OSS_BUCKET cannot be an empty string for OSS"
    return 1
  fi
  DEFAULT_FS="oss://${OSS_BUCKET}"
  export DEFAULT_FS
elif [ "${TARGET_FILESYSTEM}" = "obs" ]; then
  # Basic error checking
  OBS_ACCESS_KEY="${OBS_ACCESS_KEY:?OBS_ACCESS_KEY cannot be an empty string for OBS}"
  OBS_SECRET_KEY="${OBS_SECRET_KEY:?OBS_SECRET_KEY cannot be an empty string for OBS}"
  OBS_ENDPOINT="${OBS_ENDPOINT:?OBS_ENDPOINT cannot be an empty string for OBS}"
  OBS_BUCKET="${OBS_BUCKET:?OBS_BUCKET cannot be an empty string for OBS}"
  DEFAULT_FS="obs://${OBS_BUCKET}"
  export OBS_ACCESS_KEY OBS_SECRET_KEY OBS_ENDPOINT DEFAULT_FS ENABLE_OBS_FILESYSTEM=true
elif [ "${TARGET_FILESYSTEM}" = "isilon" ]; then
  if [ "${ISILON_NAMENODE}" = "" ]; then
    echo "In order to access the Isilon filesystem, ISILON_NAMENODE"
    echo "needs to be a non-empty and valid address."
    return 1
  fi
  DEFAULT_FS="hdfs://${ISILON_NAMENODE}:8020"
  export DEFAULT_FS
  # Isilon manages its own replication.
  export HDFS_REPLICATION=1
elif [ "${TARGET_FILESYSTEM}" = "local" ]; then
  if [[ "${WAREHOUSE_LOCATION_PREFIX}" = "" ]]; then
    echo "WAREHOUSE_LOCATION_PREFIX cannot be an empty string for local filesystem"
    return 1
  fi
  if [ ! -d "${WAREHOUSE_LOCATION_PREFIX}" ]; then
    echo "'$WAREHOUSE_LOCATION_PREFIX' is not a directory on the local filesystem."
    return 1
  elif [ ! -r "${WAREHOUSE_LOCATION_PREFIX}" ] || \
       [ ! -w "${WAREHOUSE_LOCATION_PREFIX}" ]; then
    echo "Current user does not have read/write permissions on local filesystem path "\
         "'$WAREHOUSE_LOCATION_PREFIX'"
    return 1
  fi
  export DEFAULT_FS="${LOCAL_FS}"
  export FILESYSTEM_PREFIX="${LOCAL_FS}"
elif [ "${TARGET_FILESYSTEM}" = "hdfs" ]; then
  if [[ "${ERASURE_CODING}" = true ]]; then
    export HDFS_ERASURECODE_POLICY="RS-3-2-1024k"
    export ERASURECODE_POLICY="$HDFS_ERASURECODE_POLICY"
    export HDFS_ERASURECODE_PATH="/test-warehouse"
  fi
elif [ "${TARGET_FILESYSTEM}" = "ozone" ]; then
  export USE_OZONE_ENCRYPTION=${USE_OZONE_ENCRYPTION-true}
  export OZONE_VOLUME="impala"
  export DEFAULT_FS="ofs://${INTERNAL_LISTEN_HOST}:9862"
  export FILESYSTEM_PREFIX="${DEFAULT_FS}/${OZONE_VOLUME}"
  export WAREHOUSE_LOCATION_PREFIX="/${OZONE_VOLUME}"
  if [[ "${ERASURE_CODING}" = true ]]; then
    export OZONE_ERASURECODE_POLICY="RS-3-2-1024k"
    # Ozone normalizes the policy for internal storage. Use this string for tests.
    export ERASURECODE_POLICY="rs-3-2-1024k"
  fi
else
  echo "Unsupported filesystem '$TARGET_FILESYSTEM'"
  echo "Valid values are: hdfs, isilon, s3, abfs, adls, gs, cosn, oss, obs, local, ozone"
  return 1
fi

# Directories where local cluster logs will go when running tests or loading data
DEFAULT_LOGS_DIR="${IMPALA_HOME}/logs" # override by setting IMPALA_LOGS_DIR env var
export IMPALA_LOGS_DIR="${IMPALA_LOGS_DIR:-$DEFAULT_LOGS_DIR}"
export IMPALA_CLUSTER_LOGS_DIR="${IMPALA_LOGS_DIR}/cluster"
export IMPALA_DATA_LOADING_LOGS_DIR="${IMPALA_LOGS_DIR}/data_loading"
export IMPALA_DATA_LOADING_SQL_DIR="${IMPALA_DATA_LOADING_LOGS_DIR}/sql"
export IMPALA_FE_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/fe_tests"
export IMPALA_FE_TEST_COVERAGE_DIR="${IMPALA_FE_TEST_LOGS_DIR}/coverage"
export IMPALA_BE_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/be_tests"
export IMPALA_EE_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/ee_tests"
export IMPALA_JS_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/js_tests"
export IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR="${IMPALA_LOGS_DIR}/custom_cluster_tests"
export IMPALA_MVN_LOGS_DIR="${IMPALA_LOGS_DIR}/mvn"
export IMPALA_TIMEOUT_LOGS_DIR="${IMPALA_LOGS_DIR}/timeout_stacktrace"
# List of all Impala log dirs so they can be created by buildall.sh
export IMPALA_ALL_LOGS_DIRS="${IMPALA_CLUSTER_LOGS_DIR}
  ${IMPALA_DATA_LOADING_LOGS_DIR} ${IMPALA_DATA_LOADING_SQL_DIR}
  ${IMPALA_FE_TEST_LOGS_DIR} ${IMPALA_FE_TEST_COVERAGE_DIR}
  ${IMPALA_BE_TEST_LOGS_DIR} ${IMPALA_EE_TEST_LOGS_DIR}
  ${IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR} ${IMPALA_MVN_LOGS_DIR}
  ${IMPALA_TIMEOUT_LOGS_DIR}"

# Compute CPUs, using cgroup limits if present and not "max" (v2) or negative (v1)
awk_divide_roundup='{ cores = $1/$2; print cores==int(cores) ? cores : int(cores)+1 }'
if grep -v max /sys/fs/cgroup/cpu.max >& /dev/null; then
  # Get CPU limits under cgroups v2
  CORES=$(awk "$awk_divide_roundup" /sys/fs/cgroup/cpu.max)
  echo "Detected $CORES cores from cgroups v2"
elif grep -v '\-' /sys/fs/cgroup/cpu/cpu.cfs_quota_us >& /dev/null; then
  # Get CPU limits under cgroups v1
  CORES=$(paste /sys/fs/cgroup/cpu/cpu.cfs_quota_us /sys/fs/cgroup/cpu/cpu.cfs_period_us |
    awk "$awk_divide_roundup")
  echo "Detected $CORES cores from cgroups v1"
else
  CORES=$(getconf _NPROCESSORS_ONLN)
fi
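
# For reference: under cgroups v2, cpu.max holds "<quota> <period>", e.g.
# "150000 100000" -> 150000/100000 = 1.5, rounded up by the awk above to 2 cores.
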
# Reduce the concurrency for local tests to half the number of cores in the system.
export NUM_CONCURRENT_TESTS="${NUM_CONCURRENT_TESTS-$((CORES / 2))}"

# IMPALA-14476: Temporarily fix KUDU_MASTER_HOSTS to ipv4 address
# export KUDU_MASTER_HOSTS="${KUDU_MASTER_HOSTS:-${INTERNAL_LISTEN_HOST}}"
export KUDU_MASTER_HOSTS="${KUDU_MASTER_HOSTS:-127.0.0.1}"
export KUDU_MASTER_PORT="${KUDU_MASTER_PORT:-7051}"
export KUDU_MASTER_WEBUI_PORT="${KUDU_MASTER_WEBUI_PORT:-8051}"

export IMPALA_FE_DIR="$IMPALA_HOME/fe"
export IMPALA_BE_DIR="$IMPALA_HOME/be"
export IMPALA_WORKLOAD_DIR="$IMPALA_HOME/testdata/workloads"
export IMPALA_AUX_WORKLOAD_DIR="$IMPALA_AUX_TEST_HOME/testdata/workloads"
export IMPALA_DATASET_DIR="$IMPALA_HOME/testdata/datasets"
export IMPALA_AUX_DATASET_DIR="$IMPALA_AUX_TEST_HOME/testdata/datasets"
export IMPALA_COMMON_DIR="$IMPALA_HOME/common"
export PATH="$IMPALA_TOOLCHAIN_PACKAGES_HOME/gdb-$IMPALA_GDB_VERSION/bin:$PATH"
export PATH="$IMPALA_TOOLCHAIN_PACKAGES_HOME/cmake-$IMPALA_CMAKE_VERSION/bin:$PATH"
export PATH="$IMPALA_HOME/bin:$PATH"

export HADOOP_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
# The include and lib paths are needed to pick up hdfs.h and libhdfs.*
# Allow overriding in case we want to point to a package/install with a different layout.
export HADOOP_INCLUDE_DIR=${HADOOP_INCLUDE_DIR_OVERRIDE:-"${HADOOP_HOME}/include"}
export HADOOP_LIB_DIR=${HADOOP_LIB_DIR_OVERRIDE:-"${HADOOP_HOME}/lib"}

# Beware of adding entries from $HADOOP_HOME here, because they can change
# the order of the classpath, leading to configuration not showing up first.
export HADOOP_CLASSPATH="${HADOOP_CLASSPATH-}"
# Add Ozone Hadoop filesystem implementation when using Ozone
if [ "${TARGET_FILESYSTEM}" = "ozone" ]; then
  OZONE_JAR="ozone-filesystem-hadoop3-${IMPALA_OZONE_VERSION}.jar"
  HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${OZONE_HOME}/share/ozone/lib/${OZONE_JAR}"
fi
# Add the path containing the hadoop-aws jar, which is required to access AWS from the
# minicluster.
# Please note that the * is inside quotes, thus it won't get expanded by bash but
# by java, see "Understanding class path wildcards" at http://goo.gl/f0cfft
HADOOP_CLASSPATH="${HADOOP_CLASSPATH}:${HADOOP_HOME}/share/hadoop/tools/lib/*"

export PATH="$HADOOP_HOME/bin:$PATH"

if $USE_APACHE_RANGER; then
  export RANGER_HOME=${RANGER_HOME_OVERRIDE:-\
"${APACHE_COMPONENTS_HOME}/ranger-${IMPALA_RANGER_VERSION}-admin"}
else
  export RANGER_HOME=\
${RANGER_HOME_OVERRIDE:-"${CDP_COMPONENTS_HOME}/ranger-${IMPALA_RANGER_VERSION}-admin"}
fi
export RANGER_CONF_DIR="$IMPALA_HOME/fe/src/test/resources"

# To configure Hive logging, there's a hive-log4j2.properties[.template]
# file in fe/src/test/resources. To get it into the classpath earlier
# than the hive-log4j2.properties file included in some Hive jars,
# we must set HIVE_CONF_DIR. Additionally, on Hadoop 3, because of
# https://issues.apache.org/jira/browse/HADOOP-15019, when HIVE_CONF_DIR happens to equal
# HADOOP_CONF_DIR, it gets de-duped out of its pole position in the CLASSPATH variable,
# so we add an extra "./" into the path to avoid that. Use HADOOP_SHELL_SCRIPT_DEBUG=true
# to debug issues like this. Hive may log something like:
#   Logging initialized using configuration in file:.../fe/src/test/resources/hive-log4j2.properties
#
# To debug log4j2 loading issues, add to HADOOP_CLIENT_OPTS:
#   -Dorg.apache.logging.log4j.simplelog.StatusLogger.level=TRACE
#
# We use a unique -Dhive.log.file to distinguish the HiveMetaStore and HiveServer2 logs.
export HIVE_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"

# Hive looks for jar files in a single directory from HIVE_AUX_JARS_PATH plus
# any jars in AUX_CLASSPATH. (Or a list of jars in HIVE_AUX_JARS_PATH.)
# The Postgres JDBC driver is downloaded by maven when building the frontend.
# Export the location of Postgres JDBC driver so Ranger can pick it up.
export POSTGRES_JDBC_DRIVER="${IMPALA_FE_DIR}/target/dependency/postgresql-${IMPALA_POSTGRES_JDBC_DRIVER_VERSION}.jar"

export HIVE_AUX_JARS_PATH="$POSTGRES_JDBC_DRIVER"
# Add the jar of iceberg-hive-runtime to have HiveIcebergStorageHandler.
# Only needed by Apache Hive3 since CDP Hive3 has the jar of hive-iceberg-handler in its
# lib folder.
if $USE_APACHE_HIVE_3; then
  export HIVE_AUX_JARS_PATH="$HIVE_AUX_JARS_PATH:\
$IMPALA_HOME/fe/target/dependency/iceberg-hive-runtime-${IMPALA_ICEBERG_VERSION}.jar"
fi
export AUX_CLASSPATH=""
### Tell hive not to use jline
export HADOOP_USER_CLASSPATH_FIRST=true

# Add jars to Hive's AUX_CLASSPATH as needed.
# Newer Hive versions already have hbase-shaded-mapreduce in their library.
# This commented line is left here as an example.
# export AUX_CLASSPATH="$AUX_CLASSPATH:\
# $HBASE_HOME/lib/shaded-clients/hbase-shaded-mapreduce-${IMPALA_HBASE_VERSION}.jar"

export HBASE_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
# Suppress Ozone deprecation warning
export OZONE_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"

# To use a local build of Kudu, set KUDU_BUILD_DIR to the path Kudu was built in and
# set KUDU_CLIENT_DIR to the path KUDU was installed in.
# Example:
#   git clone https://github.com/cloudera/kudu.git
#   ...build 3rd party etc...
#   mkdir -p $KUDU_BUILD_DIR
#   cd $KUDU_BUILD_DIR
#   cmake <path to Kudu source dir>
#   make
#   DESTDIR=$KUDU_CLIENT_DIR make install
export KUDU_BUILD_DIR=${KUDU_BUILD_DIR-}
export KUDU_CLIENT_DIR=${KUDU_CLIENT_DIR-}
if [[ -n "$KUDU_BUILD_DIR" && -z "$KUDU_CLIENT_DIR" ]]; then
  echo When KUDU_BUILD_DIR is set KUDU_CLIENT_DIR must also be set. 1>&2
  return 1
fi
if [[ -z "$KUDU_BUILD_DIR" && -n "$KUDU_CLIENT_DIR" ]]; then
  echo When KUDU_CLIENT_DIR is set KUDU_BUILD_DIR must also be set. 1>&2
  return 1
fi

# Only applies to the minicluster Kudu (we always link against the libkudu_client for the
# overall build type) and does not apply when using a local Kudu build.
export USE_KUDU_DEBUG_BUILD=${USE_KUDU_DEBUG_BUILD-false}

export IMPALA_KUDU_VERSION=${IMPALA_KUDU_VERSION-"879a8f9e2"}
export IMPALA_KUDU_HOME=${IMPALA_TOOLCHAIN_PACKAGES_HOME}/kudu-$IMPALA_KUDU_VERSION
export IMPALA_KUDU_JAVA_HOME=\
${IMPALA_TOOLCHAIN_PACKAGES_HOME}/kudu-${IMPALA_KUDU_VERSION}/java
export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY=\
"file://${IMPALA_KUDU_JAVA_HOME}/repository"
export IMPALA_TOOLCHAIN_KUDU_MAVEN_REPOSITORY_ENABLED=true

# Set $THRIFT_XXX_HOME to the Thrift directory in toolchain.
export THRIFT_CPP_HOME="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_CPP_VERSION}"
export THRIFT_JAVA_HOME="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_JAVA_VERSION}"
export THRIFT_PY_HOME="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/thrift-${IMPALA_THRIFT_PY_VERSION}"

# ASAN needs a matching version of llvm-symbolizer to symbolize stack traces.
export ASAN_SYMBOLIZER_PATH="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/llvm-${IMPALA_LLVM_ASAN_VERSION}/bin/llvm-symbolizer"

export CLUSTER_DIR="${IMPALA_HOME}/testdata/cluster"

# The number of parallel build processes we should run at a time. Require 2GB memory per
# core as too many compilation processes can exhaust available memory and fail a build.
if $IS_OSX; then
  AVAILABLE_MEM=$(($(sysctl -n hw.memsize) / 1024 / 1024 / 1024))
else
  # MemTotal:       65550228 kB
  AVAILABLE_MEM=$(awk '/MemTotal/{print int($2/1024/1024)}' /proc/meminfo)
fi
if grep -v max /sys/fs/cgroup/memory.max >& /dev/null; then
  # Get memory limits under cgroups v2
  CGROUP_MEM_LIMIT=$(($(cat /sys/fs/cgroup/memory.max) / 1024 / 1024 / 1024))
  echo "Detected $CGROUP_MEM_LIMIT GB memory limit from cgroups v2"
elif grep -v '\-' /sys/fs/cgroup/memory/memory.limit_in_bytes >& /dev/null; then
  # Get memory limits under cgroups v1
  CGROUP_MEM_LIMIT=$((
    $(cat /sys/fs/cgroup/memory/memory.limit_in_bytes) / 1024 / 1024 / 1024))
  echo "Detected $CGROUP_MEM_LIMIT GB memory limit from cgroups v1"
else
  CGROUP_MEM_LIMIT=8589934591 # max int64 bytes in GB
fi
AVAILABLE_MEM=$((AVAILABLE_MEM > $CGROUP_MEM_LIMIT ? $CGROUP_MEM_LIMIT : $AVAILABLE_MEM))
if [[ $AVAILABLE_MEM -lt 5 ]]; then
  echo "Insufficient memory ($AVAILABLE_MEM GB) to build Impala"
  exit 1
fi
BOUNDED_CONCURRENCY=$((AVAILABLE_MEM / 2))
if [[ $BOUNDED_CONCURRENCY -lt $CORES ]]; then
  echo "Bounding concurrency for available memory ($AVAILABLE_MEM GB)"
else
  BOUNDED_CONCURRENCY=$CORES
fi
export IMPALA_BUILD_THREADS=${IMPALA_BUILD_THREADS:-"${BOUNDED_CONCURRENCY}"}
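# For reference: a 16-core machine with 64 GB of memory gets min(64/2, 16) = 16
# build threads, while the same machine limited to 16 GB gets 16/2 = 8.
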
# Limit number of links; only works with ninja builds.
# Determines number of concurrent links based on expected memory use.
if [[ "$IMPALA_MINIMAL_DEBUG_INFO" == "true" ||
      "$IMPALA_SPLIT_DEBUG_INFO" == "true" ]]; then
  MEM_PER_LINK=2
else
  MEM_PER_LINK=5
fi
BOUNDED_LINKS=$((AVAILABLE_MEM / MEM_PER_LINK))
if [[ $BOUNDED_LINKS -gt $IMPALA_BUILD_THREADS ]]; then
  # Avoid regressing behavior if IMPALA_BUILD_THREADS is already set to a low value.
  BOUNDED_LINKS=${IMPALA_BUILD_THREADS}
fi
export IMPALA_LINK_THREADS=${IMPALA_LINK_THREADS:-"${BOUNDED_LINKS}"}
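# For reference: with full debug info (MEM_PER_LINK=5) and 64 GB of memory this
# allows 64/5 = 12 concurrent links, further capped at IMPALA_BUILD_THREADS.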

# Additional flags to pass to make or ninja.
export IMPALA_MAKE_FLAGS=${IMPALA_MAKE_FLAGS-}

# Some environments (like the packaging build) might not have $USER set. Fix that here.
export USER="${USER-`id -un`}"

# These arguments are, despite the name, passed to every JVM created
# by an impalad.
# - Enable JNI check
#   When running hive UDFs, this check makes it unacceptably slow (over 100x).
#   Enable it if you suspect a JNI issue.
#   TODO: figure out how to turn this off only for the code that can't run with it.
#   LIBHDFS_OPTS="-Xcheck:jni -Xcheck:nabounds"
export LIBHDFS_OPTS="${LIBHDFS_OPTS:-} -Djava.library.path=${HADOOP_LIB_DIR}/native/"
LIBHDFS_OPTS+=" -XX:ErrorFile=${IMPALA_LOGS_DIR}/hs_err_pid%p.log"


# IMPALA-5080: Our use of PermGen space sometimes exceeds the default maximum while
# running tests that load UDF jars.
LIBHDFS_OPTS="${LIBHDFS_OPTS} -XX:MaxPermSize=128m"

export CLASSPATH="$IMPALA_FE_DIR/target/dependency${CLASSPATH:+:${CLASSPATH}}"
CLASSPATH="$IMPALA_FE_DIR/target/classes:$CLASSPATH"
CLASSPATH="$IMPALA_FE_DIR/src/test/resources:$CLASSPATH"

# A marker in the environment to prove that we really did source this file
export IMPALA_CONFIG_SOURCED=1

echo "IMPALA_VERSION          = $IMPALA_VERSION"
echo "IMPALA_HOME             = $IMPALA_HOME"
echo "HADOOP_HOME             = $HADOOP_HOME"
echo "HADOOP_CONF_DIR         = $HADOOP_CONF_DIR"
echo "HADOOP_INCLUDE_DIR      = $HADOOP_INCLUDE_DIR"
echo "HADOOP_LIB_DIR          = $HADOOP_LIB_DIR"
echo "IMPALA_CLUSTER_NODES_DIR= $IMPALA_CLUSTER_NODES_DIR"
echo "HIVE_HOME               = $HIVE_HOME"
echo "HIVE_CONF_DIR           = $HIVE_CONF_DIR"
echo "HIVE_SRC_DIR            = $HIVE_SRC_DIR"
echo "HBASE_HOME              = $HBASE_HOME"
echo "HBASE_CONF_DIR          = $HBASE_CONF_DIR"
echo "OZONE_HOME              = $OZONE_HOME"
echo "OZONE_CONF_DIR          = $OZONE_CONF_DIR"
echo "RANGER_HOME             = $RANGER_HOME"
echo "RANGER_CONF_DIR         = $RANGER_CONF_DIR"
echo "THRIFT_CPP_HOME         = $THRIFT_CPP_HOME"
echo "THRIFT_JAVA_HOME        = $THRIFT_JAVA_HOME"
echo "THRIFT_PY_HOME          = $THRIFT_PY_HOME"
echo "CLASSPATH               = $CLASSPATH"
echo "LIBHDFS_OPTS            = $LIBHDFS_OPTS"
echo "JAVA_HOME               = $JAVA_HOME"
echo "IMPALA_JAVA_TARGET      = $IMPALA_JAVA_TARGET"
echo "POSTGRES_JDBC_DRIVER    = $POSTGRES_JDBC_DRIVER"
echo "IMPALA_TOOLCHAIN        = $IMPALA_TOOLCHAIN"
echo "IMPALA_TOOLCHAIN_PACKAGES_HOME = $IMPALA_TOOLCHAIN_PACKAGES_HOME"
echo "METASTORE_DB            = $METASTORE_DB"
echo "DOWNLOAD_CDH_COMPONENTS = $DOWNLOAD_CDH_COMPONENTS"
echo "IMPALA_MAVEN_OPTIONS    = $IMPALA_MAVEN_OPTIONS"
echo "IMPALA_TOOLCHAIN_HOST   = $IMPALA_TOOLCHAIN_HOST"
echo "CDP_BUILD_NUMBER        = $CDP_BUILD_NUMBER"
echo "CDP_COMPONENTS_HOME     = $CDP_COMPONENTS_HOME"
if $USE_APACHE_COMPONENTS; then
  echo "APACHE_MIRROR           = $APACHE_MIRROR"
  echo "APACHE_COMPONENTS_HOME  = $APACHE_COMPONENTS_HOME"
fi
echo "IMPALA_HADOOP_VERSION   = $IMPALA_HADOOP_VERSION"
echo "IMPALA_AVRO_JAVA_VERSION= $IMPALA_AVRO_JAVA_VERSION"
echo "IMPALA_PARQUET_VERSION  = $IMPALA_PARQUET_VERSION"
echo "IMPALA_HIVE_VERSION     = $IMPALA_HIVE_VERSION"
echo "IMPALA_TEZ_VERSION      = $IMPALA_TEZ_VERSION"
echo "IMPALA_HBASE_VERSION    = $IMPALA_HBASE_VERSION"
echo "IMPALA_ORC_JAVA_VERSION = $IMPALA_ORC_JAVA_VERSION"
echo "IMPALA_OZONE_VERSION    = $IMPALA_OZONE_VERSION"
echo "IMPALA_HUDI_VERSION     = $IMPALA_HUDI_VERSION"
echo "IMPALA_KUDU_VERSION     = $IMPALA_KUDU_VERSION"
echo "IMPALA_RANGER_VERSION   = $IMPALA_RANGER_VERSION"
echo "IMPALA_ICEBERG_VERSION  = $IMPALA_ICEBERG_VERSION"
echo "IMPALA_PAIMON_VERSION   = $IMPALA_PAIMON_VERSION"
echo "IMPALA_COS_VERSION      = $IMPALA_COS_VERSION"
echo "IMPALA_OBS_VERSION      = $IMPALA_OBS_VERSION"
echo "IMPALA_SYSTEM_PYTHON2   = $IMPALA_SYSTEM_PYTHON2"
echo "IMPALA_SYSTEM_PYTHON3   = $IMPALA_SYSTEM_PYTHON3"
echo "IMPALA_BUILD_THREADS    = $IMPALA_BUILD_THREADS"
echo "IMPALA_LINK_THREADS     = $IMPALA_LINK_THREADS"
echo "NUM_CONCURRENT_TESTS    = $NUM_CONCURRENT_TESTS"
echo "USE_CUSTOM_IMPALA_BASE_IMAGE = $USE_CUSTOM_IMPALA_BASE_IMAGE"
echo "IMPALA_CUSTOM_DOCKER_BASE = $IMPALA_CUSTOM_DOCKER_BASE"

# Kerberos things. If the cluster exists and is kerberized, source
# the required environment. This is required for any hadoop tool to
# work. Note that if impala-config.sh is sourced before the
# kerberized cluster is created, it will have to be sourced again
# *after* the cluster is created in order to pick up these settings.
export MINIKDC_ENV="${IMPALA_HOME}/testdata/bin/minikdc_env.sh"
if "${CLUSTER_DIR}/admin" is_kerberized ||
  ( ! "${CLUSTER_DIR}/admin" cluster_exists && [[ "$IMPALA_KERBERIZE" == "true" ]] ); then

  . "${MINIKDC_ENV}"
  echo " *** This cluster is kerberized ***"
  echo "KRB5_KTNAME = $KRB5_KTNAME"
  echo "KRB5_CONFIG = $KRB5_CONFIG"
  echo "KRB5_TRACE  = ${KRB5_TRACE:-}"
  echo "HADOOP_OPTS = $HADOOP_OPTS"
  echo " *** This cluster is kerberized ***"
else
  # If the cluster *isn't* kerberized, ensure that the environment isn't
  # polluted with kerberos items that might screw us up. We go through
  # everything set in the minikdc environment and explicitly unset it.
  unset `grep export "${MINIKDC_ENV}" | sed "s/.*export \([^=]*\)=.*/\1/" \
      | sort | uniq`
fi
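
# For reference: the grep/sed pipeline above turns each "export NAME=value" line
# in minikdc_env.sh (e.g. "export KRB5_CONFIG=..." -- value elided) into the bare
# variable name, so every variable the minikdc environment sets gets unset here.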

# Check for minimum required Java version
if [[ $IMPALA_JAVA_TARGET -le 7 ]]; then
  cat << EOF

WARNING: Your development environment is configured for Hadoop 3 and Java
$IMPALA_JAVA_TARGET. Hadoop 3 requires at least Java 8. Your JAVA binary
currently points to $JAVA and reports the following version:

EOF
  $JAVA -version
  echo
fi