IMPALA-12441: Simplify local toolchain development

If NATIVE_TOOLCHAIN_HOME is set, it is used to provide the native
toolchain instead of the default in IMPALA_TOOLCHAIN. Setting it overrides
IMPALA_TOOLCHAIN_PACKAGES_HOME and sets SKIP_TOOLCHAIN_BOOTSTRAP=true.

Adds IMPALA_TOOLCHAIN_REPO, IMPALA_TOOLCHAIN_BRANCH, and
IMPALA_TOOLCHAIN_COMMIT_HASH so that it is explicit which toolchain
is used for this Impala commit.

If NATIVE_TOOLCHAIN_HOME does not yet exist, buildall.sh will clone the
repo and check out the commit hash mentioned above before building.

Also skips downloading Kudu if SKIP_TOOLCHAIN_BOOTSTRAP is true, as Kudu
is built from native-toolchain. Normalizes the aarch64 logic, which skipped
Kudu because aarch64 would always build native-toolchain locally.

Change-Id: I3a9e51b7f54c738d8cc01b32428ac88a344de376
Reviewed-on: http://gerrit.cloudera.org:8080/20267
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Reviewed-by: Michael Smith <michael.smith@cloudera.com>
This commit is contained in:
Michael Smith
2023-07-24 16:24:47 -07:00
parent 130a55e526
commit 4be517e150
4 changed files with 40 additions and 28 deletions

View File

@@ -481,23 +481,12 @@ echo -e "\n$SET_IMPALA_HOME" >> ~/.bashrc
eval "$SET_IMPALA_HOME"
if [[ $ARCH_NAME == 'aarch64' ]]; then
echo -e "\nexport SKIP_TOOLCHAIN_BOOTSTRAP=true" >> \
"${IMPALA_HOME}/bin/impala-config-local.sh"
SET_TOOLCHAIN_HOME="export NATIVE_TOOLCHAIN_HOME=${IMPALA_HOME}/../native-toolchain"
echo -e "\n$SET_TOOLCHAIN_HOME" >> ~/.bashrc
echo -e "\n$SET_TOOLCHAIN_HOME" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
eval "$SET_TOOLCHAIN_HOME"
if ! [[ -d "$NATIVE_TOOLCHAIN_HOME" ]]; then
time -p git clone https://github.com/cloudera/native-toolchain/ \
"$NATIVE_TOOLCHAIN_HOME"
fi
cd "$NATIVE_TOOLCHAIN_HOME"
git pull
echo "Begin build tool chain, may need several hours, please be patient...."
# Provide access to ~/.cache on build machines so we can use ccache.
sudo chmod 755 ~/.cache
./buildall.sh
cd -
mkdir -p ${IMPALA_HOME}/toolchain
fi
# Try to prepopulate the m2 directory to save time

View File

@@ -568,7 +568,8 @@ def main():
if os.getenv("DOWNLOAD_CDH_COMPONENTS", "false") == "true":
create_directory_from_env_var("CDP_COMPONENTS_HOME")
create_directory_from_env_var("APACHE_COMPONENTS_HOME")
if platform.processor() != "aarch64":
if os.getenv("SKIP_TOOLCHAIN_BOOTSTRAP", "false") != "true":
# Kudu is currently sourced from native-toolchain
downloads += get_kudu_downloads()
downloads += get_hadoop_downloads()

View File

@@ -86,6 +86,11 @@ export USE_AVRO_CPP=${USE_AVRO_CPP:=false}
# compile option is changed. The build id can be found in the output of the toolchain
# build jobs, it is constructed from the build number and toolchain git hash prefix.
export IMPALA_TOOLCHAIN_BUILD_ID=358-e7cfab15d3
# Git coordinates of the native-toolchain sources. Each value can be overridden from
# the environment. REPO and BRANCH fall back to their defaults when unset or empty;
# COMMIT_HASH uses "-" rather than ":-", so it falls back only when unset and an
# explicitly empty value is preserved.
# NOTE(review): the default hash begins with the hash prefix that appears in
# IMPALA_TOOLCHAIN_BUILD_ID; presumably the two must be updated together - confirm.
export IMPALA_TOOLCHAIN_REPO=\
${IMPALA_TOOLCHAIN_REPO:-https://github.com/cloudera/native-toolchain.git}
export IMPALA_TOOLCHAIN_BRANCH=${IMPALA_TOOLCHAIN_BRANCH:-master}
export IMPALA_TOOLCHAIN_COMMIT_HASH=\
${IMPALA_TOOLCHAIN_COMMIT_HASH-e7cfab15d36ae051747252b676f0a11a9c58fe05}
# Versions of toolchain dependencies.
# -----------------------------------
if $USE_AVRO_CPP; then
@@ -316,8 +321,13 @@ fi
# IMPALA_TOOLCHAIN_PACKAGES_HOME is the location inside IMPALA_TOOLCHAIN where native
# toolchain packages are placed. This uses a subdirectory that contains the information
# about the compiler to allow using different compiler versions.
export IMPALA_TOOLCHAIN_PACKAGES_HOME=\
IMPALA_TOOLCHAIN_PACKAGES_HOME=\
${IMPALA_TOOLCHAIN}/toolchain-packages-gcc${IMPALA_GCC_VERSION}
# A locally built native-toolchain takes precedence over the downloaded one: when
# NATIVE_TOOLCHAIN_HOME is set and non-empty, point the packages directory at its
# build/ subdirectory and mark the toolchain bootstrap as skipped.
# The expansions are quoted so that a path containing whitespace is passed to the
# test and to realpath as a single word.
if [ -n "${NATIVE_TOOLCHAIN_HOME-}" ]; then
  IMPALA_TOOLCHAIN_PACKAGES_HOME="$(realpath "${NATIVE_TOOLCHAIN_HOME}")/build"
  export SKIP_TOOLCHAIN_BOOTSTRAP=true
fi
export IMPALA_TOOLCHAIN_PACKAGES_HOME
export CDP_HADOOP_URL=${CDP_HADOOP_URL-}
export CDP_HBASE_URL=${CDP_HBASE_URL-}

View File

@@ -416,29 +416,41 @@ bootstrap_dependencies() {
# Populate necessary thirdparty components unless it's set to be skipped.
if [[ "${SKIP_TOOLCHAIN_BOOTSTRAP}" = true ]]; then
echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
if ! [ -z "${NATIVE_TOOLCHAIN_HOME}" ]; then
if ! [ -d "${NATIVE_TOOLCHAIN_HOME}" ]; then
mkdir -p "${NATIVE_TOOLCHAIN_HOME}"
pushd "${NATIVE_TOOLCHAIN_HOME}"
git init
git remote add toolchain "${IMPALA_TOOLCHAIN_REPO}"
git fetch toolchain "${IMPALA_TOOLCHAIN_BRANCH}"
# Specifying a branch avoids a large message from git about detached HEADs.
git checkout "${IMPALA_TOOLCHAIN_COMMIT_HASH}" -b "${IMPALA_TOOLCHAIN_BUILD_ID}"
else
pushd "${NATIVE_TOOLCHAIN_HOME}"
fi
echo "Begin building toolchain, may need several hours, please be patient...."
./buildall.sh
popd
else
echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
fi
if [[ "${DOWNLOAD_CDH_COMPONENTS}" = true ]]; then
echo ">>> Downloading and extracting cdh components."
"$IMPALA_HOME/bin/bootstrap_toolchain.py"
fi
# Create soft links to the locally built native-toolchain on aarch64
if [[ "$(uname -p)" = "aarch64" ]]; then
mkdir -p $IMPALA_TOOLCHAIN_PACKAGES_HOME
cd "$IMPALA_TOOLCHAIN_PACKAGES_HOME"
ln -f -s ${NATIVE_TOOLCHAIN_HOME}/build/* .
cd -
if ! [[ -d "$IMPALA_HOME/../hadoopAarch64NativeLibs" ]]; then
git clone https://github.com/zhaorenhai/hadoopAarch64NativeLibs \
"$IMPALA_HOME/../hadoopAarch64NativeLibs"
fi
cp $IMPALA_HOME/../hadoopAarch64NativeLibs/lib* $HADOOP_HOME/lib/native/
fi
else
echo ">>> Downloading and extracting toolchain dependencies."
"$IMPALA_HOME/bin/bootstrap_toolchain.py"
echo "Toolchain bootstrap complete."
fi
# Download prebuilt Hadoop native binaries for aarch64
if [[ "$(uname -p)" = "aarch64" ]]; then
if ! [[ -d "$IMPALA_HOME/../hadoopAarch64NativeLibs" ]]; then
git clone https://github.com/zhaorenhai/hadoopAarch64NativeLibs \
"$IMPALA_HOME/../hadoopAarch64NativeLibs"
fi
cp $IMPALA_HOME/../hadoopAarch64NativeLibs/lib* $HADOOP_HOME/lib/native/
fi
if [[ "${USE_APACHE_HIVE}" = true ]]; then
"$IMPALA_HOME/testdata/bin/patch_hive.sh"
fi