IMPALA-13147: Limit concurrency of link jobs

Configures separate compile and link pools for ninja. Link parallelism
is based on expected memory use, which can be reduced by setting
IMPALA_MINIMAL_DEBUG_INFO=true or IMPALA_SPLIT_DEBUG_INFO=true.

Adds IMPALA_MAKE_CMD to simplify using the ninja build tool for all make
operations in scripts. Installs ninja on Ubuntu. Adds a '-make' option to
buildall.sh to force using 'make'.

Adds MOLD_JOBS=1 to avoid overloading the system when trying 'mold' and
linking test binaries. However, 'mold' is not selected as the default
due to test failures around SASL/GSSAPI (see IMPALA-14527).

Switches bin/jenkins/all-tests.sh to use ninja and removes the guard in
bootstrap_development.sh limiting IMPALA_BUILD_THREADS as it's no longer
needed with ninja.

SKIP_BE_TEST_PATTERN in run-backend-tests is effectively unused (it only
applies with TARGET_FILESYSTEM=local), so I don't attempt to make it work
with ninja.

Tested locally with 'IMPALA_SPLIT_DEBUG_INFO=true buildall.sh -skiptests'
using both the default (make) and IMPALA_MAKE_CMD=ninja.

Change-Id: I0952dc19ace5c9c42bed0d2ffb61499656c0a2db
Reviewed-on: http://gerrit.cloudera.org:8080/23572
Reviewed-by: Joe McDonnell <joemcdonnell@cloudera.com>
Reviewed-by: Pranav Lodha <pranav.lodha@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Michael Smith
2025-10-22 13:23:22 -07:00
committed by Impala Public Jenkins
parent aa4f19219c
commit c3dc7f9667
10 changed files with 55 additions and 27 deletions

View File

@@ -36,6 +36,12 @@ set(IMPALA_BUILD_SHARED_LIBS ${BUILD_SHARED_LIBS})
# Build compile commands database # Build compile commands database
set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
# Configure ninja build pools
set(CMAKE_JOB_POOLS compilation_pool=$ENV{IMPALA_BUILD_THREADS}
link_pool=$ENV{IMPALA_LINK_THREADS})
set(CMAKE_JOB_POOL_COMPILE compilation_pool)
set(CMAKE_JOB_POOL_LINK link_pool)
# Codegen-dependent executables need to be linked with -rdynamic; otherwise LLVM # Codegen-dependent executables need to be linked with -rdynamic; otherwise LLVM
# can't find dependent symbols at runtime. # can't find dependent symbols at runtime.
# #

View File

@@ -57,11 +57,13 @@ can do so through the environment variables and scripts listed below.
| Environment variable | Default value | Description | | Environment variable | Default value | Description |
|----------------------|---------------|-------------| |----------------------|---------------|-------------|
| IMPALA_BUILD_THREADS | "8" or set to number of processors by default. | Used for make -j and distcc -j settings. | | IMPALA_BUILD_THREADS | Number of processors. | Used for make -j and distcc -j settings. |
| IMPALA_LINK_THREADS | Bounded based on available memory. | Used for ninja. |
| IMPALA_MAKE_CMD | "make" | Make tool to use by default, options are make or ninja. |
| IMPALA_MAKE_FLAGS | "" | Any extra settings to pass to make. Also used when copying udfs / udas into HDFS. | | IMPALA_MAKE_FLAGS | "" | Any extra settings to pass to make. Also used when copying udfs / udas into HDFS. |
| USE_SYSTEM_GCC | "0" | If set to any other value, directs cmake to not set GCC_ROOT, CMAKE_C_COMPILER, CMAKE_CXX_COMPILER, as well as setting TOOLCHAIN_LINK_FLAGS | | USE_SYSTEM_GCC | "0" | If set to any other value, directs cmake to not set GCC_ROOT, CMAKE_C_COMPILER, CMAKE_CXX_COMPILER, as well as setting TOOLCHAIN_LINK_FLAGS |
| IMPALA_CXX_COMPILER | "default" | Used by cmake (cmake_modules/toolchain and clang_toolchain.cmake) to select gcc / clang | | IMPALA_CXX_COMPILER | "default" | Used by cmake (cmake_modules/toolchain and clang_toolchain.cmake) to select gcc / clang |
| IMPALA_LINKER. | "gold" | Specifies the linker to use. | | IMPALA_LINKER | "gold" | Specifies the linker to use; options are "gold", "mold", or "ld". |
| IS_OSX | "false" | (Experimental) currently only used to disable Kudu. | | IS_OSX | "false" | (Experimental) currently only used to disable Kudu. |
## Dependencies ## Dependencies

View File

@@ -36,7 +36,7 @@ sudo -E apt-get --quiet update
# unversioned python-dev and python-setuptools are not available on newer releases # unversioned python-dev and python-setuptools are not available on newer releases
# that don't support Python 2. Add them only when they exist for the platform, # that don't support Python 2. Add them only when they exist for the platform,
# otherwise set Python 3 to be the default Python version. # otherwise set Python 3 to be the default Python version.
PACKAGES='g++ gcc git libsasl2-dev libssl-dev make PACKAGES='g++ gcc git libsasl2-dev libssl-dev make ninja-build
python3-dev python3-setuptools python3-venv libffi-dev language-pack-en python3-dev python3-setuptools python3-venv libffi-dev language-pack-en
libkrb5-dev krb5-admin-server krb5-kdc krb5-user libxml2-dev libxslt-dev wget' libkrb5-dev krb5-admin-server krb5-kdc krb5-user libxml2-dev libxslt-dev wget'

View File

@@ -54,16 +54,6 @@ source "${BINDIR}/bootstrap_system.sh"
export MAX_PYTEST_FAILURES=0 export MAX_PYTEST_FAILURES=0
source bin/impala-config.sh > /dev/null 2>&1 source bin/impala-config.sh > /dev/null 2>&1
BOUNDED_CONCURRENCY=$((AVAILABLE_MEM / 4))
if [[ $AVAILABLE_MEM -lt 4 ]]; then
echo "Insufficient memory ($AVAILABLE_MEM GB) to link Impala test binaries"
echo "Increase memory, or run buildall.sh -format -testdata -notests"
exit 1
elif [[ $BOUNDED_CONCURRENCY -lt $IMPALA_BUILD_THREADS ]]; then
echo "Bounding concurrency to $BOUNDED_CONCURRENCY for link phase"
IMPALA_BUILD_THREADS=$BOUNDED_CONCURRENCY
fi
time -p ./buildall.sh -format -testdata -skiptests time -p ./buildall.sh -format -testdata -skiptests
# To then run the tests: # To then run the tests:

View File

@@ -27,7 +27,7 @@ setup_report_build_error
# If the project was never built, no Makefile will exist and thus make clean will fail. # If the project was never built, no Makefile will exist and thus make clean will fail.
# Combine the make command with the bash noop to always return true. # Combine the make command with the bash noop to always return true.
"${MAKE_CMD:-make}" clean || : "${MAKE_CMD:-${IMPALA_MAKE_CMD}}" clean || :
# clean Java projects # clean Java projects
pushd "${IMPALA_HOME}/java" pushd "${IMPALA_HOME}/java"

View File

@@ -580,6 +580,9 @@ export LIB_JAVA=$(find "${JAVA_HOME}/" -name libjava.so | head -1)
export LIB_JSIG=$(find "${JAVA_HOME}/" -name libjsig.so | head -1) export LIB_JSIG=$(find "${JAVA_HOME}/" -name libjsig.so | head -1)
export LIB_JVM=$(find "${JAVA_HOME}/" -name libjvm.so | head -1) export LIB_JVM=$(find "${JAVA_HOME}/" -name libjvm.so | head -1)
# Default to make, but allow overriding to e.g. ninja.
export IMPALA_MAKE_CMD=${IMPALA_MAKE_CMD:-make}
######################################################################################### #########################################################################################
# Below here are variables that can be overridden by impala-config-*.sh and environment # # Below here are variables that can be overridden by impala-config-*.sh and environment #
# vars, variables computed based on other variables, and variables that cannot be # # vars, variables computed based on other variables, and variables that cannot be #
@@ -608,6 +611,10 @@ export USE_SYSTEM_GCC=${USE_SYSTEM_GCC-0}
# TODO: Add support for lld as well # TODO: Add support for lld as well
export IMPALA_LINKER=${IMPALA_LINKER-gold} export IMPALA_LINKER=${IMPALA_LINKER-gold}
# Limit mold to a single job to avoid excessive memory consumption while fully utilizing
# available CPUs.
export MOLD_JOBS=${IMPALA_MOLD_JOBS-1}
# Override the default compiler by setting a path to the new compiler. The default # Override the default compiler by setting a path to the new compiler. The default
# compiler depends on USE_SYSTEM_GCC and IMPALA_GCC_VERSION. The intended use case # compiler depends on USE_SYSTEM_GCC and IMPALA_GCC_VERSION. The intended use case
# is to set the compiler to distcc, in that case the user would also set # is to set the compiler to distcc, in that case the user would also set
@@ -1023,7 +1030,7 @@ export IMPALA_DATASET_DIR="$IMPALA_HOME/testdata/datasets"
export IMPALA_AUX_DATASET_DIR="$IMPALA_AUX_TEST_HOME/testdata/datasets" export IMPALA_AUX_DATASET_DIR="$IMPALA_AUX_TEST_HOME/testdata/datasets"
export IMPALA_COMMON_DIR="$IMPALA_HOME/common" export IMPALA_COMMON_DIR="$IMPALA_HOME/common"
export PATH="$IMPALA_TOOLCHAIN_PACKAGES_HOME/gdb-$IMPALA_GDB_VERSION/bin:$PATH" export PATH="$IMPALA_TOOLCHAIN_PACKAGES_HOME/gdb-$IMPALA_GDB_VERSION/bin:$PATH"
export PATH="$IMPALA_TOOLCHAIN_PACKAGES_HOME/cmake-$IMPALA_CMAKE_VERSION/bin/:$PATH" export PATH="$IMPALA_TOOLCHAIN_PACKAGES_HOME/cmake-$IMPALA_CMAKE_VERSION/bin:$PATH"
export PATH="$IMPALA_HOME/bin:$PATH" export PATH="$IMPALA_HOME/bin:$PATH"
export HADOOP_CONF_DIR="$IMPALA_FE_DIR/src/test/resources" export HADOOP_CONF_DIR="$IMPALA_FE_DIR/src/test/resources"
@@ -1165,16 +1172,31 @@ else
CGROUP_MEM_LIMIT=8589934591 # max int64 bytes in GB CGROUP_MEM_LIMIT=8589934591 # max int64 bytes in GB
fi fi
AVAILABLE_MEM=$((AVAILABLE_MEM > $CGROUP_MEM_LIMIT ? $CGROUP_MEM_LIMIT : $AVAILABLE_MEM)) AVAILABLE_MEM=$((AVAILABLE_MEM > $CGROUP_MEM_LIMIT ? $CGROUP_MEM_LIMIT : $AVAILABLE_MEM))
BOUNDED_CONCURRENCY=$((AVAILABLE_MEM / 2)) if [[ $AVAILABLE_MEM -lt 5 ]]; then
if [[ $AVAILABLE_MEM -lt 2 ]]; then
echo "Insufficient memory ($AVAILABLE_MEM GB) to build Impala" echo "Insufficient memory ($AVAILABLE_MEM GB) to build Impala"
exit 1 exit 1
elif [[ $BOUNDED_CONCURRENCY -lt $CORES ]]; then fi
BOUNDED_CONCURRENCY=$((AVAILABLE_MEM / 2))
if [[ $BOUNDED_CONCURRENCY -lt $CORES ]]; then
echo "Bounding concurrency for available memory ($AVAILABLE_MEM GB)" echo "Bounding concurrency for available memory ($AVAILABLE_MEM GB)"
else else
BOUNDED_CONCURRENCY=$CORES BOUNDED_CONCURRENCY=$CORES
fi fi
export IMPALA_BUILD_THREADS=${IMPALA_BUILD_THREADS-"${BOUNDED_CONCURRENCY}"} export IMPALA_BUILD_THREADS=${IMPALA_BUILD_THREADS:-"${BOUNDED_CONCURRENCY}"}
# Limit number of links; only works with ninja builds.
# Determines number of concurrent links based on expected memory use.
if [[ "$IMPALA_MINIMAL_DEBUG_INFO" == "true" ||
"$IMPALA_SPLIT_DEBUG_INFO" == "true" ]]; then
MEM_PER_LINK=2
else
MEM_PER_LINK=5
fi
BOUNDED_LINKS=$((AVAILABLE_MEM / MEM_PER_LINK))
if [[ $BOUNDED_LINKS -gt $IMPALA_BUILD_THREADS ]]; then
# Avoid regressing behavior if IMPALA_BUILD_THREADS is already set to a low value.
BOUNDED_LINKS=${IMPALA_BUILD_THREADS}
fi
export IMPALA_LINK_THREADS=${IMPALA_LINK_THREADS:-"${BOUNDED_LINKS}"}
# Additional flags to pass to make or ninja. # Additional flags to pass to make or ninja.
export IMPALA_MAKE_FLAGS=${IMPALA_MAKE_FLAGS-} export IMPALA_MAKE_FLAGS=${IMPALA_MAKE_FLAGS-}
@@ -1258,6 +1280,7 @@ echo "IMPALA_OBS_VERSION = $IMPALA_OBS_VERSION"
echo "IMPALA_SYSTEM_PYTHON2 = $IMPALA_SYSTEM_PYTHON2" echo "IMPALA_SYSTEM_PYTHON2 = $IMPALA_SYSTEM_PYTHON2"
echo "IMPALA_SYSTEM_PYTHON3 = $IMPALA_SYSTEM_PYTHON3" echo "IMPALA_SYSTEM_PYTHON3 = $IMPALA_SYSTEM_PYTHON3"
echo "IMPALA_BUILD_THREADS = $IMPALA_BUILD_THREADS" echo "IMPALA_BUILD_THREADS = $IMPALA_BUILD_THREADS"
echo "IMPALA_LINK_THREADS = $IMPALA_LINK_THREADS"
echo "NUM_CONCURRENT_TESTS = $NUM_CONCURRENT_TESTS" echo "NUM_CONCURRENT_TESTS = $NUM_CONCURRENT_TESTS"
echo "USE_CUSTOM_IMPALA_BASE_IMAGE = $USE_CUSTOM_IMPALA_BASE_IMAGE" echo "USE_CUSTOM_IMPALA_BASE_IMAGE = $USE_CUSTOM_IMPALA_BASE_IMAGE"
echo "IMPALA_CUSTOM_DOCKER_BASE = $IMPALA_CUSTOM_DOCKER_BASE" echo "IMPALA_CUSTOM_DOCKER_BASE = $IMPALA_CUSTOM_DOCKER_BASE"

View File

@@ -31,6 +31,9 @@ export IMPALA_MAVEN_OPTIONS="-U"
# Allow unlimited pytest failures # Allow unlimited pytest failures
export MAX_PYTEST_FAILURES=0 export MAX_PYTEST_FAILURES=0
# Use ninja for better link concurrency.
export IMPALA_MAKE_CMD=ninja
# When UBSAN_FAIL is "death", the logs are monitored for UBSAN errors. Any errors will # When UBSAN_FAIL is "death", the logs are monitored for UBSAN errors. Any errors will
# then cause this script to exit. # then cause this script to exit.
# #

View File

@@ -28,11 +28,6 @@ export GTEST_OUTPUT="xml:$IMPALA_BE_TEST_LOGS_DIR/"
# The backend unit tests currently do not work when HEAPCHECK is enabled. # The backend unit tests currently do not work when HEAPCHECK is enabled.
export HEAPCHECK= export HEAPCHECK=
BE_TEST_ARGS=""
if [[ -n "$SKIP_BE_TEST_PATTERN" ]]; then
BE_TEST_ARGS="-E ${SKIP_BE_TEST_PATTERN}"
fi
cd ${IMPALA_BE_DIR} cd ${IMPALA_BE_DIR}
. ${IMPALA_HOME}/bin/set-classpath.sh . ${IMPALA_HOME}/bin/set-classpath.sh
cd .. cd ..
@@ -44,4 +39,10 @@ export ASAN_OPTIONS="disable_coredump=0:unmap_shadow_on_exit=1"
export UBSAN_OPTIONS="disable_coredump=0:unmap_shadow_on_exit=1" export UBSAN_OPTIONS="disable_coredump=0:unmap_shadow_on_exit=1"
export PATH="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/llvm-${IMPALA_LLVM_VERSION}/bin:${PATH}" export PATH="${IMPALA_TOOLCHAIN_PACKAGES_HOME}/llvm-${IMPALA_LLVM_VERSION}/bin:${PATH}"
"${MAKE_CMD:-make}" test ARGS="${BE_TEST_ARGS}" if [[ -n "$SKIP_BE_TEST_PATTERN" ]]; then
# Requires make, will fail with ninja.
"${MAKE_CMD:-${IMPALA_MAKE_CMD}}" test ARGS="-E ${SKIP_BE_TEST_PATTERN}"
else
# Ninja doesn't accept additional parameters, so omit ARGS.
"${MAKE_CMD:-${IMPALA_MAKE_CMD}}" test
fi

View File

@@ -80,7 +80,7 @@ BUILD_DEBUG_NOOPT=0
BUILD_SHARED_LIBS=0 BUILD_SHARED_LIBS=0
UDF_DEVEL=0 UDF_DEVEL=0
# Export MAKE_CMD so it is visible in scripts that invoke make, e.g. copy-udfs-udas.sh # Export MAKE_CMD so it is visible in scripts that invoke make, e.g. copy-udfs-udas.sh
export MAKE_CMD=make export MAKE_CMD=${IMPALA_MAKE_CMD:-make}
# Defaults that can be picked up from the environment, but are overridable through the # Defaults that can be picked up from the environment, but are overridable through the
# commandline. # commandline.
@@ -203,6 +203,9 @@ do
-ninja) -ninja)
MAKE_CMD=ninja MAKE_CMD=ninja
;; ;;
-make)
MAKE_CMD=make
;;
-cmake_only) -cmake_only)
GEN_CMAKE_ONLY=1 GEN_CMAKE_ONLY=1
;; ;;

View File

@@ -49,7 +49,7 @@ done
if [ $BUILD -eq 1 ] if [ $BUILD -eq 1 ]
then then
pushd "${IMPALA_HOME}" pushd "${IMPALA_HOME}"
"${MAKE_CMD:-make}" ${IMPALA_MAKE_FLAGS} "-j${IMPALA_BUILD_THREADS:-4}" \ "${MAKE_CMD:-${IMPALA_MAKE_CMD}}" ${IMPALA_MAKE_FLAGS} "-j${IMPALA_BUILD_THREADS:-4}" \
TestUdas TestUdfs test-udfs-ir udfsample udasample udf-sample-ir uda-sample-ir TestUdas TestUdfs test-udfs-ir udfsample udasample udf-sample-ir uda-sample-ir
cd "${IMPALA_HOME}/java/test-corrupt-hive-udfs" cd "${IMPALA_HOME}/java/test-corrupt-hive-udfs"
"${IMPALA_HOME}/bin/mvn-quiet.sh" package "${IMPALA_HOME}/bin/mvn-quiet.sh" package