IMPALA-4259: build Impala without any test cluster setup.

The main outcome of this change is to avoid making unnecessary
modifications to the Impala or other source trees when we don't need the
test cluster.

To achieve that, this refactors the script to make the flow easier
to understand and makes it more consistent which build steps are
executed in which modes.

Change-Id: I429da7bc6681b16c07fe58bb3efac6d1a8579137
Reviewed-on: http://gerrit.cloudera.org:8080/4685
Reviewed-by: Tim Armstrong <tarmstrong@cloudera.com>
Tested-by: Internal Jenkins
This commit is contained in:
Tim Armstrong
2016-10-09 23:06:40 -07:00
committed by Internal Jenkins
parent 0b3efb19cc
commit 75a857c0ce
4 changed files with 181 additions and 163 deletions

View File

@@ -26,12 +26,7 @@ trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)
# If the project was never built, no Makefile will exist and thus make clean will fail.
# Combine the make command with the bash noop to always return true.
make clean || :
# Stop the minikdc if needed.
if "${CLUSTER_DIR}/admin" is_kerberized; then
"${IMPALA_HOME}/testdata/bin/minikdc.sh" stop
fi
"${MAKE_CMD:-make}" clean || :
# clean the external data source project
pushd ${IMPALA_HOME}/ext-data-source

View File

@@ -29,7 +29,7 @@ ROOT=`cd "$ROOT" >/dev/null; pwd`
# kerberized environment variables already or not.
NEEDS_RE_SOURCE_NOTE=1
: ${MINIKDC_REALM=}
if [ ! -z "${MINIKDC_REALM}" ]; then
if [[ ! -z "${MINIKDC_REALM}" ]]; then
NEEDS_RE_SOURCE_NOTE=0
fi
@@ -46,6 +46,7 @@ TESTS_ACTION=1
FORMAT_CLUSTER=0
FORMAT_METASTORE=0
FORMAT_SENTRY_POLICY_DB=0
START_IMPALA_CLUSTER=0
IMPALA_KERBERIZE=0
SNAPSHOT_FILE=
METASTORE_SNAPSHOT_FILE=
@@ -62,7 +63,6 @@ LZO_CMAKE_ARGS=
: ${CMAKE_BUILD_TYPE:=Debug}
# parse command line options
# TODO: We have to change this to use getopts, or something more sensible.
while [ -n "$*" ]
do
case "$1" in
@@ -113,9 +113,9 @@ do
;;
-snapshot_file)
SNAPSHOT_FILE="${2-}"
if [ ! -f "$SNAPSHOT_FILE" ]; then
if [[ ! -f "$SNAPSHOT_FILE" ]]; then
echo "-snapshot_file does not exist: $SNAPSHOT_FILE"
exit 1;
exit 1
fi
TESTDATA_ACTION=1
# Get the full path.
@@ -124,15 +124,18 @@ do
;;
-metastore_snapshot_file)
METASTORE_SNAPSHOT_FILE="${2-}"
if [ ! -f "$METASTORE_SNAPSHOT_FILE" ]; then
if [[ ! -f "$METASTORE_SNAPSHOT_FILE" ]]; then
echo "-metastore_snapshot_file does not exist: $METASTORE_SNAPSHOT_FILE"
exit 1;
exit 1
fi
TESTDATA_ACTION=1
# Get the full path.
METASTORE_SNAPSHOT_FILE="$(readlink -f "$METASTORE_SNAPSHOT_FILE")"
shift;
;;
-start_impala_cluster)
START_IMPALA_CLUSTER=1
;;
-k|-kerberize|-kerberos|-kerb)
# Export to the environment for all child process tools
export IMPALA_KERBERIZE=1
@@ -166,7 +169,9 @@ do
echo "[-asan] : Address sanitizer build [Default: False]"
echo "[-skiptests] : Skips execution of all tests"
echo "[-notests] : Skips building and execution of all tests"
echo "[-testpairwise] : Sun tests in 'pairwise' mode (increases"\
echo "[-start_impala_cluster] : Start Impala minicluster after build"\
" [Default: False]"
echo "[-testpairwise] : Run tests in 'pairwise' mode (increases"\
"test execution time)"
echo "[-testexhaustive] : Run tests in 'exhaustive' mode (significantly increases"\
"test execution time)"
@@ -228,13 +233,21 @@ if [[ ${BUILD_ASAN} -eq 1 ]]; then
fi
CMAKE_BUILD_TYPE=ADDRESS_SANITIZER
fi
MAKE_IMPALA_ARGS+=" -build_type=${CMAKE_BUILD_TYPE}"
# If we aren't kerberized then we certainly don't need to talk about
# re-sourcing impala-config.
if [ ${IMPALA_KERBERIZE} -eq 0 ]; then
if [[ ${IMPALA_KERBERIZE} -eq 0 ]]; then
NEEDS_RE_SOURCE_NOTE=0
fi
if [[ ${IMPALA_KERBERIZE} -eq 1 &&
(${TESTDATA_ACTION} -eq 1 || ${TESTS_ACTION} -eq 1) ]]; then
echo "Running tests or loading test data is not supported for kerberized clusters."
echo "Please remove the -testdata flag and/or add the -skiptests flag."
exit 1
fi
# Loading data on a filesystem other than fs.defaultFS is not supported.
if [[ -z "$METASTORE_SNAPSHOT_FILE" && "${TARGET_FILESYSTEM}" != "hdfs" &&
"$TESTDATA_ACTION" -eq 1 ]]; then
@@ -243,160 +256,130 @@ if [[ -z "$METASTORE_SNAPSHOT_FILE" && "${TARGET_FILESYSTEM}" != "hdfs" &&
exit 1
fi
# option to clean everything first
if [ "$CLEAN_ACTION" -eq 1 ]; then
"$IMPALA_HOME/bin/clean.sh"
NEED_MINICLUSTER=0
if [[ $TESTS_ACTION -eq 1 || $TESTDATA_ACTION -eq 1 || $FORMAT_CLUSTER -eq 1 ||
$FORMAT_METASTORE -eq 1 || $FORMAT_SENTRY_POLICY_DB -eq 1 || -n "$SNAPSHOT_FILE" ||
-n "$METASTORE_SNAPSHOT_FILE" ]]; then
NEED_MINICLUSTER=1
fi
# Populate necessary thirdparty components unless it's set to be skipped.
if [ "${SKIP_TOOLCHAIN_BOOTSTRAP}" = true ]; then
echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping download of Python dependencies."
echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
else
echo "Downloading Python dependencies"
# Download all the Python dependencies we need before doing anything
# of substance. Does not re-download anything that is already present.
if ! "$IMPALA_HOME/infra/python/deps/download_requirements"; then
echo "Warning: Unable to download Python requirements."
echo "Warning: bootstrap_virtualenv or other Python-based tooling may fail."
bootstrap_dependencies() {
# Populate necessary thirdparty components unless it's set to be skipped.
if [[ "${SKIP_TOOLCHAIN_BOOTSTRAP}" = true ]]; then
echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping download of Python dependencies."
echo "SKIP_TOOLCHAIN_BOOTSTRAP is true, skipping toolchain bootstrap."
else
echo "Finished downloading Python dependencies"
echo "Downloading Python dependencies"
# Download all the Python dependencies we need before doing anything
# of substance. Does not re-download anything that is already present.
if ! "$IMPALA_HOME/infra/python/deps/download_requirements"; then
echo "Warning: Unable to download Python requirements."
echo "Warning: bootstrap_virtualenv or other Python-based tooling may fail."
else
echo "Finished downloading Python dependencies"
fi
echo "Downloading and extracting toolchain dependencies."
"$IMPALA_HOME/bin/bootstrap_toolchain.py"
echo "Toolchain bootstrap complete."
fi
}
echo "Downloading and extracting toolchain dependencies."
"$IMPALA_HOME/bin/bootstrap_toolchain.py"
echo "Toolchain bootstrap complete."
fi
MAKE_IMPALA_ARGS="${MAKE_IMPALA_ARGS} -build_type=${CMAKE_BUILD_TYPE}"
if [ "$BUILD_FE_ONLY" -eq 1 ]; then
# Build the Impala frontend and its dependencies.
build_fe() {
"$IMPALA_HOME/bin/make_impala.sh" ${MAKE_IMPALA_ARGS} -cmake_only
"${MAKE_CMD}" fe
exit 0
fi
}
if [ -e "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.so ]
then
cp "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* "$HADOOP_HOME/lib/native"
else
echo "No hadoop-lzo found"
fi
# Build all components.
build_all_components() {
# Build common and backend. This also sets up the CMake files.
echo "Calling make_impala.sh ${MAKE_IMPALA_ARGS}"
"$IMPALA_HOME/bin/make_impala.sh" ${MAKE_IMPALA_ARGS}
# Stop any running Impala services.
"${IMPALA_HOME}/bin/start-impala-cluster.py" --kill --force
if [[ "$CLEAN_ACTION" -eq 1 || "$FORMAT_METASTORE" -eq 1 || "$FORMAT_CLUSTER" -eq 1 ||
"$FORMAT_SENTRY_POLICY_DB" -eq 1 || -n "$METASTORE_SNAPSHOT_FILE" ]]
then
# Kill any processes that may be accessing postgres metastore. To be safe, this is done
# before we make any changes to the config files.
set +e
"${IMPALA_HOME}/testdata/bin/kill-all.sh"
set -e
fi
CREATE_TEST_CONFIG_ARGS=""
if [[ "$FORMAT_SENTRY_POLICY_DB" -eq 1 ]]; then
CREATE_TEST_CONFIG_ARGS+=" -create_sentry_policy_db"
fi
if [[ "$FORMAT_METASTORE" -eq 1 && -z "$METASTORE_SNAPSHOT_FILE" ]]; then
CREATE_TEST_CONFIG_ARGS+=" -create_metastore"
fi
# Generate the Hadoop configs needed by Impala
"${IMPALA_HOME}/bin/create-test-configuration.sh" ${CREATE_TEST_CONFIG_ARGS}
# If a metastore snapshot exists, load it.
if [ "$METASTORE_SNAPSHOT_FILE" ]; then
echo "Loading metastore snapshot"
"${IMPALA_HOME}/testdata/bin/load-metastore-snapshot.sh" "$METASTORE_SNAPSHOT_FILE"
fi
# build common and backend
echo "Calling make_impala.sh ${MAKE_IMPALA_ARGS}"
"$IMPALA_HOME/bin/make_impala.sh" ${MAKE_IMPALA_ARGS}
if [ -e "$IMPALA_LZO" ]
then
pushd "$IMPALA_LZO"
LZO_CMAKE_ARGS+=" -DCMAKE_TOOLCHAIN_FILE=./cmake_modules/toolchain.cmake"
rm -f CMakeCache.txt
cmake ${LZO_CMAKE_ARGS}
"${MAKE_CMD}"
popd
fi
# build the external data source API
pushd "${IMPALA_HOME}/ext-data-source"
"${IMPALA_HOME}/bin/mvn-quiet.sh" install -DskipTests
popd
# build frontend and copy dependencies
pushd "${IMPALA_FE_DIR}"
"${IMPALA_HOME}/bin/mvn-quiet.sh" package -DskipTests
popd
# Build the shell tarball
echo "Creating shell tarball"
"${IMPALA_HOME}/shell/make_shell_tarball.sh"
if [ "$FORMAT_CLUSTER" -eq 1 ]; then
"$IMPALA_HOME/testdata/bin/run-all.sh" -format
elif [ "$TESTDATA_ACTION" -eq 1 ] || [ "$TESTS_ACTION" -eq 1 ]; then
"$IMPALA_HOME/testdata/bin/run-all.sh"
fi
#
# KERBEROS TODO
# There is still work to be done for kerberos.
# - The hive metastore needs to be kerberized
# - If the user principal is "impala/localhost", MR jobs complain that user
# "impala" is not user ${USER}. But if the principal is ${USER}/localhost,
# the impala daemons change it to impala/localhost in
# KerberosAuthProvider::RunKinit() - and there may be other difficulties
# down the road with getting all the permissions correct.
# - Further Beeline -> HiveServer2 -> HBase|MapReduce combo issues
# - Getting farther down the testing path, it's likely more issues will turn up
# - Further extensive testing
#
if [ ${IMPALA_KERBERIZE} -eq 1 ]; then
if [ ${TESTDATA_ACTION} -eq 1 -o ${TESTS_ACTION} -eq 1 ]; then
echo "At this time we only support cluster creation and impala daemon"
echo "bringup in kerberized mode. Data won't be loaded, and tests"
echo "won't be run. The impala daemons will be started."
TESTDATA_ACTION=0
TESTS_ACTION=0
"${IMPALA_HOME}/bin/start-impala-cluster.py"
if [[ -e "$IMPALA_LZO" ]]
then
pushd "$IMPALA_LZO"
LZO_CMAKE_ARGS+=" -DCMAKE_TOOLCHAIN_FILE=./cmake_modules/toolchain.cmake"
rm -f CMakeCache.txt
cmake ${LZO_CMAKE_ARGS}
"${MAKE_CMD}"
popd
fi
fi
# END KERBEROS TODO
#
# Don't try to run tests without data!
#
TESTWH_ITEMS=`(hadoop fs -ls ${FILESYSTEM_PREFIX}/test-warehouse 2> /dev/null || true) | \
(grep test-warehouse || true) | wc -l`
if [ ${TESTS_ACTION} -eq 1 -a \
${TESTDATA_ACTION} -eq 0 -a \
${TESTWH_ITEMS} -lt 5 ]; then
set +x
echo "You just asked buildall to run tests, but did not supply any data."
echo "Running tests without data doesn't work. Exiting now."
exit 1
fi
# Build the Java components (fe and external data source API).
pushd "$IMPALA_HOME"
"${MAKE_CMD}" ext-data-source fe
popd
if [ $TESTDATA_ACTION -eq 1 ]; then
# Create testdata.
# Build the shell tarball
echo "Creating shell tarball"
"${IMPALA_HOME}/shell/make_shell_tarball.sh"
# Generate list of files for Cscope to index
"$IMPALA_HOME/bin/gen-cscope.sh"
}
# Do any configuration of the test cluster required by the script arguments.
# Kills any cluster processes that will need to be restarted to pick up new
# configurations or the new build.
reconfigure_test_cluster() {
# Takes no arguments. Reads FORMAT_METASTORE, FORMAT_CLUSTER,
# FORMAT_SENTRY_POLICY_DB and METASTORE_SNAPSHOT_FILE to decide which teardown
# steps run and which flags are passed to create-test-configuration.sh.
# Stop any running Impala services.
"${IMPALA_HOME}/bin/start-impala-cluster.py" --kill --force
if [[ "$FORMAT_METASTORE" -eq 1 || "$FORMAT_CLUSTER" -eq 1 ||
"$FORMAT_SENTRY_POLICY_DB" -eq 1 || -n "$METASTORE_SNAPSHOT_FILE" ]]
then
# Kill any processes that may be accessing postgres metastore. To be safe, this is
# done before we make any changes to the config files.
# The "|| true" means a non-zero exit status from kill-all.sh is ignored.
"${IMPALA_HOME}/testdata/bin/kill-all.sh" || true
fi
# Stop the minikdc if needed.
# The exit status of "admin is_kerberized" decides whether minikdc.sh is called.
if "${CLUSTER_DIR}/admin" is_kerberized; then
"${IMPALA_HOME}/testdata/bin/minikdc.sh" stop
fi
# Flags are accumulated in one string, each with a leading space, and the
# string is expanded unquoted below so that every flag becomes its own word.
local CREATE_TEST_CONFIG_ARGS=""
if [[ "$FORMAT_SENTRY_POLICY_DB" -eq 1 ]]; then
CREATE_TEST_CONFIG_ARGS+=" -create_sentry_policy_db"
fi
# The metastore is only created from scratch when no snapshot file was given.
if [[ "$FORMAT_METASTORE" -eq 1 && -z "$METASTORE_SNAPSHOT_FILE" ]]; then
CREATE_TEST_CONFIG_ARGS+=" -create_metastore"
fi
# Generate the Hadoop configs needed by Impala
"${IMPALA_HOME}/bin/create-test-configuration.sh" ${CREATE_TEST_CONFIG_ARGS}
# Copy Hadoop-lzo dependencies if available (required to generate Lzo data).
# stat serves as the existence check for the glob; only its stdout is
# discarded, so its error output is still shown when nothing matches.
if stat "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* > /dev/null ; then
cp "$HADOOP_LZO"/build/native/Linux-*-*/lib/libgplcompression.* "$HADOOP_HOME/lib/native"
else
echo "No hadoop-lzo found"
fi
}
# Starts the test cluster processes except for Impala.
start_test_cluster_dependencies() {
  # Runs testdata/bin/run-all.sh under IMPALA_HOME. The -format flag is passed
  # only when FORMAT_CLUSTER is 1; otherwise the script gets no arguments.
  local run_all_script="$IMPALA_HOME/testdata/bin/run-all.sh"
  if [[ "$FORMAT_CLUSTER" -eq 1 ]]; then
    "$run_all_script" -format
  else
    "$run_all_script"
  fi
}
# Execute any data loading steps once the cluster dependencies are started.
# This does all data loading, except for the metastore snapshot which must be loaded
# earlier before the cluster is running.
load_test_data() {
"$IMPALA_HOME/bin/create_testdata.sh"
cd "$ROOT"
# We have 4 cases:
# - test-warehouse and metastore snapshots exists.
# - Only the test-warehouse snapshot exists.
# - Only the metastore snapshot exists.
# - Neither of them exist.
CREATE_LOAD_DATA_ARGS=""
local CREATE_LOAD_DATA_ARGS=""
if [[ "$SNAPSHOT_FILE" && "$METASTORE_SNAPSHOT_FILE" ]]; then
CREATE_LOAD_DATA_ARGS="-snapshot_file ${SNAPSHOT_FILE} -skip_metadata_load"
elif [[ "$SNAPSHOT_FILE" && -z "$METASTORE_SNAPSHOT_FILE" ]]; then
@@ -405,20 +388,60 @@ if [ $TESTDATA_ACTION -eq 1 ]; then
CREATE_LOAD_DATA_ARGS="-skip_metadata_load -skip_snapshot_load"
fi
"${IMPALA_HOME}/testdata/bin/create-load-data.sh" ${CREATE_LOAD_DATA_ARGS} <<< Y
fi
}
if [ $TESTS_ACTION -eq 1 ]; then
if [ $CODE_COVERAGE -eq 0 ]; then
"${IMPALA_HOME}/bin/run-all-tests.sh" -e $EXPLORATION_STRATEGY
else
"${IMPALA_HOME}/bin/run-all-tests.sh" -e $EXPLORATION_STRATEGY -c
run_all_tests() {
  # Runs bin/run-all-tests.sh under IMPALA_HOME with the exploration strategy
  # from EXPLORATION_STRATEGY, adding -c when CODE_COVERAGE is 1.
  #
  # The arguments are collected in an array and expanded quoted so that each
  # value reaches the script as exactly one word: no word-splitting and no
  # pathname expansion is applied to EXPLORATION_STRATEGY.
  local -a test_args=(-e "$EXPLORATION_STRATEGY")
  if [[ $CODE_COVERAGE -eq 1 ]]; then
    test_args+=(-c)
  fi
  "${IMPALA_HOME}/bin/run-all-tests.sh" "${test_args[@]}"
}
# Clean everything first if requested.
if [[ "$CLEAN_ACTION" -eq 1 ]]; then
"$IMPALA_HOME/bin/clean.sh"
fi
# Generate list of files for Cscope to index
"$IMPALA_HOME/bin/gen-cscope.sh"
bootstrap_dependencies
if [ ${NEEDS_RE_SOURCE_NOTE} -eq 1 ]; then
if [[ "$BUILD_FE_ONLY" -eq 1 ]]; then
build_fe
exit 0
fi
build_all_components
if [[ $NEED_MINICLUSTER -eq 1 ]]; then
reconfigure_test_cluster
fi
# If a metastore snapshot exists, load it while the cluster processes are down and not
# accessing the metastore.
if [[ -n "$METASTORE_SNAPSHOT_FILE" ]]; then
echo "Loading metastore snapshot"
"${IMPALA_HOME}/testdata/bin/load-metastore-snapshot.sh" "$METASTORE_SNAPSHOT_FILE"
fi
if [[ $NEED_MINICLUSTER -eq 1 ]]; then
start_test_cluster_dependencies
fi
if [[ $TESTDATA_ACTION -eq 1 ]]; then
load_test_data
fi
if [[ $TESTS_ACTION -eq 1 ]]; then
run_all_tests
fi
# Bring up Impala if requested. Tests and data load start their own miniclusters, so we
# should bring up a clean cluster *after* those steps are completed.
if [[ $START_IMPALA_CLUSTER -eq 1 ]]; then
"${IMPALA_HOME}/bin/start-impala-cluster.py"
fi
if [[ ${NEEDS_RE_SOURCE_NOTE} -eq 1 ]]; then
echo
echo "You have just successfully created a kerberized cluster."
echo "Congratulations! Communication with this cluster requires"

View File

@@ -16,5 +16,5 @@
# under the License.
add_custom_target(ext-data-source DEPENDS thrift-deps
COMMAND mvn install -DskipTests
)
COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh install -DskipTests
)

View File

@@ -16,5 +16,5 @@
# under the License.
add_custom_target(fe DEPENDS thrift-deps function-registry ext-data-source
COMMAND mvn package -DskipTests
)
COMMAND $ENV{IMPALA_HOME}/bin/mvn-quiet.sh install -DskipTests
)