mirror of
https://github.com/apache/impala.git
synced 2026-01-07 09:02:19 -05:00
Allows running the tests that make up the "core" suite in about 2 hours. By comparison, https://jenkins.impala.io/job/ubuntu-16.04-from-scratch/buildTimeTrend tends to run in about 3.5 hours. This commit: * Adds "echo" statements in a few places, to facilitate timing. * Adds --skip-parallel/--skip-serial flags to run-tests.py, and exposes them in run-all-tests.sh. * Marks TestRuntimeFilters as a serial test. This test runs queries that need > 1GB of memory, and, combined with other tests running in parallel, can kill the parallel test suite. * Adds "test-with-docker.py", which runs a full build, data load, and executes tests inside of Docker containers, generating a timeline at the end. In short, one container is used to do the build and data load, and then this container is re-used to run various tests in parallel. All logs are left on the host system. Besides the obvious win of getting test results more quickly, this commit serves as an example of how to get various bits of Impala development working inside of Docker containers. For example, Kudu relies on atomic rename of directories, which isn't available in most Docker filesystems, and entrypoint.sh works around it. In addition, the timeline generated by the build suggests where further optimizations can be made. Most obviously, dataload eats up a precious ~30-50 minutes, on a largely idle machine. This work is significantly CPU and memory hungry. It was developed on a 32-core, 120GB RAM Google Compute Engine machine. I've worked out parallelism configurations such that it runs nicely on 60GB of RAM (c4.8xlarge) and over 100GB (eg., m4.10xlarge, which has 160GB). There is some simple logic to guess at some knobs, and there are knobs. By and large, EC2 and GCE price machines linearly, so, if CPU usage can be kept up, it's not wasteful to run on bigger machines. Change-Id: I82052ef31979564968effef13a3c6af0d5c62767 Reviewed-on: http://gerrit.cloudera.org:8080/9085 Reviewed-by: Philip Zeyliger <philip@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
330 lines
10 KiB
Bash
Executable File
330 lines
10 KiB
Bash
Executable File
#!/bin/bash
|
|
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
# Entrypoint code for test-with-docker.py containers. test-with-docker.py
|
|
# will create Docker containers with this script as the entrypoint,
|
|
# with a variety of arguments. See test-with-docker.py for a more
|
|
# general overview.
|
|
#
|
|
# This assumes that the following are already mounted inside
|
|
# the container:
|
|
# /etc/localtime -> /mnt/localtime
|
|
# Helps timestamps be in the time zone of the host
|
|
# $IMPALA_HOME [a git repo of Impala] -> /repo
|
|
# Used to check out Impala afresh
|
|
# $IMPALA_HOME/logs/docker/<n1>/<n2> -> /logs
|
|
# Used to save logs out to host.
|
|
# <n1> represents the --name passed into
|
|
# test-with-docker for the test run. <n2>
|
|
# indicates which specific container is being run.
|
|
# ~/.ccache [configurable] -> /ccache
|
|
# Used to speed up builds.
|
|
#
|
|
# Usage:
|
|
# entrypoint.sh build <uid>
|
|
# entrypoint.sh test_suite <suite>
|
|
# where <suite> is one of: BE_TEST JDBC_TEST CLUSTER_TEST
|
|
# EE_TEST_SERIAL EE_TEST_PARALLEL
|
|
|
|
# Boostraps the container by creating a user and adding basic tools like Python and git.
|
|
# Takes a uid as an argument for the user to be created.
|
|
function build() {
|
|
# Handy for testing.
|
|
if [[ $TEST_TEST_WITH_DOCKER ]]; then
|
|
# We sleep busily so that CPU metrics will show usage, to
|
|
# better exercise the timeline code.
|
|
echo sleeping busily for 4 seconds
|
|
bash -c 'while [[ $SECONDS -lt 4 ]]; do :; done'
|
|
return
|
|
fi
|
|
|
|
# Configure timezone, so any timestamps that appear are coherent with the host.
|
|
configure_timezone
|
|
|
|
# Assert we're superuser.
|
|
[ "$(id -u)" = 0 ]
|
|
if id $1 2> /dev/null; then
|
|
echo "User with id $1 already exists. Please run this as a user id missing from " \
|
|
"the base Ubuntu container."
|
|
echo
|
|
echo "Container users:"
|
|
paste <(cut -d : -f3 /etc/passwd) <(cut -d : -f1 /etc/passwd) | sort -n
|
|
exit 1
|
|
fi
|
|
apt-get update
|
|
apt-get install -y sudo git lsb-release python
|
|
|
|
adduser --disabled-password --gecos "" --uid $1 impdev
|
|
echo "impdev ALL=(ALL) NOPASSWD:ALL" >> /etc/sudoers
|
|
|
|
su impdev -c "$0 build_impdev"
|
|
}
|
|
|
|
# Sets up Impala environment
|
|
function impala_environment() {
|
|
pushd /home/impdev/Impala
|
|
export IMPALA_HOME=/home/impdev/Impala
|
|
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
|
|
source bin/impala-config.sh
|
|
popd
|
|
}
|
|
|
|
# Starts SSH and PostgreSQL; configures container as necessary;
|
|
# prepares Kudu for starting.
|
|
function boot_container() {
|
|
pushd /home/impdev/Impala
|
|
|
|
# Required for metastore
|
|
sudo service postgresql start
|
|
|
|
# Required for starting HBase
|
|
sudo service ssh start
|
|
|
|
# Make log directories. This is typically done in buildall.sh.
|
|
mkdir -p logs/be_tests logs/fe_tests/coverage logs/ee_tests logs/custom_cluster_tests
|
|
|
|
# Update /etc/hosts to remove the entry for the unique docker hostname,
|
|
# and instead point it to 127.0.0.1. Otherwise, HttpFS returns Location:
|
|
# redirects to said hostname, but the relevant datanode isn't listening
|
|
# on the wildcard address.
|
|
sed -e /$(hostname)/d /etc/hosts -e /127.0.0.1/s,localhost,"localhost $(hostname)," \
|
|
> /tmp/hosts
|
|
# "sed -i" in place doesn't work on Docker, because /etc/hosts is a bind mount.
|
|
sudo cp /tmp/hosts /etc/hosts
|
|
|
|
echo Hostname: $(hostname)
|
|
echo Hosts file:
|
|
cat /etc/hosts
|
|
|
|
# Make a copy of Kudu's WALs to avoid isue with Docker filesystems (aufs and
|
|
# overlayfs) that don't support os.rename(2) on directories, which Kudu
|
|
# requires. We make a fresh copy of the data, in which case rename(2) works
|
|
# presumably because there's only one layer involved. See
|
|
# https://issues.apache.org/jira/browse/KUDU-1419.
|
|
cd /home/impdev/Impala/testdata
|
|
for x in cluster/cdh*/node-*/var/lib/kudu/*/wal; do
|
|
mv $x $x-orig
|
|
cp -r $x-orig $x
|
|
rm -r $x-orig
|
|
done
|
|
|
|
# Wait for postgresql to really start; if it doesn't, Hive Metastore will fail to start.
|
|
for i in {1..120}; do
|
|
echo connecting to postgresql attempt $i
|
|
if sudo -u postgres psql -c "select 1"; then
|
|
break
|
|
else
|
|
sleep 2
|
|
fi
|
|
done
|
|
sudo -u postgres psql -c "select 1"
|
|
|
|
popd
|
|
}
|
|
|
|
# Runs bootstrap_system.sh and then builds Impala.
|
|
function build_impdev() {
|
|
# Assert we're impdev now.
|
|
[ "$(id -un)" = impdev ]
|
|
|
|
# Link in ccache from host.
|
|
ln -s /ccache /home/impdev/.ccache
|
|
|
|
# Instead of doing a full "git clone" of /repo, which is the host's checkout,
|
|
# we only fetch one branch, without tags. This keeps the checkout
|
|
# considerably lighter.
|
|
mkdir /home/impdev/Impala
|
|
pushd /home/impdev/Impala
|
|
git init
|
|
git fetch /repo --no-tags HEAD
|
|
git checkout -b test-with-docker FETCH_HEAD
|
|
|
|
# Link in logs. Logs are on the host since that's the most important thing to
|
|
# look at after the tests are run.
|
|
ln -sf /logs logs
|
|
|
|
bin/bootstrap_system.sh
|
|
impala_environment
|
|
|
|
# Builds Impala and loads test data.
|
|
# Note that IMPALA-6494 prevents us from using shared library builds,
|
|
# which are smaller and thereby speed things up.
|
|
./buildall.sh -noclean -format -testdata -skiptests
|
|
|
|
# Shut down things cleanly.
|
|
testdata/bin/kill-all.sh
|
|
|
|
# Shutting down PostgreSQL nicely speeds up it's start time for new containers.
|
|
sudo service postgresql stop
|
|
|
|
# Clean up things we don't need to reduce image size
|
|
find be -name '*.o' -execdir rm '{}' + # ~1.6GB
|
|
|
|
popd
|
|
}
|
|
|
|
# Runs a suite passed in as the first argument. Tightly
|
|
# coupled with Impala's run-all-tests and the suite names.
|
|
# from test-with-docker.py.
|
|
#
|
|
# Before running tests, starts up the minicluster.
|
|
function test_suite() {
|
|
cd /home/impdev/Impala
|
|
|
|
# These test suites are for testing.
|
|
if [[ $1 == NOOP ]]; then
|
|
return 0
|
|
fi
|
|
if [[ $1 == NOOP_FAIL ]]; then
|
|
return 1
|
|
fi
|
|
if [[ $1 == NOOP_SLEEP_FOREVER ]]; then
|
|
# Handy to test timeouts.
|
|
while true; do sleep 60; done
|
|
fi
|
|
|
|
# Assert that we're running as impdev
|
|
[ "$(id -un)" = impdev ]
|
|
|
|
# Assert that /home/impdev/Impala/logs is a symlink to /logs.
|
|
[ "$(readlink /home/impdev/Impala/logs)" = /logs ]
|
|
|
|
boot_container
|
|
impala_environment
|
|
|
|
# By default, the JVM will use 1/4 of your OS memory for its heap size. For a
|
|
# long-running test, this will delay GC inside of impalad's leading to
|
|
# unnecessarily large process RSS footprints. We cap the heap size at
|
|
# a more reasonable size. Note that "test_insert_large_string" fails
|
|
# at 2g and 3g, so the suite that includes it (EE_TEST_PARALLEL) gets
|
|
# additional memory.
|
|
#
|
|
# Similarly, bin/start-impala-cluster typically configures the memlimit
|
|
# to be 80% of the machine memory, divided by the number of daemons.
|
|
# If multiple containers are to be run simultaneously, this is scaled
|
|
# down in test-with-docker.py (and further configurable with --impalad-mem-limit-bytes)
|
|
# and passed in via $IMPALAD_MEM_LIMIT_BYTES to the container. There is a
|
|
# relationship between the number of parallel tests that can be run by py.test and this
|
|
# limit.
|
|
JVM_HEAP_GB=2
|
|
if [[ $1 = EE_TEST_PARALLEL ]]; then
|
|
JVM_HEAP_GB=4
|
|
fi
|
|
export TEST_START_CLUSTER_ARGS="--jvm_args=-Xmx${JVM_HEAP_GB}g \
|
|
--impalad_args=--mem_limit=$IMPALAD_MEM_LIMIT_BYTES"
|
|
|
|
# BE tests don't require the minicluster, so we can run them directly.
|
|
if [[ $1 = BE_TEST ]]; then
|
|
# IMPALA-6494: thrift-server-test fails in Ubuntu16.04 for the moment; skip it.
|
|
export SKIP_BE_TEST_PATTERN='thrift-server-test*'
|
|
if ! bin/run-backend-tests.sh; then
|
|
echo "Tests $1 failed!"
|
|
return 1
|
|
else
|
|
echo "Tests $1 succeeded!"
|
|
return 0
|
|
fi
|
|
fi
|
|
|
|
# Start the minicluster
|
|
testdata/bin/run-all.sh
|
|
|
|
export MAX_PYTEST_FAILURES=0
|
|
# Choose which suite to run; this is how run-all.sh chooses between them.
|
|
export FE_TEST=false
|
|
export BE_TEST=false
|
|
export EE_TEST=false
|
|
export JDBC_TEST=false
|
|
export CLUSTER_TEST=false
|
|
|
|
eval "export ${1}=true"
|
|
|
|
if [[ ${1} = "EE_TEST_SERIAL" ]]; then
|
|
# We bucket the stress tests with the parallel tests.
|
|
export RUN_TESTS_ARGS="--skip-parallel --skip-stress"
|
|
export EE_TEST=true
|
|
elif [[ ${1} = "EE_TEST_PARALLEL" ]]; then
|
|
export RUN_TESTS_ARGS="--skip-serial"
|
|
export EE_TEST=true
|
|
fi
|
|
|
|
ret=0
|
|
|
|
# Run tests.
|
|
if ! time -p bin/run-all-tests.sh; then
|
|
ret=1
|
|
echo "Tests $1 failed!"
|
|
else
|
|
echo "Tests $1 succeeded!"
|
|
fi
|
|
# Oddly, I've observed bash fail to exit (and wind down the container),
|
|
# leading to test-with-docker.py hitting a timeout. Killing the minicluster
|
|
# daemons fixes this.
|
|
testdata/bin/kill-all.sh || true
|
|
return $ret
|
|
}
|
|
|
|
# Ubuntu's tzdata package is very finnicky, and if you
|
|
# mount /etc/localtime from the host to the container directly,
|
|
# it fails to install. However, if you make it a symlink
|
|
# and configure /etc/timezone to something that's not an
|
|
# empty string, you'll get the right behavior.
|
|
#
|
|
# The post installation script is findable by looking for "tzdata.postinst"
|
|
#
|
|
# Use this command to reproduce the Ubuntu issue:
|
|
# docker run -v /etc/localtime:/mnt/localtime -ti ubuntu:16.04 bash -c '
|
|
# date
|
|
# ln -sf /mnt/localtime /etc/localtime
|
|
# date +%Z > /etc/timezone
|
|
# date
|
|
# apt-get update > /dev/null
|
|
# apt-get install tzdata
|
|
# date'
|
|
function configure_timezone() {
|
|
if ! diff -q /etc/localtime /mnt/localtime 2> /dev/null; then
|
|
ln -sf /mnt/localtime /etc/localtime
|
|
date +%Z > /etc/timezone
|
|
fi
|
|
}
|
|
|
|
function main() {
|
|
set -e
|
|
|
|
# Run given command
|
|
CMD="$1"
|
|
shift
|
|
|
|
echo ">>> ${CMD} $@ (begin)"
|
|
set -x
|
|
if "${CMD}" "$@"; then
|
|
ret=0
|
|
else
|
|
ret=$?
|
|
fi
|
|
set +x
|
|
echo ">>> ${CMD} $@ ($ret) (end)"
|
|
exit $ret
|
|
}
|
|
|
|
# Run main() unless we're being sourced.
|
|
if [[ "${BASH_SOURCE[0]}" == "${0}" ]]; then
|
|
main "$@"
|
|
fi
|