diff --git a/.gitignore b/.gitignore index c6aaa3ad6..0987017d0 100644 --- a/.gitignore +++ b/.gitignore @@ -36,7 +36,7 @@ tests/test-hive-udfs/target/ cdh-*-hdfs-data/ avro_schemas/ -cluster_logs/ +logs/ # Impala toolchain directory toolchain/ \ No newline at end of file diff --git a/LOGS.md b/LOGS.md new file mode 100644 index 000000000..cd43d3836 --- /dev/null +++ b/LOGS.md @@ -0,0 +1,21 @@ +All logs, test results and SQL files generated during data loading +and testing are consolidated under $IMPALA_HOME/logs with the +following directory structure: + +$IMPALA_HOME/logs/cluster +- logs of Hadoop components and Impala + +$IMPALA_HOME/logs/data_loading +- logs and SQL files produced in data loading + +$IMPALA_HOME/logs/fe_tests +- logs and test output of Frontend unit tests + +$IMPALA_HOME/logs/be_tests +- logs and test output of Backend unit tests + +$IMPALA_HOME/logs/ee_tests +- logs and test output of end-to-end tests + +$IMPALA_HOME/logs/custom_cluster_tests +- logs and test output of custom cluster tests diff --git a/be/CMakeLists.txt b/be/CMakeLists.txt index 7857fbdc5..64328d013 100644 --- a/be/CMakeLists.txt +++ b/be/CMakeLists.txt @@ -383,7 +383,8 @@ FUNCTION(ADD_BE_TEST TEST_NAME) ADD_EXECUTABLE(${TEST_NAME} ${TEST_NAME}.cc) TARGET_LINK_LIBRARIES(${TEST_NAME} ${IMPALA_TEST_LINK_LIBS}) set(CMAKE_EXE_LINKER_FLAGS "--start-group") - ADD_TEST(${TEST_NAME} "${BUILD_OUTPUT_ROOT_DIRECTORY}/${DIR_NAME}/${TEST_NAME}") + ADD_TEST(${TEST_NAME} "${BUILD_OUTPUT_ROOT_DIRECTORY}/${DIR_NAME}/${TEST_NAME}" + -log_dir=$ENV{IMPALA_BE_TEST_LOGS_DIR}) ADD_DEPENDENCIES(be-test ${TEST_NAME}) ENDFUNCTION() diff --git a/be/src/rpc/authentication-test.cc b/be/src/rpc/authentication-test.cc index efccb021a..59ecda427 100644 --- a/be/src/rpc/authentication-test.cc +++ b/be/src/rpc/authentication-test.cc @@ -174,8 +174,8 @@ TEST(Auth, KerbAndSslEnabled) { } int main(int argc, char** argv) { - impala::InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST); 
::testing::InitGoogleTest(&argc, argv); + impala::InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST); env_keytab = getenv("KRB5_KTNAME"); env_princ = getenv("MINIKDC_PRINC_IMPALA"); diff --git a/be/src/util/thread-pool-test.cc b/be/src/util/thread-pool-test.cc index 40c4259bb..f4541cdd6 100644 --- a/be/src/util/thread-pool-test.cc +++ b/be/src/util/thread-pool-test.cc @@ -18,6 +18,7 @@ #include #include +#include "common/init.h" #include "common/logging.h" #include "util/thread-pool.h" @@ -67,9 +68,7 @@ TEST(ThreadPoolTest, BasicTest) { } int main(int argc, char** argv) { - impala::InitGoogleLoggingSafe(argv[0]); - impala::InitThreading(); - impala::OsInfo::Init(); ::testing::InitGoogleTest(&argc, argv); + impala::InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST); return RUN_ALL_TESTS(); } diff --git a/bin/clean.sh b/bin/clean.sh index 00755b690..532923818 100755 --- a/bin/clean.sh +++ b/bin/clean.sh @@ -41,7 +41,8 @@ pushd $IMPALA_FE_DIR rm -rf target rm -f src/test/resources/{core,hbase,hive}-site.xml rm -rf generated-sources/* -rm -rf ${IMPALA_TEST_CLUSTER_LOG_DIR}/* +rm -rf ${IMPALA_LOGS_DIR}/* +mkdir -p $IMPALA_ALL_LOGS_DIRS popd # clean be diff --git a/bin/impala-config.sh b/bin/impala-config.sh index 5b6f673b1..bbcf2def5 100755 --- a/bin/impala-config.sh +++ b/bin/impala-config.sh @@ -183,8 +183,34 @@ elif [ "${TARGET_FILESYSTEM}" != "hdfs" ]; then return 1 fi -# Directory where local cluster logs will go when running tests or loading data -export IMPALA_TEST_CLUSTER_LOG_DIR=${IMPALA_HOME}/cluster_logs +# Directories where local cluster logs will go when running tests or loading data +export IMPALA_LOGS_DIR=${IMPALA_HOME}/logs +export IMPALA_CLUSTER_LOGS_DIR=${IMPALA_LOGS_DIR}/cluster +export IMPALA_DATA_LOADING_LOGS_DIR=${IMPALA_LOGS_DIR}/data_loading +export IMPALA_DATA_LOADING_SQL_DIR=${IMPALA_DATA_LOADING_LOGS_DIR}/sql +export IMPALA_FE_TEST_LOGS_DIR=${IMPALA_LOGS_DIR}/fe_tests +export 
IMPALA_BE_TEST_LOGS_DIR=${IMPALA_LOGS_DIR}/be_tests +export IMPALA_EE_TEST_LOGS_DIR=${IMPALA_LOGS_DIR}/ee_tests +export IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR=${IMPALA_LOGS_DIR}/custom_cluster_tests +# List all Impala log dirs and create them; bin/clean.sh re-creates them from this list. +export IMPALA_ALL_LOGS_DIRS="${IMPALA_CLUSTER_LOGS_DIR} + ${IMPALA_DATA_LOADING_LOGS_DIR} ${IMPALA_DATA_LOADING_SQL_DIR} + ${IMPALA_FE_TEST_LOGS_DIR} ${IMPALA_BE_TEST_LOGS_DIR} + ${IMPALA_EE_TEST_LOGS_DIR} ${IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR}" +mkdir -p $IMPALA_ALL_LOGS_DIRS + +# Create symlinks Testing/Temporary and be/Testing/Temporary that point to the BE test +# log dir to capture all the logs of BE unit tests. Gtest has Testing/Temporary +# hardwired in its code, so we cannot change the output dir by configuration. +# We create two symlinks to capture the logs when running ctest either from +# ${IMPALA_HOME} or ${IMPALA_HOME}/be. +rm -rf ${IMPALA_HOME}/Testing +mkdir -p ${IMPALA_HOME}/Testing +ln -fs ${IMPALA_BE_TEST_LOGS_DIR} ${IMPALA_HOME}/Testing/Temporary +rm -rf ${IMPALA_HOME}/be/Testing +mkdir -p ${IMPALA_HOME}/be/Testing +ln -fs ${IMPALA_BE_TEST_LOGS_DIR} ${IMPALA_HOME}/be/Testing/Temporary + # Reduce the concurrency for local tests to half the number of cores in the system. 
# Note than nproc may not be available on older distributions (centos5.5) if type nproc >/dev/null 2>&1; then diff --git a/bin/load-data.py b/bin/load-data.py index 9e8309940..b0fb535d6 100755 --- a/bin/load-data.py +++ b/bin/load-data.py @@ -60,7 +60,7 @@ parser.add_option("--principal", default=None, dest="principal", options, args = parser.parse_args() -DATA_LOAD_DIR = '/tmp/data-load-files' +SQL_OUTPUT_DIR = os.environ['IMPALA_DATA_LOADING_SQL_DIR'] WORKLOAD_DIR = options.workload_dir DATASET_DIR = options.dataset_dir TESTDATA_BIN_DIR = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin') @@ -257,9 +257,11 @@ if __name__ == "__main__": start_time = time.time() dataset = get_dataset_for_workload(workload) generate_schema_statements(workload) - assert os.path.isdir(os.path.join(DATA_LOAD_DIR, dataset)), ("Data loading files " - "do not exist for (%s)" % dataset) - os.chdir(os.path.join(DATA_LOAD_DIR, dataset)) + sql_dir = os.path.join(SQL_OUTPUT_DIR, dataset) + assert os.path.isdir(sql_dir), ( + "Could not find the generated SQL files for loading dataset '%s'.\n" + "Expected to find the SQL files in: %s" % (dataset, sql_dir)) + os.chdir(sql_dir) copy_avro_schemas_to_hdfs(AVRO_SCHEMA_DIR) dataset_dir_contents = os.listdir(os.getcwd()) load_file_substr = "%s-%s" % (workload, options.exploration_strategy) diff --git a/bin/run-all-tests.sh b/bin/run-all-tests.sh index 67dda02ec..1023fee1d 100755 --- a/bin/run-all-tests.sh +++ b/bin/run-all-tests.sh @@ -77,8 +77,8 @@ do esac done -LOG_DIR=${IMPALA_TEST_CLUSTER_LOG_DIR}/query_tests -mkdir -p ${LOG_DIR} +# For logging when using run-step. 
+LOG_DIR=${IMPALA_EE_TEST_LOGS_DIR} # Enable core dumps ulimit -c unlimited @@ -96,7 +96,7 @@ do TEST_RET_CODE=0 run-step "Starting Impala cluster" start-impala-cluster.log \ - ${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${LOG_DIR} \ + ${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${IMPALA_EE_TEST_LOGS_DIR} \ ${TEST_START_CLUSTER_ARGS} if [[ "$BE_TEST" == true ]]; then @@ -157,7 +157,7 @@ do if [[ "$JDBC_TEST" == true ]]; then # Run the JDBC tests with background loading disabled. This is interesting because # it requires loading missing table metadata. - ${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${LOG_DIR} \ + ${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${IMPALA_EE_TEST_LOGS_DIR} \ --catalogd_args=--load_catalog_in_background=false \ ${TEST_START_CLUSTER_ARGS} pushd ${IMPALA_FE_DIR} diff --git a/bin/run-backend-tests.sh b/bin/run-backend-tests.sh index d5c317afe..400ee1e29 100755 --- a/bin/run-backend-tests.sh +++ b/bin/run-backend-tests.sh @@ -4,9 +4,7 @@ set -euo pipefail trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)' ERR -# Prepare output directory -mkdir -p $IMPALA_TEST_CLUSTER_LOG_DIR/be_test -export GTEST_OUTPUT="xml:$IMPALA_TEST_CLUSTER_LOG_DIR/be_test/" +export GTEST_OUTPUT="xml:$IMPALA_BE_TEST_LOGS_DIR/" : ${SKIP_BE_TEST_PATTERN:=} diff --git a/bin/start-impala-cluster.py b/bin/start-impala-cluster.py index d3c7b7760..1302a66bd 100755 --- a/bin/start-impala-cluster.py +++ b/bin/start-impala-cluster.py @@ -51,7 +51,7 @@ parser.add_option("-r", "--restart_impalad_only", dest="restart_impalad_only", help="Restarts only the impalad processes") parser.add_option("--in-process", dest="inprocess", action="store_true", default=False, help="Start all Impala backends and state store in a single process.") -parser.add_option("--log_dir", dest="log_dir", default="/tmp", +parser.add_option("--log_dir", dest="log_dir", default=os.environ['IMPALA_CLUSTER_LOGS_DIR'], help="Directory to store output 
logs to.") parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False, help="Prints all output to stderr/stdout.") @@ -327,6 +327,10 @@ if __name__ == "__main__": print 'Please specify a cluster size >= 0' sys.exit(1) + if not os.path.isdir(options.log_dir): + print 'Log dir does not exist or is not a directory: %s' % options.log_dir + sys.exit(1) + # Kill existing cluster processes based on the current configuration. if options.restart_impalad_only: if options.inprocess: diff --git a/buildall.sh b/buildall.sh index 8f1afe746..e61ccb166 100755 --- a/buildall.sh +++ b/buildall.sh @@ -298,11 +298,6 @@ ${IMPALA_HOME}/shell/make_shell_tarball.sh echo "Creating test tarball" ${IMPALA_HOME}/tests/make_test_tarball.sh -# Create subdirectories for the test and data loading impalad logs. -mkdir -p ${IMPALA_TEST_CLUSTER_LOG_DIR}/query_tests -mkdir -p ${IMPALA_TEST_CLUSTER_LOG_DIR}/fe_tests -mkdir -p ${IMPALA_TEST_CLUSTER_LOG_DIR}/data_loading - if [ $FORMAT_CLUSTER -eq 1 ]; then $IMPALA_HOME/testdata/bin/run-all.sh -format elif [ $TESTDATA_ACTION -eq 1 ] || [ $TESTS_ACTION -eq 1 ]; then diff --git a/fe/pom.xml b/fe/pom.xml index 1d3647533..982e05d30 100644 --- a/fe/pom.xml +++ b/fe/pom.xml @@ -26,6 +26,7 @@ Cloudera Impala Query Engine Frontend + ${env.IMPALA_LOGS_DIR}/fe_tests ${project.basedir}/../testdata/target/AllTypes.txt ${env.IMPALA_HOME}/be/build/debug/service:${env.IMPALA_HOME}/be/build/release/service 21000 @@ -459,6 +460,7 @@ maven-surefire-plugin 2.18 + ${surefire.reports.dir} true -Djava.library.path=${java.library.path}:${backend.library.path} diff --git a/fe/src/test/resources/hbase-site.xml.template b/fe/src/test/resources/hbase-site.xml.template index 5059fa5e6..7a729246f 100644 --- a/fe/src/test/resources/hbase-site.xml.template +++ b/fe/src/test/resources/hbase-site.xml.template @@ -53,7 +53,7 @@ hbase.zookeeper.property.dataDir - ${IMPALA_TEST_CLUSTER_LOG_DIR}/zoo + ${IMPALA_CLUSTER_LOGS_DIR}/zoo diff --git 
a/fe/src/test/resources/hive-log4j.properties.template b/fe/src/test/resources/hive-log4j.properties.template index 27289775f..e847e3858 100644 --- a/fe/src/test/resources/hive-log4j.properties.template +++ b/fe/src/test/resources/hive-log4j.properties.template @@ -1,6 +1,6 @@ # Define some default values that can be overridden by system properties hive.root.logger=INFO,DRFA -hive.log.dir=${IMPALA_TEST_CLUSTER_LOG_DIR}/hive +hive.log.dir=${IMPALA_CLUSTER_LOGS_DIR}/hive hive.log.file=hive.log # Define the root logger to the system property "hadoop.root.logger". diff --git a/fe/src/test/resources/mysql-hive-site.xml.template b/fe/src/test/resources/mysql-hive-site.xml.template index bab3eb21e..add10dcf9 100644 --- a/fe/src/test/resources/mysql-hive-site.xml.template +++ b/fe/src/test/resources/mysql-hive-site.xml.template @@ -96,7 +96,7 @@ hive.querylog.location - ${IMPALA_TEST_CLUSTER_LOG_DIR}/hive + ${IMPALA_CLUSTER_LOGS_DIR}/hive hive.querylog.location - ${IMPALA_TEST_CLUSTER_LOG_DIR}/hive + ${IMPALA_CLUSTER_LOGS_DIR}/hive Starting mini-DFS cluster" $IMPALA_HOME/testdata/bin/run-mini-dfs.sh ${HDFS_FORMAT_CLUSTER-} 2>&1 | \ - tee ${IMPALA_TEST_CLUSTER_LOG_DIR}/run-mini-dfs.log + tee ${IMPALA_CLUSTER_LOGS_DIR}/run-mini-dfs.log echo " --> Starting HBase" $IMPALA_HOME/testdata/bin/run-hbase.sh 2>&1 | \ - tee ${IMPALA_TEST_CLUSTER_LOG_DIR}/run-hbase.log + tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hbase.log echo " --> Starting Hive Server and Metastore Service" $IMPALA_HOME/testdata/bin/run-hive-server.sh 2>&1 | \ - tee ${IMPALA_TEST_CLUSTER_LOG_DIR}/run-hive-server.log + tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hive-server.log echo " --> Starting the Sentry Policy Server" $IMPALA_HOME/testdata/bin/run-sentry-service.sh > \ - ${IMPALA_TEST_CLUSTER_LOG_DIR}/run-sentry-service.log 2>&1 + ${IMPALA_CLUSTER_LOGS_DIR}/run-sentry-service.log 2>&1 elif [[ ${DEFAULT_FS} == "${LOCAL_FS}" ]]; then # When the local file system is used as default, we only start the Hive metastore. 
# Impala can run locally without additional services. $IMPALA_HOME/testdata/bin/run-hive-server.sh -only_metastore 2>&1 | \ - tee ${IMPALA_TEST_CLUSTER_LOG_DIR}/run-hive-server.log + tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hive-server.log else # With Isilon, we only start the Hive metastore and Sentry Policy Server. # - HDFS is not started becuase Isilon is used as the defaultFs in core-site @@ -72,9 +72,9 @@ else # TODO: Figure out how to start YARN, LLAMA and Hive with a different defaultFs. echo " --> Starting Hive Metastore Service" $IMPALA_HOME/testdata/bin/run-hive-server.sh -only_metastore 2>&1 | \ - tee ${IMPALA_TEST_CLUSTER_LOG_DIR}/run-hive-server.log + tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hive-server.log echo " --> Starting the Sentry Policy Server" $IMPALA_HOME/testdata/bin/run-sentry-service.sh > \ - ${IMPALA_TEST_CLUSTER_LOG_DIR}/run-sentry-service.log 2>&1 + ${IMPALA_CLUSTER_LOGS_DIR}/run-sentry-service.log 2>&1 fi diff --git a/testdata/bin/run-hbase.sh b/testdata/bin/run-hbase.sh index 4dd5b7639..2d76a6917 100755 --- a/testdata/bin/run-hbase.sh +++ b/testdata/bin/run-hbase.sh @@ -7,7 +7,7 @@ trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0) CLUSTER_BIN=${IMPALA_HOME}/testdata/bin HBASE_JAAS_CLIENT=${HBASE_CONF_DIR}/hbase-jaas-client.conf HBASE_JAAS_SERVER=${HBASE_CONF_DIR}/hbase-jaas-server.conf -HBASE_LOGDIR=${IMPALA_TEST_CLUSTER_LOG_DIR}/hbase +HBASE_LOGDIR=${IMPALA_CLUSTER_LOGS_DIR}/hbase # Kill and clean data for a clean start. ${CLUSTER_BIN}/kill-hbase.sh > /dev/null 2>&1 @@ -22,10 +22,10 @@ export HBASE_LOG_DIR=${HBASE_LOGDIR} export HBASE_PID_DIR=${HBASE_LOGDIR} EOF -# Put zookeeper things in the cluster_logs/zoo directory. +# Put zookeeper things in the logs/cluster/zoo directory. 
# (See hbase.zookeeper.property.dataDir in hbase-site.xml) -rm -rf ${IMPALA_TEST_CLUSTER_LOG_DIR}/zoo -mkdir -p ${IMPALA_TEST_CLUSTER_LOG_DIR}/zoo +rm -rf ${IMPALA_CLUSTER_LOGS_DIR}/zoo +mkdir -p ${IMPALA_CLUSTER_LOGS_DIR}/zoo mkdir -p ${HBASE_LOGDIR} if ${CLUSTER_DIR}/admin is_kerberized; then diff --git a/testdata/bin/run-hive-server.sh b/testdata/bin/run-hive-server.sh index 23df556e9..86832cb22 100755 --- a/testdata/bin/run-hive-server.sh +++ b/testdata/bin/run-hive-server.sh @@ -9,7 +9,7 @@ trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0) HIVE_SERVER_PORT=10000 export HIVE_SERVER2_THRIFT_PORT=11050 HIVE_METASTORE_PORT=9083 -LOGDIR=${IMPALA_HOME}/cluster_logs/hive +LOGDIR=${IMPALA_CLUSTER_LOGS_DIR}/hive HIVES2_TRANSPORT="plain_sasl" METASTORE_TRANSPORT="buffered" ONLY_METASTORE=0 diff --git a/testdata/cluster/admin b/testdata/cluster/admin index ba5f9756d..286d526c9 100755 --- a/testdata/cluster/admin +++ b/testdata/cluster/admin @@ -82,7 +82,7 @@ if [[ "$CDH_MAJOR_VERSION" -ge 5 ]]; then EMPTY_NODE_DIRS+=" /var/log/llama" fi -EASY_ACCESS_LOG_DIR="$IMPALA_HOME/cluster_logs" +EASY_ACCESS_LOG_DIR="$IMPALA_CLUSTER_LOGS_DIR" MINIKDC_INIT=${IMPALA_HOME}/testdata/bin/minikdc.sh if $IS_OSX; then @@ -246,7 +246,7 @@ function create_cluster { if [[ ! 
-e "$EASY_ACCESS_LOG_LINK" ]]; then mkdir -p "$EASY_ACCESS_LOG_DIR" ln -s "$NODE_DIR/var/log" "$EASY_ACCESS_LOG_DIR" - mv "$IMPALA_HOME/cluster_logs/log" "$EASY_ACCESS_LOG_LINK" + mv "$IMPALA_CLUSTER_LOGS_DIR/log" "$EASY_ACCESS_LOG_LINK" fi # Template population diff --git a/tests/run-custom-cluster-tests.sh b/tests/run-custom-cluster-tests.sh index 9339894c9..0131546ad 100755 --- a/tests/run-custom-cluster-tests.sh +++ b/tests/run-custom-cluster-tests.sh @@ -23,17 +23,14 @@ trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0) # TODO: Combine with run-process-failure-tests.sh export HEAPCHECK= -RESULTS_DIR=${IMPALA_HOME}/tests/custom_cluster/results -mkdir -p ${RESULTS_DIR} -LOG_DIR=${IMPALA_TEST_CLUSTER_LOG_DIR}/custom_cluster/ -mkdir -p ${LOG_DIR} - AUX_CUSTOM_DIR="" if [ -n ${IMPALA_AUX_TEST_HOME} ]; then AUX_CUSTOM_DIR=${IMPALA_AUX_TEST_HOME}/tests/aux_custom_cluster_tests/ fi -export LOG_DIR +export LOG_DIR=${IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR} +RESULTS_DIR=${IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR}/results +mkdir -p ${RESULTS_DIR} # KERBEROS TODO We'll want to pass kerberos status in here. cd ${IMPALA_HOME}/tests