mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
Consolidate test and cluster logs under a single directory.
All logs, test results and SQL files generated during data loading and testing are now consolidated under a single new directory $IMPALA_HOME/logs. The goal is to simplify archiving in Jenkins runs and debugging. The new structure is as follows: $IMPALA_HOME/logs/cluster - logs of Hadoop components and Impala $IMPALA_HOME/logs/data_loading - logs and SQL files produced in data loading $IMPALA_HOME/logs/fe_tests - logs and test output of Frontend unit tests $IMPALA_HOME/logs/be_tests - logs and test output of Backend unit tests $IMPALA_HOME/logs/ee_tests - logs and test output of end-to-end tests $IMPALA_HOME/logs/custom_cluster_tests - logs and test output of custom cluster tests I tested this change with a full data load which was successful. Change-Id: Ief1f58f3320ec39d31b3c6bc6ef87f58ff7dfdfa Reviewed-on: http://gerrit.cloudera.org:8080/2456 Reviewed-by: Alex Behm <alex.behm@cloudera.com> Tested-by: Internal Jenkins
This commit is contained in:
committed by
Internal Jenkins
parent
35dfbafab6
commit
7e76e92bef
2
.gitignore
vendored
2
.gitignore
vendored
@@ -36,7 +36,7 @@ tests/test-hive-udfs/target/
|
|||||||
|
|
||||||
cdh-*-hdfs-data/
|
cdh-*-hdfs-data/
|
||||||
avro_schemas/
|
avro_schemas/
|
||||||
cluster_logs/
|
logs/
|
||||||
|
|
||||||
# Impala toolchain directory
|
# Impala toolchain directory
|
||||||
toolchain/
|
toolchain/
|
||||||
21
LOGS.md
Normal file
21
LOGS.md
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
All logs, test results and SQL files generated during data loading
|
||||||
|
and testing are consolidated under $IMPALA_HOME/logs with the
|
||||||
|
following directory structure:
|
||||||
|
|
||||||
|
$IMPALA_HOME/logs/cluster
|
||||||
|
- logs of Hadoop components and Impala
|
||||||
|
|
||||||
|
$IMPALA_HOME/logs/data_loading
|
||||||
|
- logs and SQL files produced in data loading
|
||||||
|
|
||||||
|
$IMPALA_HOME/logs/fe_tests
|
||||||
|
- logs and test output of Frontend unit tests
|
||||||
|
|
||||||
|
$IMPALA_HOME/logs/be_tests
|
||||||
|
- logs and test output of Backend unit tests
|
||||||
|
|
||||||
|
$IMPALA_HOME/logs/ee_tests
|
||||||
|
- logs and test output of end-to-end tests
|
||||||
|
|
||||||
|
$IMPALA_HOME/logs/custom_cluster_tests
|
||||||
|
- logs and test output of custom cluster tests
|
||||||
@@ -383,7 +383,8 @@ FUNCTION(ADD_BE_TEST TEST_NAME)
|
|||||||
ADD_EXECUTABLE(${TEST_NAME} ${TEST_NAME}.cc)
|
ADD_EXECUTABLE(${TEST_NAME} ${TEST_NAME}.cc)
|
||||||
TARGET_LINK_LIBRARIES(${TEST_NAME} ${IMPALA_TEST_LINK_LIBS})
|
TARGET_LINK_LIBRARIES(${TEST_NAME} ${IMPALA_TEST_LINK_LIBS})
|
||||||
set(CMAKE_EXE_LINKER_FLAGS "--start-group")
|
set(CMAKE_EXE_LINKER_FLAGS "--start-group")
|
||||||
ADD_TEST(${TEST_NAME} "${BUILD_OUTPUT_ROOT_DIRECTORY}/${DIR_NAME}/${TEST_NAME}")
|
ADD_TEST(${TEST_NAME} "${BUILD_OUTPUT_ROOT_DIRECTORY}/${DIR_NAME}/${TEST_NAME}"
|
||||||
|
-log_dir=$ENV{IMPALA_BE_TEST_LOGS_DIR})
|
||||||
ADD_DEPENDENCIES(be-test ${TEST_NAME})
|
ADD_DEPENDENCIES(be-test ${TEST_NAME})
|
||||||
ENDFUNCTION()
|
ENDFUNCTION()
|
||||||
|
|
||||||
|
|||||||
@@ -174,8 +174,8 @@ TEST(Auth, KerbAndSslEnabled) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
impala::InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
|
|
||||||
::testing::InitGoogleTest(&argc, argv);
|
::testing::InitGoogleTest(&argc, argv);
|
||||||
|
impala::InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
|
||||||
|
|
||||||
env_keytab = getenv("KRB5_KTNAME");
|
env_keytab = getenv("KRB5_KTNAME");
|
||||||
env_princ = getenv("MINIKDC_PRINC_IMPALA");
|
env_princ = getenv("MINIKDC_PRINC_IMPALA");
|
||||||
|
|||||||
@@ -18,6 +18,7 @@
|
|||||||
#include <gtest/gtest.h>
|
#include <gtest/gtest.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#include "common/init.h"
|
||||||
#include "common/logging.h"
|
#include "common/logging.h"
|
||||||
#include "util/thread-pool.h"
|
#include "util/thread-pool.h"
|
||||||
|
|
||||||
@@ -67,9 +68,7 @@ TEST(ThreadPoolTest, BasicTest) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char** argv) {
|
int main(int argc, char** argv) {
|
||||||
impala::InitGoogleLoggingSafe(argv[0]);
|
|
||||||
impala::InitThreading();
|
|
||||||
impala::OsInfo::Init();
|
|
||||||
::testing::InitGoogleTest(&argc, argv);
|
::testing::InitGoogleTest(&argc, argv);
|
||||||
|
impala::InitCommonRuntime(argc, argv, true, impala::TestInfo::BE_TEST);
|
||||||
return RUN_ALL_TESTS();
|
return RUN_ALL_TESTS();
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,7 +41,8 @@ pushd $IMPALA_FE_DIR
|
|||||||
rm -rf target
|
rm -rf target
|
||||||
rm -f src/test/resources/{core,hbase,hive}-site.xml
|
rm -f src/test/resources/{core,hbase,hive}-site.xml
|
||||||
rm -rf generated-sources/*
|
rm -rf generated-sources/*
|
||||||
rm -rf ${IMPALA_TEST_CLUSTER_LOG_DIR}/*
|
rm -rf ${IMPALA_LOGS_DIR}/*
|
||||||
|
mkdir -p $IMPALA_ALL_LOGS_DIRS
|
||||||
popd
|
popd
|
||||||
|
|
||||||
# clean be
|
# clean be
|
||||||
|
|||||||
@@ -183,8 +183,34 @@ elif [ "${TARGET_FILESYSTEM}" != "hdfs" ]; then
|
|||||||
return 1
|
return 1
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Directory where local cluster logs will go when running tests or loading data
|
# Directories where local cluster logs will go when running tests or loading data
|
||||||
export IMPALA_TEST_CLUSTER_LOG_DIR=${IMPALA_HOME}/cluster_logs
|
export IMPALA_LOGS_DIR=${IMPALA_HOME}/logs
|
||||||
|
export IMPALA_CLUSTER_LOGS_DIR=${IMPALA_LOGS_DIR}/cluster
|
||||||
|
export IMPALA_DATA_LOADING_LOGS_DIR=${IMPALA_LOGS_DIR}/data_loading
|
||||||
|
export IMPALA_DATA_LOADING_SQL_DIR=${IMPALA_DATA_LOADING_LOGS_DIR}/sql
|
||||||
|
export IMPALA_FE_TEST_LOGS_DIR=${IMPALA_LOGS_DIR}/fe_tests
|
||||||
|
export IMPALA_BE_TEST_LOGS_DIR=${IMPALA_LOGS_DIR}/be_tests
|
||||||
|
export IMPALA_EE_TEST_LOGS_DIR=${IMPALA_LOGS_DIR}/ee_tests
|
||||||
|
export IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR=${IMPALA_LOGS_DIR}/custom_cluster_tests
|
||||||
|
# Build the list of all Impala log dirs and create them.
|
||||||
|
export IMPALA_ALL_LOGS_DIRS="${IMPALA_CLUSTER_LOGS_DIR}
|
||||||
|
${IMPALA_DATA_LOADING_LOGS_DIR} ${IMPALA_DATA_LOADING_SQL_DIR}
|
||||||
|
${IMPALA_FE_TEST_LOGS_DIR} ${IMPALA_BE_TEST_LOGS_DIR}
|
||||||
|
${IMPALA_EE_TEST_LOGS_DIR} ${IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR}"
|
||||||
|
mkdir -p $IMPALA_ALL_LOGS_DIRS
|
||||||
|
|
||||||
|
# Create symlinks Testing/Temporary and be/Testing/Temporary that point to the BE test
|
||||||
|
# log dir to capture all the logs of BE unit tests. Gtest has Testing/Temporary
|
||||||
|
# hardwired in its code, so we cannot change the output dir by configuration.
|
||||||
|
# We create two symlinks to capture the logs when running ctest either from
|
||||||
|
# ${IMPALA_HOME} or ${IMPALA_HOME}/be.
|
||||||
|
rm -rf ${IMPALA_HOME}/Testing
|
||||||
|
mkdir -p ${IMPALA_HOME}/Testing
|
||||||
|
ln -fs ${IMPALA_BE_TEST_LOGS_DIR} ${IMPALA_HOME}/Testing/Temporary
|
||||||
|
rm -rf ${IMPALA_HOME}/be/Testing
|
||||||
|
mkdir -p ${IMPALA_HOME}/be/Testing
|
||||||
|
ln -fs ${IMPALA_BE_TEST_LOGS_DIR} ${IMPALA_HOME}/be/Testing/Temporary
|
||||||
|
|
||||||
# Reduce the concurrency for local tests to half the number of cores in the system.
|
# Reduce the concurrency for local tests to half the number of cores in the system.
|
||||||
# Note that nproc may not be available on older distributions (centos5.5)
|
# Note that nproc may not be available on older distributions (centos5.5)
|
||||||
if type nproc >/dev/null 2>&1; then
|
if type nproc >/dev/null 2>&1; then
|
||||||
|
|||||||
@@ -60,7 +60,7 @@ parser.add_option("--principal", default=None, dest="principal",
|
|||||||
|
|
||||||
options, args = parser.parse_args()
|
options, args = parser.parse_args()
|
||||||
|
|
||||||
DATA_LOAD_DIR = '/tmp/data-load-files'
|
SQL_OUTPUT_DIR = os.environ['IMPALA_DATA_LOADING_SQL_DIR']
|
||||||
WORKLOAD_DIR = options.workload_dir
|
WORKLOAD_DIR = options.workload_dir
|
||||||
DATASET_DIR = options.dataset_dir
|
DATASET_DIR = options.dataset_dir
|
||||||
TESTDATA_BIN_DIR = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin')
|
TESTDATA_BIN_DIR = os.path.join(os.environ['IMPALA_HOME'], 'testdata/bin')
|
||||||
@@ -257,9 +257,11 @@ if __name__ == "__main__":
|
|||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
dataset = get_dataset_for_workload(workload)
|
dataset = get_dataset_for_workload(workload)
|
||||||
generate_schema_statements(workload)
|
generate_schema_statements(workload)
|
||||||
assert os.path.isdir(os.path.join(DATA_LOAD_DIR, dataset)), ("Data loading files "
|
sql_dir = os.path.join(SQL_OUTPUT_DIR, dataset)
|
||||||
"do not exist for (%s)" % dataset)
|
assert os.path.isdir(sql_dir),\
|
||||||
os.chdir(os.path.join(DATA_LOAD_DIR, dataset))
|
("Could not find the generated SQL files for loading dataset '%s'.\
|
||||||
|
\nExpected to find the SQL files in: %s" % (dataset, sql_dir))
|
||||||
|
os.chdir(os.path.join(SQL_OUTPUT_DIR, dataset))
|
||||||
copy_avro_schemas_to_hdfs(AVRO_SCHEMA_DIR)
|
copy_avro_schemas_to_hdfs(AVRO_SCHEMA_DIR)
|
||||||
dataset_dir_contents = os.listdir(os.getcwd())
|
dataset_dir_contents = os.listdir(os.getcwd())
|
||||||
load_file_substr = "%s-%s" % (workload, options.exploration_strategy)
|
load_file_substr = "%s-%s" % (workload, options.exploration_strategy)
|
||||||
|
|||||||
@@ -77,8 +77,8 @@ do
|
|||||||
esac
|
esac
|
||||||
done
|
done
|
||||||
|
|
||||||
LOG_DIR=${IMPALA_TEST_CLUSTER_LOG_DIR}/query_tests
|
# For logging when using run-step.
|
||||||
mkdir -p ${LOG_DIR}
|
LOG_DIR=${IMPALA_EE_TEST_LOGS_DIR}
|
||||||
|
|
||||||
# Enable core dumps
|
# Enable core dumps
|
||||||
ulimit -c unlimited
|
ulimit -c unlimited
|
||||||
@@ -96,7 +96,7 @@ do
|
|||||||
TEST_RET_CODE=0
|
TEST_RET_CODE=0
|
||||||
|
|
||||||
run-step "Starting Impala cluster" start-impala-cluster.log \
|
run-step "Starting Impala cluster" start-impala-cluster.log \
|
||||||
${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${LOG_DIR} \
|
${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${IMPALA_EE_TEST_LOGS_DIR} \
|
||||||
${TEST_START_CLUSTER_ARGS}
|
${TEST_START_CLUSTER_ARGS}
|
||||||
|
|
||||||
if [[ "$BE_TEST" == true ]]; then
|
if [[ "$BE_TEST" == true ]]; then
|
||||||
@@ -157,7 +157,7 @@ do
|
|||||||
if [[ "$JDBC_TEST" == true ]]; then
|
if [[ "$JDBC_TEST" == true ]]; then
|
||||||
# Run the JDBC tests with background loading disabled. This is interesting because
|
# Run the JDBC tests with background loading disabled. This is interesting because
|
||||||
# it requires loading missing table metadata.
|
# it requires loading missing table metadata.
|
||||||
${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${LOG_DIR} \
|
${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${IMPALA_EE_TEST_LOGS_DIR} \
|
||||||
--catalogd_args=--load_catalog_in_background=false \
|
--catalogd_args=--load_catalog_in_background=false \
|
||||||
${TEST_START_CLUSTER_ARGS}
|
${TEST_START_CLUSTER_ARGS}
|
||||||
pushd ${IMPALA_FE_DIR}
|
pushd ${IMPALA_FE_DIR}
|
||||||
|
|||||||
@@ -4,9 +4,7 @@
|
|||||||
set -euo pipefail
|
set -euo pipefail
|
||||||
trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)' ERR
|
trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)' ERR
|
||||||
|
|
||||||
# Prepare output directory
|
export GTEST_OUTPUT="xml:$IMPALA_BE_TEST_LOGS_DIR/"
|
||||||
mkdir -p $IMPALA_TEST_CLUSTER_LOG_DIR/be_test
|
|
||||||
export GTEST_OUTPUT="xml:$IMPALA_TEST_CLUSTER_LOG_DIR/be_test/"
|
|
||||||
|
|
||||||
: ${SKIP_BE_TEST_PATTERN:=}
|
: ${SKIP_BE_TEST_PATTERN:=}
|
||||||
|
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ parser.add_option("-r", "--restart_impalad_only", dest="restart_impalad_only",
|
|||||||
help="Restarts only the impalad processes")
|
help="Restarts only the impalad processes")
|
||||||
parser.add_option("--in-process", dest="inprocess", action="store_true", default=False,
|
parser.add_option("--in-process", dest="inprocess", action="store_true", default=False,
|
||||||
help="Start all Impala backends and state store in a single process.")
|
help="Start all Impala backends and state store in a single process.")
|
||||||
parser.add_option("--log_dir", dest="log_dir", default="/tmp",
|
parser.add_option("--log_dir", dest="log_dir", default=os.environ['IMPALA_CLUSTER_LOGS_DIR'],
|
||||||
help="Directory to store output logs to.")
|
help="Directory to store output logs to.")
|
||||||
parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False,
|
parser.add_option("-v", "--verbose", dest="verbose", action="store_true", default=False,
|
||||||
help="Prints all output to stderr/stdout.")
|
help="Prints all output to stderr/stdout.")
|
||||||
@@ -327,6 +327,10 @@ if __name__ == "__main__":
|
|||||||
print 'Please specify a cluster size >= 0'
|
print 'Please specify a cluster size >= 0'
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
|
if not os.path.isdir(options.log_dir):
|
||||||
|
print 'Log dir does not exist or is not a directory: %s' % options.log_dir
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
# Kill existing cluster processes based on the current configuration.
|
# Kill existing cluster processes based on the current configuration.
|
||||||
if options.restart_impalad_only:
|
if options.restart_impalad_only:
|
||||||
if options.inprocess:
|
if options.inprocess:
|
||||||
|
|||||||
@@ -298,11 +298,6 @@ ${IMPALA_HOME}/shell/make_shell_tarball.sh
|
|||||||
echo "Creating test tarball"
|
echo "Creating test tarball"
|
||||||
${IMPALA_HOME}/tests/make_test_tarball.sh
|
${IMPALA_HOME}/tests/make_test_tarball.sh
|
||||||
|
|
||||||
# Create subdirectories for the test and data loading impalad logs.
|
|
||||||
mkdir -p ${IMPALA_TEST_CLUSTER_LOG_DIR}/query_tests
|
|
||||||
mkdir -p ${IMPALA_TEST_CLUSTER_LOG_DIR}/fe_tests
|
|
||||||
mkdir -p ${IMPALA_TEST_CLUSTER_LOG_DIR}/data_loading
|
|
||||||
|
|
||||||
if [ $FORMAT_CLUSTER -eq 1 ]; then
|
if [ $FORMAT_CLUSTER -eq 1 ]; then
|
||||||
$IMPALA_HOME/testdata/bin/run-all.sh -format
|
$IMPALA_HOME/testdata/bin/run-all.sh -format
|
||||||
elif [ $TESTDATA_ACTION -eq 1 ] || [ $TESTS_ACTION -eq 1 ]; then
|
elif [ $TESTDATA_ACTION -eq 1 ] || [ $TESTS_ACTION -eq 1 ]; then
|
||||||
|
|||||||
@@ -26,6 +26,7 @@
|
|||||||
<name>Cloudera Impala Query Engine Frontend</name>
|
<name>Cloudera Impala Query Engine Frontend</name>
|
||||||
|
|
||||||
<properties>
|
<properties>
|
||||||
|
<surefire.reports.dir>${env.IMPALA_LOGS_DIR}/fe_tests</surefire.reports.dir>
|
||||||
<test.hive.testdata>${project.basedir}/../testdata/target/AllTypes.txt</test.hive.testdata>
|
<test.hive.testdata>${project.basedir}/../testdata/target/AllTypes.txt</test.hive.testdata>
|
||||||
<backend.library.path>${env.IMPALA_HOME}/be/build/debug/service:${env.IMPALA_HOME}/be/build/release/service</backend.library.path>
|
<backend.library.path>${env.IMPALA_HOME}/be/build/debug/service:${env.IMPALA_HOME}/be/build/release/service</backend.library.path>
|
||||||
<beeswax_port>21000</beeswax_port>
|
<beeswax_port>21000</beeswax_port>
|
||||||
@@ -459,6 +460,7 @@
|
|||||||
<artifactId>maven-surefire-plugin</artifactId>
|
<artifactId>maven-surefire-plugin</artifactId>
|
||||||
<version>2.18</version>
|
<version>2.18</version>
|
||||||
<configuration>
|
<configuration>
|
||||||
|
<reportsDirectory>${surefire.reports.dir}</reportsDirectory>
|
||||||
<redirectTestOutputToFile>true</redirectTestOutputToFile>
|
<redirectTestOutputToFile>true</redirectTestOutputToFile>
|
||||||
<argLine>-Djava.library.path=${java.library.path}:${backend.library.path}</argLine>
|
<argLine>-Djava.library.path=${java.library.path}:${backend.library.path}</argLine>
|
||||||
<systemProperties>
|
<systemProperties>
|
||||||
|
|||||||
@@ -53,7 +53,7 @@
|
|||||||
|
|
||||||
<property>
|
<property>
|
||||||
<name>hbase.zookeeper.property.dataDir</name>
|
<name>hbase.zookeeper.property.dataDir</name>
|
||||||
<value>${IMPALA_TEST_CLUSTER_LOG_DIR}/zoo</value>
|
<value>${IMPALA_CLUSTER_LOGS_DIR}/zoo</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<!-- BEGIN Kerberos settings -->
|
<!-- BEGIN Kerberos settings -->
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
# Define some default values that can be overridden by system properties
|
# Define some default values that can be overridden by system properties
|
||||||
hive.root.logger=INFO,DRFA
|
hive.root.logger=INFO,DRFA
|
||||||
hive.log.dir=${IMPALA_TEST_CLUSTER_LOG_DIR}/hive
|
hive.log.dir=${IMPALA_CLUSTER_LOGS_DIR}/hive
|
||||||
hive.log.file=hive.log
|
hive.log.file=hive.log
|
||||||
|
|
||||||
# Define the root logger to the system property "hadoop.root.logger".
|
# Define the root logger to the system property "hadoop.root.logger".
|
||||||
|
|||||||
@@ -96,7 +96,7 @@
|
|||||||
<property>
|
<property>
|
||||||
<!-- Location of Hive per-query log files of the form: hive_job_log_<hive_query_id>.txt -->
|
<!-- Location of Hive per-query log files of the form: hive_job_log_<hive_query_id>.txt -->
|
||||||
<name>hive.querylog.location</name>
|
<name>hive.querylog.location</name>
|
||||||
<value>${IMPALA_TEST_CLUSTER_LOG_DIR}/hive</value>
|
<value>${IMPALA_CLUSTER_LOGS_DIR}/hive</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<!--property>
|
<!--property>
|
||||||
|
|||||||
@@ -99,7 +99,7 @@
|
|||||||
<property>
|
<property>
|
||||||
<!-- Location of Hive per-query log files of the form: hive_job_log_<hive_query_id>.txt -->
|
<!-- Location of Hive per-query log files of the form: hive_job_log_<hive_query_id>.txt -->
|
||||||
<name>hive.querylog.location</name>
|
<name>hive.querylog.location</name>
|
||||||
<value>${IMPALA_TEST_CLUSTER_LOG_DIR}/hive</value>
|
<value>${IMPALA_CLUSTER_LOGS_DIR}/hive</value>
|
||||||
</property>
|
</property>
|
||||||
|
|
||||||
<!--property>
|
<!--property>
|
||||||
|
|||||||
12
testdata/bin/create-load-data.sh
vendored
12
testdata/bin/create-load-data.sh
vendored
@@ -35,8 +35,8 @@ SKIP_SNAPSHOT_LOAD=0
|
|||||||
SNAPSHOT_FILE=""
|
SNAPSHOT_FILE=""
|
||||||
LOAD_DATA_ARGS=""
|
LOAD_DATA_ARGS=""
|
||||||
JDBC_URL="jdbc:hive2://localhost:11050/default;"
|
JDBC_URL="jdbc:hive2://localhost:11050/default;"
|
||||||
LOG_DIR=${IMPALA_TEST_CLUSTER_LOG_DIR}/data_loading
|
# For logging when using run-step.
|
||||||
mkdir -p ${LOG_DIR}
|
LOG_DIR=${IMPALA_DATA_LOADING_LOGS_DIR}
|
||||||
|
|
||||||
while [ -n "$*" ]
|
while [ -n "$*" ]
|
||||||
do
|
do
|
||||||
@@ -141,7 +141,7 @@ function load-data {
|
|||||||
ARGS+=("--force")
|
ARGS+=("--force")
|
||||||
echo "Force loading."
|
echo "Force loading."
|
||||||
fi
|
fi
|
||||||
LOG_FILE=${LOG_DIR}/data-load-${WORKLOAD}-${EXPLORATION_STRATEGY}.log
|
LOG_FILE=${IMPALA_DATA_LOADING_LOGS_DIR}/data-load-${WORKLOAD}-${EXPLORATION_STRATEGY}.log
|
||||||
echo "$MSG. Logging to ${LOG_FILE}"
|
echo "$MSG. Logging to ${LOG_FILE}"
|
||||||
# Use unbuffered logging by executing with -u
|
# Use unbuffered logging by executing with -u
|
||||||
if ! impala-python -u ${IMPALA_HOME}/bin/load-data.py ${ARGS[@]} &> ${LOG_FILE}; then
|
if ! impala-python -u ${IMPALA_HOME}/bin/load-data.py ${ARGS[@]} &> ${LOG_FILE}; then
|
||||||
@@ -162,7 +162,7 @@ function cache-test-tables {
|
|||||||
}
|
}
|
||||||
|
|
||||||
function load-aux-workloads {
|
function load-aux-workloads {
|
||||||
LOG_FILE=${LOG_DIR}/data-load-auxiliary-workloads-core.log
|
LOG_FILE=${IMPALA_DATA_LOADING_LOGS_DIR}/data-load-auxiliary-workloads-core.log
|
||||||
rm -f $LOG_FILE
|
rm -f $LOG_FILE
|
||||||
# Load all the auxiliary workloads (if any exist)
|
# Load all the auxiliary workloads (if any exist)
|
||||||
if [ -d ${IMPALA_AUX_WORKLOAD_DIR} ] && [ -d ${IMPALA_AUX_DATASET_DIR} ]; then
|
if [ -d ${IMPALA_AUX_WORKLOAD_DIR} ] && [ -d ${IMPALA_AUX_DATASET_DIR} ]; then
|
||||||
@@ -339,8 +339,8 @@ else
|
|||||||
START_CLUSTER_ARGS="-s 3 ${START_CLUSTER_ARGS}"
|
START_CLUSTER_ARGS="-s 3 ${START_CLUSTER_ARGS}"
|
||||||
fi
|
fi
|
||||||
run-step "Starting Impala cluster" start-impala-cluster.log \
|
run-step "Starting Impala cluster" start-impala-cluster.log \
|
||||||
${IMPALA_HOME}/bin/start-impala-cluster.py --log_dir=${LOG_DIR} \
|
${IMPALA_HOME}/bin/start-impala-cluster.py \
|
||||||
${START_CLUSTER_ARGS}
|
--log_dir=${IMPALA_DATA_LOADING_LOGS_DIR} ${START_CLUSTER_ARGS}
|
||||||
# The hdfs environment script sets up kms (encryption) and cache pools (hdfs caching).
|
# The hdfs environment script sets up kms (encryption) and cache pools (hdfs caching).
|
||||||
# On a non-hdfs filesystem, we don't test encryption or hdfs caching, so this setup is not
|
# On a non-hdfs filesystem, we don't test encryption or hdfs caching, so this setup is not
|
||||||
# needed.
|
# needed.
|
||||||
|
|||||||
4
testdata/bin/generate-schema-statements.py
vendored
4
testdata/bin/generate-schema-statements.py
vendored
@@ -73,9 +73,9 @@ if options.workload is None:
|
|||||||
parser.print_help()
|
parser.print_help()
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
|
|
||||||
DATA_LOAD_DIR = '/tmp/data-load-files'
|
|
||||||
WORKLOAD_DIR = os.path.join(os.environ['IMPALA_HOME'], 'testdata', 'workloads')
|
WORKLOAD_DIR = os.path.join(os.environ['IMPALA_HOME'], 'testdata', 'workloads')
|
||||||
DATASET_DIR = os.path.join(os.environ['IMPALA_HOME'], 'testdata', 'datasets')
|
DATASET_DIR = os.path.join(os.environ['IMPALA_HOME'], 'testdata', 'datasets')
|
||||||
|
SQL_OUTPUT_DIR = os.environ['IMPALA_DATA_LOADING_SQL_DIR']
|
||||||
AVRO_SCHEMA_DIR = "avro_schemas"
|
AVRO_SCHEMA_DIR = "avro_schemas"
|
||||||
IMPALA_SUPPORTED_INSERT_FORMATS = ['parquet', 'hbase', 'text', 'kudu']
|
IMPALA_SUPPORTED_INSERT_FORMATS = ['parquet', 'hbase', 'text', 'kudu']
|
||||||
|
|
||||||
@@ -690,7 +690,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
target_dataset = test_vectors[0].dataset
|
target_dataset = test_vectors[0].dataset
|
||||||
print 'Target Dataset: ' + target_dataset
|
print 'Target Dataset: ' + target_dataset
|
||||||
dataset_load_dir = os.path.join(DATA_LOAD_DIR, target_dataset)
|
dataset_load_dir = os.path.join(SQL_OUTPUT_DIR, target_dataset)
|
||||||
# If the directory containing the sql files does not exist, create it. Else nuke all the
|
# If the directory containing the sql files does not exist, create it. Else nuke all the
|
||||||
# files corresponding to the current workload.
|
# files corresponding to the current workload.
|
||||||
try:
|
try:
|
||||||
|
|||||||
2
testdata/bin/minikdc_env.sh
vendored
2
testdata/bin/minikdc_env.sh
vendored
@@ -51,7 +51,7 @@ export MINIKDC_PRINC_USER=${USER}/localhost@${MINIKDC_REALM}
|
|||||||
export MINIKDC_PRINC_LLAM=llama/localhost@${MINIKDC_REALM}
|
export MINIKDC_PRINC_LLAM=llama/localhost@${MINIKDC_REALM}
|
||||||
|
|
||||||
# Basic directory setup:
|
# Basic directory setup:
|
||||||
MINIKDC_SCRATCH_ROOT=${MINIKDC_SCRATCH_ROOT-${IMPALA_TEST_CLUSTER_LOG_DIR}}
|
MINIKDC_SCRATCH_ROOT=${MINIKDC_SCRATCH_ROOT-${IMPALA_CLUSTER_LOGS_DIR}}
|
||||||
export MINIKDC_WD=${MINIKDC_SCRATCH_ROOT}/minikdc-workdir
|
export MINIKDC_WD=${MINIKDC_SCRATCH_ROOT}/minikdc-workdir
|
||||||
|
|
||||||
# The one big keytab created by the minikdc
|
# The one big keytab created by the minikdc
|
||||||
|
|||||||
16
testdata/bin/run-all.sh
vendored
16
testdata/bin/run-all.sh
vendored
@@ -31,7 +31,7 @@ fi
|
|||||||
|
|
||||||
# Kill and clean data for a clean start.
|
# Kill and clean data for a clean start.
|
||||||
echo "Killing running services..."
|
echo "Killing running services..."
|
||||||
$IMPALA_HOME/testdata/bin/kill-all.sh &>${IMPALA_TEST_CLUSTER_LOG_DIR}/kill-all.log
|
$IMPALA_HOME/testdata/bin/kill-all.sh &>${IMPALA_CLUSTER_LOGS_DIR}/kill-all.log
|
||||||
|
|
||||||
# Starts up a mini-cluster which includes:
|
# Starts up a mini-cluster which includes:
|
||||||
# - HDFS with 3 DNs
|
# - HDFS with 3 DNs
|
||||||
@@ -41,25 +41,25 @@ if [[ ${DEFAULT_FS} == "hdfs://localhost:20500" ]]; then
|
|||||||
echo "Starting all cluster services..."
|
echo "Starting all cluster services..."
|
||||||
echo " --> Starting mini-DFS cluster"
|
echo " --> Starting mini-DFS cluster"
|
||||||
$IMPALA_HOME/testdata/bin/run-mini-dfs.sh ${HDFS_FORMAT_CLUSTER-} 2>&1 | \
|
$IMPALA_HOME/testdata/bin/run-mini-dfs.sh ${HDFS_FORMAT_CLUSTER-} 2>&1 | \
|
||||||
tee ${IMPALA_TEST_CLUSTER_LOG_DIR}/run-mini-dfs.log
|
tee ${IMPALA_CLUSTER_LOGS_DIR}/run-mini-dfs.log
|
||||||
|
|
||||||
echo " --> Starting HBase"
|
echo " --> Starting HBase"
|
||||||
$IMPALA_HOME/testdata/bin/run-hbase.sh 2>&1 | \
|
$IMPALA_HOME/testdata/bin/run-hbase.sh 2>&1 | \
|
||||||
tee ${IMPALA_TEST_CLUSTER_LOG_DIR}/run-hbase.log
|
tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hbase.log
|
||||||
|
|
||||||
echo " --> Starting Hive Server and Metastore Service"
|
echo " --> Starting Hive Server and Metastore Service"
|
||||||
$IMPALA_HOME/testdata/bin/run-hive-server.sh 2>&1 | \
|
$IMPALA_HOME/testdata/bin/run-hive-server.sh 2>&1 | \
|
||||||
tee ${IMPALA_TEST_CLUSTER_LOG_DIR}/run-hive-server.log
|
tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hive-server.log
|
||||||
|
|
||||||
echo " --> Starting the Sentry Policy Server"
|
echo " --> Starting the Sentry Policy Server"
|
||||||
$IMPALA_HOME/testdata/bin/run-sentry-service.sh > \
|
$IMPALA_HOME/testdata/bin/run-sentry-service.sh > \
|
||||||
${IMPALA_TEST_CLUSTER_LOG_DIR}/run-sentry-service.log 2>&1
|
${IMPALA_CLUSTER_LOGS_DIR}/run-sentry-service.log 2>&1
|
||||||
|
|
||||||
elif [[ ${DEFAULT_FS} == "${LOCAL_FS}" ]]; then
|
elif [[ ${DEFAULT_FS} == "${LOCAL_FS}" ]]; then
|
||||||
# When the local file system is used as default, we only start the Hive metastore.
|
# When the local file system is used as default, we only start the Hive metastore.
|
||||||
# Impala can run locally without additional services.
|
# Impala can run locally without additional services.
|
||||||
$IMPALA_HOME/testdata/bin/run-hive-server.sh -only_metastore 2>&1 | \
|
$IMPALA_HOME/testdata/bin/run-hive-server.sh -only_metastore 2>&1 | \
|
||||||
tee ${IMPALA_TEST_CLUSTER_LOG_DIR}/run-hive-server.log
|
tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hive-server.log
|
||||||
else
|
else
|
||||||
# With Isilon, we only start the Hive metastore and Sentry Policy Server.
|
# With Isilon, we only start the Hive metastore and Sentry Policy Server.
|
||||||
# - HDFS is not started because Isilon is used as the defaultFs in core-site
|
# - HDFS is not started because Isilon is used as the defaultFs in core-site
|
||||||
@@ -72,9 +72,9 @@ else
|
|||||||
# TODO: Figure out how to start YARN, LLAMA and Hive with a different defaultFs.
|
# TODO: Figure out how to start YARN, LLAMA and Hive with a different defaultFs.
|
||||||
echo " --> Starting Hive Metastore Service"
|
echo " --> Starting Hive Metastore Service"
|
||||||
$IMPALA_HOME/testdata/bin/run-hive-server.sh -only_metastore 2>&1 | \
|
$IMPALA_HOME/testdata/bin/run-hive-server.sh -only_metastore 2>&1 | \
|
||||||
tee ${IMPALA_TEST_CLUSTER_LOG_DIR}/run-hive-server.log
|
tee ${IMPALA_CLUSTER_LOGS_DIR}/run-hive-server.log
|
||||||
|
|
||||||
echo " --> Starting the Sentry Policy Server"
|
echo " --> Starting the Sentry Policy Server"
|
||||||
$IMPALA_HOME/testdata/bin/run-sentry-service.sh > \
|
$IMPALA_HOME/testdata/bin/run-sentry-service.sh > \
|
||||||
${IMPALA_TEST_CLUSTER_LOG_DIR}/run-sentry-service.log 2>&1
|
${IMPALA_CLUSTER_LOGS_DIR}/run-sentry-service.log 2>&1
|
||||||
fi
|
fi
|
||||||
|
|||||||
8
testdata/bin/run-hbase.sh
vendored
8
testdata/bin/run-hbase.sh
vendored
@@ -7,7 +7,7 @@ trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)
|
|||||||
CLUSTER_BIN=${IMPALA_HOME}/testdata/bin
|
CLUSTER_BIN=${IMPALA_HOME}/testdata/bin
|
||||||
HBASE_JAAS_CLIENT=${HBASE_CONF_DIR}/hbase-jaas-client.conf
|
HBASE_JAAS_CLIENT=${HBASE_CONF_DIR}/hbase-jaas-client.conf
|
||||||
HBASE_JAAS_SERVER=${HBASE_CONF_DIR}/hbase-jaas-server.conf
|
HBASE_JAAS_SERVER=${HBASE_CONF_DIR}/hbase-jaas-server.conf
|
||||||
HBASE_LOGDIR=${IMPALA_TEST_CLUSTER_LOG_DIR}/hbase
|
HBASE_LOGDIR=${IMPALA_CLUSTER_LOGS_DIR}/hbase
|
||||||
|
|
||||||
# Kill and clean data for a clean start.
|
# Kill and clean data for a clean start.
|
||||||
${CLUSTER_BIN}/kill-hbase.sh > /dev/null 2>&1
|
${CLUSTER_BIN}/kill-hbase.sh > /dev/null 2>&1
|
||||||
@@ -22,10 +22,10 @@ export HBASE_LOG_DIR=${HBASE_LOGDIR}
|
|||||||
export HBASE_PID_DIR=${HBASE_LOGDIR}
|
export HBASE_PID_DIR=${HBASE_LOGDIR}
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
# Put zookeeper things in the cluster_logs/zoo directory.
|
# Put zookeeper things in the logs/cluster/zoo directory.
|
||||||
# (See hbase.zookeeper.property.dataDir in hbase-site.xml)
|
# (See hbase.zookeeper.property.dataDir in hbase-site.xml)
|
||||||
rm -rf ${IMPALA_TEST_CLUSTER_LOG_DIR}/zoo
|
rm -rf ${IMPALA_CLUSTER_LOGS_DIR}/zoo
|
||||||
mkdir -p ${IMPALA_TEST_CLUSTER_LOG_DIR}/zoo
|
mkdir -p ${IMPALA_CLUSTER_LOGS_DIR}/zoo
|
||||||
mkdir -p ${HBASE_LOGDIR}
|
mkdir -p ${HBASE_LOGDIR}
|
||||||
|
|
||||||
if ${CLUSTER_DIR}/admin is_kerberized; then
|
if ${CLUSTER_DIR}/admin is_kerberized; then
|
||||||
|
|||||||
2
testdata/bin/run-hive-server.sh
vendored
2
testdata/bin/run-hive-server.sh
vendored
@@ -9,7 +9,7 @@ trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)
|
|||||||
HIVE_SERVER_PORT=10000
|
HIVE_SERVER_PORT=10000
|
||||||
export HIVE_SERVER2_THRIFT_PORT=11050
|
export HIVE_SERVER2_THRIFT_PORT=11050
|
||||||
HIVE_METASTORE_PORT=9083
|
HIVE_METASTORE_PORT=9083
|
||||||
LOGDIR=${IMPALA_HOME}/cluster_logs/hive
|
LOGDIR=${IMPALA_CLUSTER_LOGS_DIR}/hive
|
||||||
HIVES2_TRANSPORT="plain_sasl"
|
HIVES2_TRANSPORT="plain_sasl"
|
||||||
METASTORE_TRANSPORT="buffered"
|
METASTORE_TRANSPORT="buffered"
|
||||||
ONLY_METASTORE=0
|
ONLY_METASTORE=0
|
||||||
|
|||||||
4
testdata/cluster/admin
vendored
4
testdata/cluster/admin
vendored
@@ -82,7 +82,7 @@ if [[ "$CDH_MAJOR_VERSION" -ge 5 ]]; then
|
|||||||
EMPTY_NODE_DIRS+=" /var/log/llama"
|
EMPTY_NODE_DIRS+=" /var/log/llama"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
EASY_ACCESS_LOG_DIR="$IMPALA_HOME/cluster_logs"
|
EASY_ACCESS_LOG_DIR="$IMPALA_CLUSTER_LOGS_DIR"
|
||||||
MINIKDC_INIT=${IMPALA_HOME}/testdata/bin/minikdc.sh
|
MINIKDC_INIT=${IMPALA_HOME}/testdata/bin/minikdc.sh
|
||||||
|
|
||||||
if $IS_OSX; then
|
if $IS_OSX; then
|
||||||
@@ -246,7 +246,7 @@ function create_cluster {
|
|||||||
if [[ ! -e "$EASY_ACCESS_LOG_LINK" ]]; then
|
if [[ ! -e "$EASY_ACCESS_LOG_LINK" ]]; then
|
||||||
mkdir -p "$EASY_ACCESS_LOG_DIR"
|
mkdir -p "$EASY_ACCESS_LOG_DIR"
|
||||||
ln -s "$NODE_DIR/var/log" "$EASY_ACCESS_LOG_DIR"
|
ln -s "$NODE_DIR/var/log" "$EASY_ACCESS_LOG_DIR"
|
||||||
mv "$IMPALA_HOME/cluster_logs/log" "$EASY_ACCESS_LOG_LINK"
|
mv "$IMPALA_CLUSTER_LOGS_DIR/log" "$EASY_ACCESS_LOG_LINK"
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Template population
|
# Template population
|
||||||
|
|||||||
@@ -23,17 +23,14 @@ trap 'echo Error in $0 at line $LINENO: $(cd "'$PWD'" && awk "NR == $LINENO" $0)
|
|||||||
# TODO: Combine with run-process-failure-tests.sh
|
# TODO: Combine with run-process-failure-tests.sh
|
||||||
export HEAPCHECK=
|
export HEAPCHECK=
|
||||||
|
|
||||||
RESULTS_DIR=${IMPALA_HOME}/tests/custom_cluster/results
|
|
||||||
mkdir -p ${RESULTS_DIR}
|
|
||||||
LOG_DIR=${IMPALA_TEST_CLUSTER_LOG_DIR}/custom_cluster/
|
|
||||||
mkdir -p ${LOG_DIR}
|
|
||||||
|
|
||||||
AUX_CUSTOM_DIR=""
|
AUX_CUSTOM_DIR=""
|
||||||
if [ -n ${IMPALA_AUX_TEST_HOME} ]; then
|
if [ -n ${IMPALA_AUX_TEST_HOME} ]; then
|
||||||
AUX_CUSTOM_DIR=${IMPALA_AUX_TEST_HOME}/tests/aux_custom_cluster_tests/
|
AUX_CUSTOM_DIR=${IMPALA_AUX_TEST_HOME}/tests/aux_custom_cluster_tests/
|
||||||
fi
|
fi
|
||||||
|
|
||||||
export LOG_DIR
|
export LOG_DIR=${IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR}
|
||||||
|
RESULTS_DIR=${IMPALA_CUSTOM_CLUSTER_TEST_LOGS_DIR}/results
|
||||||
|
mkdir -p ${RESULTS_DIR}
|
||||||
|
|
||||||
# KERBEROS TODO We'll want to pass kerberos status in here.
|
# KERBEROS TODO We'll want to pass kerberos status in here.
|
||||||
cd ${IMPALA_HOME}/tests
|
cd ${IMPALA_HOME}/tests
|
||||||
|
|||||||
Reference in New Issue
Block a user