Files
impala/testdata/cluster/admin
casey 2351266d0e Replace single process mini-dfs with multiple processes
This should allow individual service components, such as a single nodemanager,
to be shut down for failure testing. The mini-cluster bundled with hadoop is a
single process that does not expose the ability to control individual roles.
Now each role can be controlled and configured independently of the others.

Change-Id: Ic1d42e024226c6867e79916464d184fce886d783
Reviewed-on: http://gerrit.ent.cloudera.com:8080/1432
Tested-by: Casey Ching <casey@cloudera.com>
Reviewed-by: Casey Ching <casey@cloudera.com>
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2297
Reviewed-by: Ishaan Joshi <ishaan@cloudera.com>
Tested-by: Ishaan Joshi <ishaan@cloudera.com>
2014-04-23 18:24:05 -07:00

253 lines
8.2 KiB
Bash
Executable File

#!/bin/bash
# Copyright 2014 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This will create/control/destroy a local hdfs+yarn+llama cluster.
#
# The original idea was to run each node on a different loopback address but
# https://jira.cloudera.com/browse/CDH-16602 makes that impossible for now. So all roles
# run on 127.0.0.1, just like the standard mini cluster included with hadoop. The
# difference is with this cluster, each role runs in its own process and has its own
# configs. For each node, the layout of the configs, logs, start/stop scripts, etc, is
# kept as close as possible to a real cluster. For example, the first node will live in
# the dir "cdh<version>/node-1" and its logs would be at "cdh<version>/node-1/var/log".
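#
# A rough sketch of the per-node layout this script creates (illustrative, not
# exhaustive):
#   cdh<version>/
#     node-1/               # hosts the master roles (namenode, resourcemanager, llama)
#       etc/hadoop/conf/    # per-node configs, generated from node_templates/
#       etc/init.d/         # one start/stop script per role
#       data/dfs/{nn,dn}/
#       var/log/            # also symlinked from $IMPALA_HOME/cluster_logs
#     node-2/               # slave roles only (e.g. datanode, nodemanager)
#     node-3/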
set -eu
while getopts v OPT; do
  case $OPT in
    v) set -x; shift;;
    ?) echo "Usage: $0 [-v (verbose)] ACTION (see source...)"; exit 1;;
  esac
done
DIR=$(dirname $0)
NODES_DIR="$DIR/cdh$CDH_MAJOR_VERSION"
NODE_COUNT=3
NODE_PREFIX=node-
COMMON_NODE_TEMPLATE="$DIR/node_templates/common"
NODE_TEMPLATE="$DIR/node_templates/cdh$CDH_MAJOR_VERSION"
TEMPLATE_SUFFIX=".tmpl"
# Each process should be marked with this so a "pkill -f" can be done to nuke everything.
export KILL_CLUSTER_MARKER=IBelongToTheMiniCluster
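# The node templates are expected to place this marker somewhere on each daemon's
# command line so the pkill calls in stop_cluster/delete_cluster below can find the
# processes.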
SUPPORTED_SERVICES=(hdfs yarn)
if [[ "$CDH_MAJOR_VERSION" -ge 5 ]]; then
SUPPORTED_SERVICES+=(llama)
fi
# All DataNodes and NodeManagers need a unique but fixed address. The IP is fixed at
# 127.0.0.1, so the only difference is the port. The address must be fixed because it is
# used as an identifier. If a node were restarted with a different address it would be
# considered a new node. The values below are arbitrary and may conflict with existing
# services. Fixed ports were preferred to dynamically chosen free ports for consistency.
DATANODE_FREE_PORT_START=31000
NODEMANAGER_FREE_PORT_START=31100
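# Because create_cluster builds the nodes in descending order, node-3 ends up with
# 31000/31100, node-2 with 31001/31101 and node-1 with 31002/31102.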
# Used to populate config templates later. Any changes made here have no effect on an
# existing cluster.
export HDFS_WEBUI_PORT=5070 # changed from 50070 so it is not ephemeral
export YARN_WEBUI_PORT=8088 # same as default
export LLAMA_WEBUI_PORT=1501 # same as default
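# Note: 50070 falls inside the default Linux ephemeral port range (32768-61000), so a
# dynamically assigned client port could collide with it; 5070 avoids that.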
# Empty dirs that should be included in the templates. Since git ignores empty dirs it is
# easier to maintain them here.
EMPTY_NODE_DIRS=$(echo data/dfs/{dn,nn} var/{run,lib/hadoop-hdfs,log})
if [[ "$CDH_MAJOR_VERSION" -ge 5 ]]; then
EMPTY_NODE_DIRS+=" /var/log/llama"
fi
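# e.g. with CDH 5 the list expands to:
#   data/dfs/dn data/dfs/nn var/run var/lib/hadoop-hdfs var/log var/log/llama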
EASY_ACCESS_LOG_DIR="$IMPALA_HOME/cluster_logs"
function cluster_exists {
  # Just use the first node as an indicator...
  if [[ ! -e "$NODES_DIR/${NODE_PREFIX}1" ]]; then
    return 1
  fi
}
function create_cluster {
  mkdir -p "$NODES_DIR"
  # Used to populate config templates later
  GROUP=$(id -gn)
  export GROUP
  # Llama needs a mapping of DataNodes to NodeManagers and for that we'll need to know
  # the hostname hadoop has chosen. It should be the first entry in /etc/hosts for
  # 127.0.0.1.
  #
  # It is possible this hostname must also match the impala hostname, which will
  # never be "localhost".
  if getent hosts 127.0.0.1 1>/dev/null; then
    HADOOP_HOSTNAME=$(getent hosts 127.0.0.1 | awk '{print $2}')
  else
    # This may be an error case...
    HADOOP_HOSTNAME=127.0.0.1
  fi
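  # For example, with an /etc/hosts entry like "127.0.0.1  myhost.example.com myhost",
  # HADOOP_HOSTNAME would end up as "myhost.example.com" (awk picks the second field).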
  # For consistency, the first node will host all the master roles, including llama.
  # Llama needs to know the ports of the datanodes and nodemanagers. The ports are
  # generated below as part of setting up each node. The slave nodes are created first
  # so the ports will be known when it is time to configure llama.
  for ((NODE_IDX=$NODE_COUNT; NODE_IDX >= 1; NODE_IDX--)); do
    NODE=${NODE_PREFIX}$NODE_IDX
    NODE_DIR=$(get_node_dir $NODE)
    echo "Creating $NODE at $NODE_DIR"
    mkdir -p "$NODE_DIR"
    cp -a "$COMMON_NODE_TEMPLATE"/* "$NODE_DIR"
    if [[ -e "$NODE_TEMPLATE" ]]; then
      cp -a "$NODE_TEMPLATE"/* "$NODE_DIR"
    fi
    if [[ $NODE_IDX -gt 1 ]]; then
      # Remove master role scripts from slave nodes
      rm -f "$NODE_DIR/etc/init.d/"{hdfs-namenode,yarn-resourcemanager,llama-application}
    fi
    for EMPTY_NODE_DIR in $EMPTY_NODE_DIRS; do
      mkdir -p "$NODE_DIR/$EMPTY_NODE_DIR"
    done
    # Add some easy access links closer to IMPALA_HOME
    EASY_ACCESS_LOG_LINK="$EASY_ACCESS_LOG_DIR/cdh$CDH_MAJOR_VERSION-$NODE"
    if [[ ! -e "$EASY_ACCESS_LOG_LINK" ]]; then
      mkdir -p "$EASY_ACCESS_LOG_DIR"
      ln -s "$NODE_DIR/var/log" "$EASY_ACCESS_LOG_DIR"
      mv "$IMPALA_HOME/cluster_logs/log" "$EASY_ACCESS_LOG_LINK"
    fi
    # Template population
    DATANODE_PORT=$((DATANODE_FREE_PORT_START++))
    NODEMANAGER_PORT=$((NODEMANAGER_FREE_PORT_START++))
    echo "$NODE will use ports DATANODE_PORT=$DATANODE_PORT and" \
        "NODEMANAGER_PORT=$NODEMANAGER_PORT"
    # Escape the first : to workaround https://jira.cloudera.com/browse/CDH-16840
    LLAMA_PORT_MAPPINGS+="$HADOOP_HOSTNAME\\:$DATANODE_PORT="
    LLAMA_PORT_MAPPINGS+="$HADOOP_HOSTNAME:$NODEMANAGER_PORT
"
    export NODE NODE_DIR DATANODE_PORT NODEMANAGER_PORT LLAMA_PORT_MAPPINGS
    for TEMPLATE_PATH in $(find "$NODE_DIR" -name "*$TEMPLATE_SUFFIX"); do
      ACTUAL_PATH="${TEMPLATE_PATH%$TEMPLATE_SUFFIX}"
      # Search for strings like ${FOO}; if FOO is defined in the environment then
      # replace "${FOO}" with the environment value.
      perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' \
          "$TEMPLATE_PATH" > "$ACTUAL_PATH"
      if [[ -x "$TEMPLATE_PATH" ]]; then
        chmod u+x "$ACTUAL_PATH"
      fi
      rm "$TEMPLATE_PATH"
    done
  done
}
function start_cluster {
  if ! cluster_exists; then
    echo "The cluster must be created first"
    return 1
  fi
  for SERVICE in ${SUPPORTED_SERVICES[@]}; do
    WEBUI_PORT_VAR="${SERVICE^^}_WEBUI_PORT"
    echo "Starting ${SERVICE^^} (Web UI - http://localhost:${!WEBUI_PORT_VAR})"
    exec_init_script "$SERVICE*" start
  done
  # Check if the cluster is still alive...
  sleep 10
  check_cluster_status
}
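# Runs the given init.d action on every node's scripts matching SCRIPT_NAME (a glob),
# e.g. "hdfs*" is expected to match hdfs-namenode on node-1 plus the datanode scripts
# on the other nodes; the exact script names come from the node templates. Actions run
# in parallel, and "start"/"stop" are skipped for roles already in the desired state.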
function exec_init_script {
  SCRIPT_NAME="$1"
  shift
  for SCRIPT in \
      $(find "$NODES_DIR" -path "*/$NODE_PREFIX*/etc/init.d/$SCRIPT_NAME" -executable); do
    if "$SCRIPT" status &>/dev/null; then
      RUNNING=true
    else
      RUNNING=false
    fi
    case $1 in
      start) if ! $RUNNING; then "$SCRIPT" start; fi;;
      stop) if $RUNNING; then "$SCRIPT" stop; fi;;
      *) "$SCRIPT" "$@";;
    esac &
  done
  wait
}
function check_cluster_status {
  if ! cluster_exists; then
    echo "The cluster does not exist"
    return 1
  fi
  ROLE_COUNT=0
  NOT_RUNNING=()
  for NODE_DIR in "$NODES_DIR/$NODE_PREFIX"*; do
    for SCRIPT in $(find "$NODE_DIR" -path "*/etc/init.d/*" -executable); do
      ROLE_COUNT=$((ROLE_COUNT + 1))
      if ! "$SCRIPT" status &>/dev/null; then
        NOT_RUNNING+=("\n$(basename $SCRIPT) is not running on $(basename $NODE_DIR)")
      fi
    done
  done
  case ${#NOT_RUNNING[@]} in
    0) echo "The cluster is running"; return;;
    $ROLE_COUNT) echo "The cluster is not running"; return 1;;
    *) echo -e "${NOT_RUNNING[@]}"; return 1;;
  esac
}
function stop_cluster {
  if cluster_exists; then
    # Stop services in reverse order to give them a chance to shut down cleanly
    for ((SERVICE_IDX=${#SUPPORTED_SERVICES[@]} - 1; SERVICE_IDX >= 0; SERVICE_IDX--)); do
      SERVICE="${SUPPORTED_SERVICES[$SERVICE_IDX]}"
      echo "Stopping ${SERVICE^^}"
      exec_init_script "$SERVICE*" stop
    done
  fi
  # Kill everything anyway just in case a git clean -xdf was done
  pkill -f $KILL_CLUSTER_MARKER || true
}
function delete_data {
  rm -rf "$NODES_DIR/$NODE_PREFIX"*/data/dfs/{nn,dn}/*
}
function delete_cluster {
  pkill -f $KILL_CLUSTER_MARKER || true
  rm -rf "$NODES_DIR"
}
function get_node_dir {
  readlink -f "$NODES_DIR/$1"
}
function get_hadoop_client_conf_dir {
  echo "$NODES_DIR/$NODE_PREFIX"1/etc/hadoop/conf
}
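# The ACTION given on the command line is just the name of one of the functions above,
# e.g.:
#   ./admin create_cluster
#   ./admin -v start_cluster
#   ./admin check_cluster_status
#   ./admin stop_cluster
#   ./admin delete_cluster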
COMMAND=$1
shift
$COMMAND "$@"