IMPALA-13001: Support graceful and force shutdown for impala.sh

This patch adds graceful and force shutdown support to impala.sh.

This patch also keeps the stdout and stderr logs at startup.

This patch also fixes some bugs in impala.sh, including:
 - the missing check for an empty service name.
 - the restart command not working.

Testing:
 - Manually deployed the package on Ubuntu 22.04 and verified it.

Change-Id: Ib7743234952ba6b12694ecc68a920d59fea0d4ba
Reviewed-on: http://gerrit.cloudera.org:8080/21297
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Xiang Yang
2024-04-13 08:45:56 +00:00
committed by Impala Public Jenkins
parent d83b48cf72
commit 8af0ce8ed6
3 changed files with 75 additions and 26 deletions

2
.gitignore vendored
View File

@@ -42,6 +42,8 @@ CMakeDoxygenDefaults.cmake
CPackConfig.cmake CPackConfig.cmake
CPackSourceConfig.cmake CPackSourceConfig.cmake
_CPack_Packages _CPack_Packages
install_manifest.txt
package/build/
# Build timestamp files # Build timestamp files
.*timestamp .*timestamp

View File

@@ -49,13 +49,14 @@ status() {
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case ${1} in case ${1} in
impalad|catalogd|admissiond|statestored) service=${1} && shift && break ;; impalad|catalogd|admissiond|statestored) service=${1} && shift && break ;;
*) usage && exit 1 ;; *) >&2 usage && exit 1 ;;
esac esac
done done
[[ ${service} != "" ]] || (>&2 usage && exit 1)
local service_pidfile_key=${service^^}_PIDFILE local service_pidfile_key=${service^^}_PIDFILE
local service_pidfile=${!service_pidfile_key} local service_pidfile=${!service_pidfile_key}
if [[ ! -f ${service_pidfile} ]]; then if [[ ! -f ${service_pidfile} ]]; then
echo "${service} is stopped." >&2 echo "${service} is stopped."
return 1 return 1
fi fi
local pid=$(cat ${service_pidfile}) local pid=$(cat ${service_pidfile})
@@ -63,15 +64,15 @@ status() {
echo "${service} is running with PID ${pid}." echo "${service} is running with PID ${pid}."
return 0 return 0
fi fi
echo "${service} is stopped." >&2 echo "${service} is stopped."
return 1 return 1
} }
# Return 0 if service is stopped in expected time, else otherwise. # Return 0 if service is stopped in expected time, else otherwise.
stop_await() { stop_await() {
local service=${1} service_pidfile=${2} counts=${3} period=${4} local service=${1} service_pidfile=${2} counts=${3} period=${4}
[[ "${counts}" == "0" ]] && exit 0 [[ "${counts}" == "0" ]] && return 0
for ((i=1; i<=${counts}; i++)); do for ((i=1; ${counts} == -1 || i<=${counts}; i++)); do
[[ ${i} -gt 1 ]] && sleep ${period} [[ ${i} -gt 1 ]] && sleep ${period}
if ! kill -0 ${pid} &> /dev/null; then if ! kill -0 ${pid} &> /dev/null; then
rm ${service_pidfile} && echo "(${i}/${counts}) ${service} is stopped." && return 0 rm ${service_pidfile} && echo "(${i}/${counts}) ${service} is stopped." && return 0
@@ -82,36 +83,54 @@ stop_await() {
return 1 return 1
} }
#TODO: Add graceful shutdown for impalads
stop() { stop() {
local service= counts=20 period=2 local service= counts=20 period=2 signal=SIGTERM force=false grace=false
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case ${1} in case ${1} in
-c) counts=${2} && shift 2 ;; -c) counts=${2} && shift 2 ;;
-p) period=${2} && shift 2 ;; -p) period=${2} && shift 2 ;;
-f) signal=SIGKILL && force=true && shift 1 ;;
-g) signal=SIGRTMIN && grace=true && shift 1 ;;
impalad|catalogd|admissiond|statestored) service=${1} && shift && break ;; impalad|catalogd|admissiond|statestored) service=${1} && shift && break ;;
*) usage && exit 1 ;; *) >&2 usage && exit 1 ;;
esac esac
done done
check_counts ${counts} ${period} check_counts ${counts} ${period}
[[ ${service} != "" ]] || (>&2 usage && exit 1)
# Disable graceful shutdown timeout.
[[ ${grace} == true ]] && counts=-1 || true
if [[ ${grace} == true && ${force} == true ]]; then
echo "Cannot use '-g' and '-f' together."
exit 1
fi
if [[ ${grace} == true && ${service} != impalad ]]; then
echo "Warning: Cannot apply '-g' to ${service} service."
signal=SIGTERM
fi
local service_pidfile_key=${service^^}_PIDFILE local service_pidfile_key=${service^^}_PIDFILE
local service_pidfile=${!service_pidfile_key} local service_pidfile=${!service_pidfile_key}
if [[ ! -f ${service_pidfile} ]]; then if [[ ! -f ${service_pidfile} ]]; then
echo "Already stopped: PID file '${service_pidfile}' not found." echo "Already stopped: PID file '${service_pidfile}' not found."
exit 0 return 0
fi fi
local pid=$(cat ${service_pidfile}) local pid=$(cat ${service_pidfile})
if ! ps -p ${pid} -o comm=|grep ${service} &> /dev/null ; then if ! ps -p ${pid} -o comm=|grep ${service} &> /dev/null ; then
rm ${service_pidfile} rm ${service_pidfile}
echo "Already stopped: ${service} is not running with PID ${pid}." \ echo "Already stopped: ${service} is not running with PID ${pid}." \
"Removed stale file '${service_pidfile}'." "Removed stale file '${service_pidfile}'."
exit 0 return 0
fi fi
echo "Killing ${service} with PID ${pid}." echo "Killing ${service} with PID ${pid}."
kill ${pid} kill -${signal} ${pid}
if ! stop_await ${service} ${service_pidfile} ${counts} ${period}; then if ! stop_await ${service} ${service_pidfile} ${counts} ${period}; then
echo "Timed out waiting ${service} to stop, check logs for more details." if [[ ${grace} == true ]]; then
exit 1 kill -SIGKILL ${pid}
echo "Timed out waiting ${service} to graceful shutdown."
return 0
else
echo "Timed out waiting ${service} to stop, check logs for more details."
return 1
fi
fi fi
} }
@@ -147,20 +166,29 @@ start() {
case ${1} in case ${1} in
-c) counts=${2} && shift 2 ;; -c) counts=${2} && shift 2 ;;
-p) period=${2} && shift 2 ;; -p) period=${2} && shift 2 ;;
# Ignore the '-f' and '-g' parameter to support the restart command.
-f|-g) shift 1 ;;
impalad|catalogd|admissiond|statestored) service=${1} && shift && break ;; impalad|catalogd|admissiond|statestored) service=${1} && shift && break ;;
*) usage && exit 1 ;; *) >&2 usage && exit 1 ;;
esac esac
done done
check_counts ${counts} ${period} check_counts ${counts} ${period}
status ${service} && exit 0 [[ ${service} != "" ]] || (>&2 usage && exit 1)
status ${service} 2> /dev/null && return 0
local service_flagfile=${IMPALA_HOME}/conf/${service}_flags local service_flagfile=${IMPALA_HOME}/conf/${service}_flags
local service_stdout_key=${service^^}_OUTFILE
local service_stderr_key=${service^^}_ERRFILE
local service_pidfile_key=${service^^}_PIDFILE local service_pidfile_key=${service^^}_PIDFILE
local service_stdout=${!service_stdout_key}
local service_stderr=${!service_stderr_key}
local service_pidfile=${!service_pidfile_key} local service_pidfile=${!service_pidfile_key}
mkdir -p $(dirname ${service_pidfile}) mkdir -p $(dirname ${service_pidfile})
echo "Service stdout is redirected to '${service_stdout}'."
echo "Service stderr is redirected to '${service_stderr}'."
# User can override '--flagfile' in the following commandline arguments. # User can override '--flagfile' in the following commandline arguments.
${IMPALA_HOME}/sbin/${service} \ ${IMPALA_HOME}/sbin/${service} \
--flagfile=${service_flagfile} \ --flagfile=${service_flagfile} \
${@} & ${@} >> ${service_stdout} 2>> ${service_stderr} &
local pid=$! local pid=$!
echo ${pid} > ${service_pidfile} echo ${pid} > ${service_pidfile}
# Sleep 1s so the glog output won't be messed up with waiting messages. # Sleep 1s so the glog output won't be messed up with waiting messages.
@@ -173,18 +201,19 @@ restart() {
} }
health() { health() {
local service= counts=20 period=2 local service= counts=20 period=2 code=
while [[ $# -gt 0 ]]; do while [[ $# -gt 0 ]]; do
case ${1} in case ${1} in
-c) counts=${2} && shift 2 ;; -c) counts=${2} && shift 2 ;;
-p) period=${2} && shift 2 ;; -p) period=${2} && shift 2 ;;
impalad|catalogd|admissiond|statestored) service=${1} && shift ;; impalad|catalogd|admissiond|statestored) service=${1} && shift ;;
*) usage && exit 1 ;; *) >&2 usage && exit 1 ;;
esac esac
done done
check_counts ${counts} ${period} check_counts ${counts} ${period}
[[ "${counts}" == "0" ]] && exit 0 [[ "${counts}" == "0" ]] && exit 0
status ${service} || exit 1 [[ ${service} != "" ]] || (>&2 usage && exit 1)
status ${service} > /dev/null || exit 1
# Determine Web Server port # Determine Web Server port
local service_flagfile=${IMPALA_HOME}/conf/${service}_flags local service_flagfile=${IMPALA_HOME}/conf/${service}_flags
local service_pidfile_key=${service^^}_PIDFILE local service_pidfile_key=${service^^}_PIDFILE
@@ -207,8 +236,8 @@ health() {
# Request healthz code # Request healthz code
for ((i=1; i<=${counts}; i++)); do for ((i=1; i<=${counts}; i++)); do
[[ ${i} -gt 1 ]] && sleep ${period} || true [[ ${i} -gt 1 ]] && sleep ${period} || true
local code=$(curl -s http://localhost:${port}/healthz) if ! code=$(curl -s http://localhost:${port}/healthz); then
if [[ $? != 0 ]]; then status ${service} > /dev/null || exit 1
echo "(${i}/${counts}) ${service} on port ${port} is not ready." echo "(${i}/${counts}) ${service} on port ${port} is not ready."
elif [[ "${code}" != "OK" ]]; then elif [[ "${code}" != "OK" ]]; then
echo "(${i}/${counts}) Waiting for ${service} to be ready." echo "(${i}/${counts}) Waiting for ${service} to be ready."
@@ -237,10 +266,16 @@ usage() {
echo " stop: stop an Impala daemon service, wait until service is stopped." echo " stop: stop an Impala daemon service, wait until service is stopped."
echo " options:" echo " options:"
echo " -c: maximum count of checks, defaults to 20." echo " -c: maximum count of checks, defaults to 20."
echo " -f: force kill a daemon service."
echo " -g: graceful shutdown the impalad service."
echo " -p: seconds of period between checks, defaults to 2." echo " -p: seconds of period between checks, defaults to 2."
echo echo
echo " restart: restart an Impala daemon service." echo " restart: restart an Impala daemon service."
echo " options: same as start command." echo " options:"
echo " -c: maximum count of checks, defaults to 20."
echo " -f: force kill a daemon service."
echo " -g: graceful shutdown the impalad service."
echo " -p: seconds of period between checks, defaults to 2."
echo echo
echo " status: check the process status of an Impala daemon service." echo " status: check the process status of an Impala daemon service."
echo echo
@@ -261,11 +296,11 @@ version() {
main() { main() {
[[ $# -ge 1 && ${1} == "--help" ]] && usage && exit 0 [[ $# -ge 1 && ${1} == "--help" ]] && usage && exit 0
[[ $# -ge 1 && ${1} == "--version" ]] && version && exit 0 [[ $# -ge 1 && ${1} == "--version" ]] && version && exit 0
[[ $# -lt 2 ]] && usage && exit 1 [[ $# -lt 2 ]] && >&2 usage && exit 1
local command=${1} local command=${1}
case ${command} in case ${command} in
start|stop|restart|status|health) shift && init && ${command} $@ ;; start|stop|restart|status|health) shift && init && ${command} ${@} ;;
*) usage && exit 1 ;; *) >&2 usage && exit 1 ;;
esac esac
} }

View File

@@ -36,8 +36,20 @@
# Specify JVM options. # Specify JVM options.
export JAVA_TOOL_OPTIONS=${JAVA_TOOL_OPTIONS:-} export JAVA_TOOL_OPTIONS=${JAVA_TOOL_OPTIONS:-}
# Specify default pidfile directories. # Specify default pidfile.
: ${IMPALAD_PIDFILE:="/tmp/impalad.pid"} : ${IMPALAD_PIDFILE:="/tmp/impalad.pid"}
: ${CATALOGD_PIDFILE:="/tmp/catalogd.pid"} : ${CATALOGD_PIDFILE:="/tmp/catalogd.pid"}
: ${ADMISSIOND_PIDFILE:="/tmp/admissiond.pid"} : ${ADMISSIOND_PIDFILE:="/tmp/admissiond.pid"}
: ${STATESTORED_PIDFILE:="/tmp/statestored.pid"} : ${STATESTORED_PIDFILE:="/tmp/statestored.pid"}
# Specify default stdout file.
: ${IMPALAD_OUTFILE:="/tmp/impalad.out"}
: ${CATALOGD_OUTFILE:="/tmp/catalogd.out"}
: ${ADMISSIOND_OUTFILE:="/tmp/admissiond.out"}
: ${STATESTORED_OUTFILE:="/tmp/statestored.out"}
# Specify default stderr file.
: ${IMPALAD_ERRFILE:="/tmp/impalad.err"}
: ${CATALOGD_ERRFILE:="/tmp/catalogd.err"}
: ${ADMISSIOND_ERRFILE:="/tmp/admissiond.err"}
: ${STATESTORED_ERRFILE:="/tmp/statestored.err"}