mirror of
https://github.com/apache/impala.git
synced 2025-12-23 21:08:39 -05:00
Impala 4 moved to using CDP versions for components, which involves adopting Hive 3. This removes the old code supporting CDH components and Hive 2. Specifically, it does the following: 1. Remove USE_CDP_HIVE and default to the values from USE_CDP_HIVE=true. USE_CDP_HIVE now has no effect on the Impala environment. This also means that bin/jenkins/build-all-flag-combinations.sh no longer includes USE_CDP_HIVE=false as a configuration. 2. Remove USE_CDH_KUDU and default to getting Kudu from the native toolchain. 3. Ban IMPALA_HIVE_MAJOR_VERSION<3 and remove related code, including the IMPALA_HIVE_MAJOR_VERSION=2 maven profile in fe/pom.xml. There is a fair amount of code that still references the Hive major version. Upstream Hive is now working on Hive 4, so there is a high likelihood that we'll need some code to deal with that transition. This leaves some code (such as maven profiles) and test logic in place. Change-Id: Id85e849beaf4e19dda4092874185462abd2ec608 Reviewed-on: http://gerrit.cloudera.org:8080/15869 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
149 lines
5.5 KiB
Bash
Executable File
149 lines
5.5 KiB
Bash
Executable File
#!/bin/bash
|
|
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
# Abort on any unhandled error, unset variable, or failed pipeline stage.
set -euo pipefail

# Pull in the shared build-error reporting helpers. IMPALA_HOME must be set by
# the caller's environment; "set -u" above aborts the script if it is not.
. $IMPALA_HOME/bin/report_build_error.sh
setup_report_build_error

# Well-known ports for the minicluster Hive services.
HIVE_SERVER_PORT=10000
# Exported so child processes see it — presumably read by the hive launcher
# scripts to pick the HiveServer2 thrift port; verify against those scripts.
export HIVE_SERVER2_THRIFT_PORT=11050
HIVE_METASTORE_PORT=9083
# All Hive daemon stdout/stderr is redirected under this directory.
LOGDIR=${IMPALA_CLUSTER_LOGS_DIR}/hive
# Thrift transports used by the wait-for-* probes further below. The HS2
# transport is switched to "kerberos" when the minicluster is kerberized.
HIVES2_TRANSPORT="plain_sasl"
METASTORE_TRANSPORT="buffered"
# Command-line flag defaults; see the argument-parsing loop below.
ONLY_METASTORE=0
ENABLE_RANGER_AUTH=0

CLUSTER_BIN=${IMPALA_HOME}/testdata/bin
|
|
|
|
# Detect whether the minicluster is kerberized and, if so, adjust the
# environment before launching any Hive daemons.
if ${CLUSTER_DIR}/admin is_kerberized; then
  # Making a kerberized cluster... set some more environment variables.
  . ${MINIKDC_ENV}

  # The HS2 wait-for probe must authenticate over the kerberos transport.
  HIVES2_TRANSPORT="kerberos"
  # The metastore isn't kerberized yet:
  # METASTORE_TRANSPORT="kerberos"
fi

# Make sure the log directory exists before redirecting daemon output into it.
mkdir -p ${LOGDIR}
|
|
|
|
# Parse command-line flags into the ONLY_METASTORE / ENABLE_RANGER_AUTH
# globals (set by the defaults section above when run as part of this script).
# Prints usage and exits non-zero for -help/-h or any unrecognized flag.
parse_args() {
  # Loop on the argument count rather than the old `[ -n "$*" ]`: joining all
  # arguments into one string made an empty-string argument able to terminate
  # the loop early and skip the remaining flags.
  while [ $# -gt 0 ]; do
    case $1 in
      -only_metastore)
        ONLY_METASTORE=1
        ;;
      -with_ranger)
        ENABLE_RANGER_AUTH=1
        echo "Starting Hive with Ranger authorization."
        ;;
      -help|-h|*)
        echo "run-hive-server.sh : Starts the hive server and the metastore."
        echo "[-only_metastore] : Only starts the hive metastore."
        echo "[-with_ranger] : Starts with Ranger authorization (only for Hive 3)."
        exit 1
        ;;
    esac
    shift
  done
}
parse_args "$@"
|
|
|
|
# TODO: We should have a retry loop for every service we start.
# Tear down any previously running Hive daemons so we start from a clean slate;
# failures (e.g. nothing was running) are deliberately ignored.
"${CLUSTER_BIN}/kill-hive-server.sh" >/dev/null 2>&1

# Let the metastore JVM accept a remote debugger on port 30010. The agent is
# non-suspending, so normal (non-debugged) startup is unaffected.
export HIVE_METASTORE_HADOOP_OPTS="-Xdebug -Xrunjdwp:transport=dt_socket,server=y,suspend=n,address=30010"
|
|
|
|
# CDP Hive's metastore launcher scripts do not put the Sentry binding jars on
# the classpath, so add them manually when a Sentry build is available. This is
# currently needed so that we can run all the tests, including Sentry tests.
# TODO: This can be removed when we move to Ranger completely.

# Append Sentry's hive-binding jars to HADOOP_CLASSPATH. Jars whose basename
# starts with "hive" are excluded: Sentry depends on Hive 2.1.1 and its Hive
# jars must not shadow the Hive 3 jars already on the classpath.
add_sentry_binding_jars() {
  local jar
  for jar in "${SENTRY_HOME}"/lib/sentry-binding-hive*.jar; do
    # A glob with no matches stays literal; don't add a nonexistent path.
    [[ -e "$jar" ]] || continue
    if [[ ! $(basename "$jar") == hive* ]]; then
      export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${jar}
    fi
  done
}

# Use ${SENTRY_HOME:-} rather than $SENTRY_HOME: the script runs under
# "set -u", which would otherwise abort here whenever SENTRY_HOME is unset.
if [[ -n "${SENTRY_HOME:-}" ]]; then
  add_sentry_binding_jars
fi
|
|
|
|
# Add Ranger dependencies if we are starting with Ranger authorization enabled.

# Append the Ranger hive-plugin jars (and their webapp-library dependencies)
# to HADOOP_CLASSPATH. Jars whose names collide with components already on the
# classpath (hive/hadoop/hbase/zookeeper) are skipped to avoid conflicts.
add_ranger_plugin_jars() {
  local jar name
  for jar in "${RANGER_HOME}"/ews/webapp/WEB-INF/classes/ranger-plugins/hive/ranger-*.jar \
      "${RANGER_HOME}"/ews/webapp/WEB-INF/lib/*.jar \
      "${RANGER_HOME}"/ews/lib/ranger-*.jar; do
    # A glob with no matches stays literal; don't add a nonexistent path.
    [[ -e "$jar" ]] || continue
    name=$(basename "$jar")
    if [[ ! $name == hive* && ! $name == hadoop* && ! $name == hbase* \
        && ! $name == zookeeper* ]]; then
      export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${jar}
    fi
  done
}

if [[ $ENABLE_RANGER_AUTH -eq 1 ]]; then
  # Point Hive at the Ranger-enabled hive-site.xml.
  export HIVE_CONF_DIR="$HADOOP_CONF_DIR/hive-site-ranger-auth/"
  add_ranger_plugin_jars
fi
|
|
|
|
# For Hive 3, we use Tez for execution. We have to add it to the classpath.
# NOTE: it would seem like this would only be necessary on the HS2 classpath,
# but compactions are initiated from the HMS in Hive 3. This may change at
# some point in the future, in which case we can add this to only the
# HS2 classpath.
export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${TEZ_HOME}/*

# Tez bundles a bunch of junk into lib/, such as extra copies of the hadoop
# libraries, and we want to avoid conflicts. So be choosy: only pick up the
# commons-* and RoaringBitmap jars from lib/. Expansions are quoted so that
# an install path containing whitespace does not word-split (SC2086).
add_tez_lib_jars() {
  local jar
  for jar in "${TEZ_HOME}"/lib/*; do
    case $(basename "$jar") in
      commons-*|RoaringBitmap*)
        export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${jar}
        ;;
    esac
  done
}
add_tez_lib_jars
|
|
|
|
# Add kudu-hive.jar to the Hive Metastore classpath, so that Kudu's HMS
# plugin can be loaded.
add_kudu_hive_jars() {
  local jar
  for jar in "${IMPALA_KUDU_JAVA_HOME}"/*kudu-hive*jar; do
    # A glob with no matches stays literal; skip instead of adding a
    # nonexistent path to the classpath.
    [[ -e "$jar" ]] || continue
    export HADOOP_CLASSPATH=${HADOOP_CLASSPATH}:${jar}
  done
}
add_kudu_hive_jars

# Default to skip validation on Kudu tables if KUDU_SKIP_HMS_PLUGIN_VALIDATION
# is unset.
export KUDU_SKIP_HMS_PLUGIN_VALIDATION=${KUDU_SKIP_HMS_PLUGIN_VALIDATION:-1}
|
|
|
|
# Starts a Hive Metastore Server on the specified port, in the background,
# with its output captured under ${LOGDIR}.
# To debug log4j2 loading issues, add to HADOOP_CLIENT_OPTS:
#   -Dorg.apache.logging.log4j.simplelog.StatusLogger.level=TRACE
# NOTE(review): "-Xmx2024m" looks like a typo for 2048m — confirm before
# changing, since the metastore has been running with this value.
HADOOP_CLIENT_OPTS="-Xmx2024m -Dhive.log.file=hive-metastore.log" hive \
    --service metastore -p $HIVE_METASTORE_PORT > ${LOGDIR}/hive-metastore.out 2>&1 &

# Wait for the Metastore to come up because HiveServer2 relies on it being live.
${CLUSTER_BIN}/wait-for-metastore.py --transport=${METASTORE_TRANSPORT}
|
|
|
|
# Unless -only_metastore was given, also start HiveServer2 in the background
# and block until it is reachable.
if [ ${ONLY_METASTORE} -eq 0 ]; then
  # Starts a HiveServer2 instance on the port specified by the HIVE_SERVER2_THRIFT_PORT
  # environment variable. HADOOP_HEAPSIZE should be set to at least 2048 to avoid OOM
  # when loading ORC tables like widerow.
  HADOOP_CLIENT_OPTS="-Xmx2048m -Dhive.log.file=hive-server2.log" hive \
      --service hiveserver2 > ${LOGDIR}/hive-server2.out 2>&1 &

  # Wait for the HiveServer2 service to come up because callers of this script
  # may rely on it being available.
  ${CLUSTER_BIN}/wait-for-hiveserver2.py --transport=${HIVES2_TRANSPORT}
fi
|