IMPALA-9843: Add support for metastore db schema upgrade

This change adds support for upgrading the HMS database schema using the
Hive schematool. It adds a new option to the buildall.sh script
which can be provided to upgrade the HMS db schema. Alternatively,
users can directly upgrade the schema using the
create-test-configuration.sh script. The logs for the schema upgrade
are available in logs/cluster/schematool.log.

The following invocations will upgrade the HMS database schema:

1. buildall.sh -upgrade_metastore_db
2. bin/create-test-configuration.sh -upgrade_metastore_db

This upgrade option is idempotent. It is a no-op if the metastore
schema is already at its latest version. In case of any errors, the
only fallback currently is to format the metastore schema and load
the test data again.

Testing:
Upgraded the HMS schema on my local dev environment and made
sure that the HMS service starts without any errors.

Change-Id: I85af8d57e110ff284832056a1661f94b85ed3b09
Reviewed-on: http://gerrit.cloudera.org:8080/16054
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Vihang Karajgaonkar
2020-06-09 12:44:21 -07:00
committed by Vihang Karajgaonkar
parent 6ca6e40358
commit f8c28f8adf
2 changed files with 30 additions and 3 deletions

View File

@@ -66,6 +66,7 @@ function generate_config {
CREATE_METASTORE=0
CREATE_RANGER_POLICY_DB=0
UPGRADE_METASTORE_DB=0
# parse command line options
for ARG in $*
@@ -77,9 +78,13 @@ do
-create_ranger_policy_db)
CREATE_RANGER_POLICY_DB=1
;;
-upgrade_metastore_db)
UPGRADE_METASTORE_DB=1
;;
-help|*)
echo "[-create_metastore] : If true, creates a new metastore."
echo "[-create_ranger_policy_db] : If true, creates a new Ranger policy db."
echo "[-upgrade_metastore_db] : If true, upgrades the schema of HMS db."
exit 1
;;
esac
@@ -163,12 +168,20 @@ if [ $CREATE_METASTORE -eq 1 ]; then
# version and invokes the appropriate scripts
CLASSPATH={$CLASSPATH}:${CONFIG_DIR} ${HIVE_HOME}/bin/schematool -initSchema -dbType \
postgres 1>${IMPALA_CLUSTER_LOGS_DIR}/schematool.log 2>&1
# TODO: We probably don't need to do this anymore
# Increase the size limit of PARAM_VALUE from SERDE_PARAMS table to be able to create
# HBase tables with large number of columns.
echo "alter table \"SERDE_PARAMS\" alter column \"PARAM_VALUE\" type character varying" \
| psql -q -U hiveuser -d ${METASTORE_DB}
fi
# Optionally upgrade the schema of the existing HMS database in place. This
# branch runs only when UPGRADE_METASTORE_DB is 1, which the option parser sets
# for -upgrade_metastore_db.
if [ "${UPGRADE_METASTORE_DB}" -eq 1 ]; then
  # Two echo arguments (joined by a single space) instead of a backslash
  # continuation inside the quotes, so indentation never leaks into the message.
  echo "Upgrading the schema of metastore db ${METASTORE_DB}. Check" \
    "${IMPALA_CLUSTER_LOGS_DIR}/schematool.log for details."
  # Extend the classpath with CONFIG_DIR for this one invocation only
  # (presumably so schematool sees the generated Hive config; confirm).
  # The expansion must be ${CLASSPATH}: writing {$CLASSPATH} wraps the existing
  # value in literal braces and corrupts its first and last entries.
  # stdout and stderr both go to schematool.log, which is truncated on each run.
  CLASSPATH="${CLASSPATH}:${CONFIG_DIR}" "${HIVE_HOME}/bin/schematool" -upgradeSchema \
    -dbType postgres >"${IMPALA_CLUSTER_LOGS_DIR}/schematool.log" 2>&1
fi
if [ $CREATE_RANGER_POLICY_DB -eq 1 ]; then
echo "Creating Ranger Policy Server DB"
dropdb -U hiveuser "${RANGER_POLICY_DB}" 2> /dev/null || true

View File

@@ -58,6 +58,7 @@ TESTDATA_ACTION=0
TESTS_ACTION=1
FORMAT_CLUSTER=0
FORMAT_METASTORE=0
UPGRADE_METASTORE_SCHEMA=0
FORMAT_RANGER_POLICY_DB=0
NEED_MINICLUSTER=0
START_IMPALA_CLUSTER=0
@@ -114,6 +115,9 @@ do
-format_metastore)
FORMAT_METASTORE=1
;;
-upgrade_metastore_db)
UPGRADE_METASTORE_SCHEMA=1
;;
-format_ranger_policy_db)
FORMAT_RANGER_POLICY_DB=1
;;
@@ -201,6 +205,8 @@ do
"[Default: False]"
echo "[-format_cluster] : Format the minicluster [Default: False]"
echo "[-format_metastore] : Format the metastore db [Default: False]"
echo "[-upgrade_metastore_db] : Upgrades the schema of metastore db"\
"[Default: False]"
echo "[-format_ranger_policy_db] : Format the Ranger policy db [Default: False]"
echo "[-release_and_debug] : Build both release and debug binaries. Overrides "\
"other build types [Default: false]"
@@ -269,7 +275,10 @@ Examples of common tasks:
./buildall.sh -testdata
# Build, format mini-cluster and metastore, load all test data, run tests
./buildall.sh -testdata -format"
./buildall.sh -testdata -format
# Build and upgrade metastore schema to latest.
./buildall.sh -upgrade_metastore_db"
exit 1
;;
esac
@@ -349,7 +358,7 @@ fi
if [[ $TESTS_ACTION -eq 1 || $TESTDATA_ACTION -eq 1 || $FORMAT_CLUSTER -eq 1 ||
$FORMAT_METASTORE -eq 1 || $FORMAT_RANGER_POLICY_DB -eq 1 || -n "$SNAPSHOT_FILE" ||
-n "$METASTORE_SNAPSHOT_FILE" ]]; then
-n "$METASTORE_SNAPSHOT_FILE" || $UPGRADE_METASTORE_SCHEMA -eq 1 ]]; then
NEED_MINICLUSTER=1
fi
@@ -486,7 +495,8 @@ reconfigure_test_cluster() {
"${IMPALA_HOME}/bin/start-impala-cluster.py" --kill --force
if [[ "$FORMAT_METASTORE" -eq 1 || "$FORMAT_CLUSTER" -eq 1 ||
"$FORMAT_RANGER_POLICY_DB" -eq 1 || -n "$METASTORE_SNAPSHOT_FILE" ]]
"$FORMAT_RANGER_POLICY_DB" -eq 1 || -n "$METASTORE_SNAPSHOT_FILE" ||
"$UPGRADE_METASTORE_SCHEMA" -eq 1 ]]
then
# Kill any processes that may be accessing postgres metastore. To be safe, this is
# done before we make any changes to the config files.
@@ -502,6 +512,10 @@ reconfigure_test_cluster() {
CREATE_TEST_CONFIG_ARGS+=" -create_metastore"
fi
if [[ "$UPGRADE_METASTORE_SCHEMA" -eq 1 ]]; then
CREATE_TEST_CONFIG_ARGS+=" -upgrade_metastore_db"
fi
# Generate the Hadoop configs needed by Impala
"${IMPALA_HOME}/bin/create-test-configuration.sh" ${CREATE_TEST_CONFIG_ARGS}