mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
Reverts support for o3fs as a default filesystem added in IMPALA-9442. Updates test setup to use ofs instead. Munges absolute paths in Iceberg metadata to match the new location required for ofs. Ozone has strict requirements on volume and bucket names, so all tables must be created within a bucket (e.g. inside /impala/test-warehouse/). Change-Id: I45e90d30b2e68876dec0db3c43ac15ee510b17bd Reviewed-on: http://gerrit.cloudera.org:8080/19001 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
94 lines
3.9 KiB
Bash
Executable File
94 lines
3.9 KiB
Bash
Executable File
#!/bin/bash
|
|
#
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
#
|
|
# Loads a hive metastore snapshot file to re-create its postgres database.
|
|
# A metastore snapshot file is produced as an artifact of a successful
|
|
# full data load build.
|
|
|
|
# Abort on the first failing command, on any use of an unset variable, and when
# any stage of a pipeline fails.
set -euo pipefail

# Source the build-error reporting helper and call its setup function before
# doing any real work. The paths are quoted so that an IMPALA_HOME containing
# whitespace or glob characters is still resolved as a single path.
. "${IMPALA_HOME}/bin/report_build_error.sh"
setup_report_build_error

# Source impala-config.sh with stdout and stderr discarded.
# NOTE(review): presumably this is what defines variables used further down
# (e.g. METASTORE_DB, DEFAULT_FS, FILESYSTEM_PREFIX) -- confirm.
. "${IMPALA_HOME}/bin/impala-config.sh" > /dev/null 2>&1
|
|
|
|
# Exactly one positional argument (the snapshot file) is required. The usage
# text therefore shows it as mandatory, and the message goes to stderr because
# it reports an invocation error.
if [[ $# -ne 1 ]]; then
  echo "Usage: load-metastore-snapshot.sh <metastore_snapshot_file>" >&2
  exit 1
fi
|
|
|
|
# Fall back to /test-warehouse only when TEST_WAREHOUSE_DIR is unset; a value
# that is already set (even to the empty string) is left untouched.
TEST_WAREHOUSE_DIR=${TEST_WAREHOUSE_DIR-/test-warehouse}
|
|
|
|
# The snapshot to load is the script's only argument.
SNAPSHOT_FILE=$1
# The path is quoted so the existence check is also correct for empty paths and
# for paths containing whitespace; unquoted, such values make the test
# expression malformed and the check is silently skipped.
if [[ ! -f "${SNAPSHOT_FILE}" ]]; then
  echo "Metastore Snapshot file '${SNAPSHOT_FILE}' not found" >&2
  exit 1
fi
|
|
|
|
# Copy the snapshot file to a temporary location
|
|
# All later rewrites are applied to this scratch copy, so the snapshot passed
# on the command line is never modified.
# NOTE(review): this is a fixed, predictable name in /tmp; concurrent runs on
# the same host would overwrite each other's copy. Consider mktemp if that
# becomes a concern.
TMP_SNAPSHOT_FILE=/tmp/tmp-hive-metastore-snapshot.txt
# Remove any copy left behind by a previous run, then take a fresh one. Paths
# are quoted and preceded by '--' so unusual file names are handled safely.
rm -f -- "${TMP_SNAPSHOT_FILE}"
cp -- "${SNAPSHOT_FILE}" "${TMP_SNAPSHOT_FILE}"
|
|
|
|
# The snapshot file has jenkins as the default user, search and replace with the current
|
|
# user (this is only useful for local environments).
|
|
# TODO: While this is safe at the moment, there is no guarantee that it will remain so.
|
|
# We're at risk if a table/column name has the string 'jenkins' in it. Find a robust way
|
|
# to do the transformation.
|
|
# Escapes the characters that are special in the replacement half of a sed
# 's' command (backslash, '/' and '&') so the text is inserted literally.
# Arguments: $1 - text to escape.
# Outputs:   the escaped text on stdout.
escape_sed_replacement() {
  # '|' is used as the delimiter here so that '/' needs no escaping inside the
  # bracket expression.
  printf '%s' "$1" | sed -e 's|[\\/&]|\\&|g'
}

# USER is not exported in every environment, and this script runs with
# 'set -u', so fall back to asking id(1) for the effective user name.
current_user=${USER:-$(id -un)}
if [[ "${current_user}" != "jenkins" ]]; then
  echo "Searching and replacing jenkins with ${current_user}"
  escaped_user=$(escape_sed_replacement "${current_user}")
  sed -i "s/jenkins/${escaped_user}/g" "${TMP_SNAPSHOT_FILE}"
fi
|
|
|
|
|
|
# When the tests are run on a filesystem other than hdfs, we need to change the location
|
|
# of the tables in the metastore. The location change breaks down into two cases:
|
|
# - We use the other filesystem as a secondary filesystem. In this case, the
|
|
# core-site.xml still points to hdfs. We need to use the FILESYSTEM_PREFIX environment
|
|
# variable to determine the table location.
|
|
# - We use the other filesystem as the default filesystem. In this case, we use the
|
|
# DEFAULT_FS environment variable to determine the table locations.
|
|
# Pick the filesystem root that should replace the HDFS root recorded in the
# snapshot: a non-empty FILESYSTEM_PREFIX takes precedence; otherwise DEFAULT_FS
# is used when it differs from the HDFS root. If neither applies, the snapshot
# is left as it is.
snapshot_fs_root='hdfs://localhost:20500'
rewrite_locations=true
if [[ -n "${FILESYSTEM_PREFIX}" ]]; then
  target_fs_root=${FILESYSTEM_PREFIX}
elif [[ "${DEFAULT_FS}" != "${snapshot_fs_root}" ]]; then
  target_fs_root=${DEFAULT_FS}
else
  rewrite_locations=false
fi

if [[ "${rewrite_locations}" == true ]]; then
  echo "Changing table metadata to point to ${target_fs_root}"
  # '|' is the sed delimiter because both roots contain '/'.
  sed -i "s|${snapshot_fs_root}|${target_fs_root}|g" "${TMP_SNAPSHOT_FILE}"
fi
|
|
|
|
# When a warehouse location prefix is configured, insert it between the
# iceberg.catalog_location key (plus its trailing tab) and a following
# TEST_WAREHOUSE_DIR path.
if [[ -n "${WAREHOUSE_LOCATION_PREFIX}" ]]; then
  echo "Adding prefix ${WAREHOUSE_LOCATION_PREFIX} to iceberg.catalog_location"
  # Regex for the key and the tab after it; the dot is escaped so that it only
  # matches a literal '.'.
  catalog_key='iceberg\.catalog_location\t'
  # Group 1 is the key + tab, group 2 the warehouse directory; the prefix is
  # placed between them.
  prefix_expr="s|\(${catalog_key}\)\(${TEST_WAREHOUSE_DIR}\)|\1${WAREHOUSE_LOCATION_PREFIX}\2|g"
  sed -i "${prefix_expr}" "${TMP_SNAPSHOT_FILE}"
fi
|
|
|
|
# Drop and re-create the hive metastore database
|
|
# Refuse to continue with an empty database name: 'set -u' only catches an
# unset variable, and with an empty name createdb/psql would silently fall back
# to their default database instead of the metastore.
: "${METASTORE_DB:?METASTORE_DB must be set to the metastore database name}"

# Drop any existing metastore database. A failure here (for example because the
# database does not exist yet) is deliberately ignored and its error output
# suppressed.
dropdb -U hiveuser "${METASTORE_DB}" 2> /dev/null || true
createdb -U hiveuser "${METASTORE_DB}"

# Replay the rewritten snapshot copy into the freshly created database.
psql -q -U hiveuser "${METASTORE_DB}" < "${TMP_SNAPSHOT_FILE}"
|
|
# Two tables (tpch.nation and functional.alltypestiny) have cache_directive_id set in
|
|
# their metadata. These directives are now stale, and will cause any query that attempts
|
|
# to cache the data in the tables to fail.
|
|
# Remove every 'cache_directive_id' parameter row, first from TABLE_PARAMS and
# then from PARTITION_PARAMS. The table names are double-quoted in SQL to keep
# their upper-case spelling.
for params_table in TABLE_PARAMS PARTITION_PARAMS; do
  psql -q -U hiveuser -d "${METASTORE_DB}" -c \
    "delete from \"${params_table}\" where \"PARAM_KEY\"='cache_directive_id'"
done
|