Files
impala/bin/create-test-configuration.sh
Lenni Kuff f34a0507bf [CDH5] Add support for Sentry Service to Impala
This change adds support for authorizing based on policy metadata read from the Sentry
Service. Authorization is role based and roles are granted to user groups. Each role
can have zero or more privileges associated with it, granting fine grained access to
specific catalog objects at server, URI, database, or table scope. This patch only
adds support to authorize against metadata read from the Sentry Policy Service, it does
not add support for GRANT/REVOKE statements in Impala.

The authorization metadata is read by the catalog server from the Sentry Service and
propagated to all nodes in the cluster in the "catalog-update" statestore topic. To
enable the Catalog Server to read policy metadata, the --sentry_config must be
set to a valid sentry-site.xml config file.

On the impalad side, we continue to support authorization based on a file-based provider.
To enable file based authorization set the --authorization_policy_file to a
non-empty value. If --authorization_policy_file is not set, authorization will be done
based on cached policy metadata received from the Catalog Server (via the statestore).

TODO: There are still some issues with the Sentry Service that require disabling some of
the authorization tests and adding some workarounds. I have added comments in the code
where these workarounds are needed.

Change-Id: I3765748d2cdbe00f59eefa3c971558efede38eb1
Reviewed-on: http://gerrit.ent.cloudera.com:8080/2552
Reviewed-by: Lenni Kuff <lskuff@cloudera.com>
Tested-by: Lenni Kuff <lskuff@cloudera.com>
2014-06-03 07:19:52 -07:00

128 lines
4.1 KiB
Bash
Executable File

#!/bin/bash
# Copyright 2012 Cloudera Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Create the test environment needed by Impala. Includes generation of the
# Hadoop config files: core-site.xml, hbase-site.xml, hive-site.xml as well
# as creation of the Hive metastore.
set -e
CREATE_METASTORE=0
# parse command line options
for ARG in $*
do
case "$ARG" in
-create_metastore)
CREATE_METASTORE=1
;;
-help|*)
echo "[-create_metastore] : If true, creates a new metastore."
exit 1
;;
esac
done
# If a specific metastore db is defined, use that. Otherwise create unique metastore
# DB name based on the current directory.
if [ -z "${METASTORE_DB}" ]; then
METASTORE_DB=`basename ${IMPALA_HOME} | sed -e "s/\\./_/g" | sed -e "s/[.-]/_/g"`
fi
set -u
CLUSTER_DIR=${IMPALA_HOME}/testdata/cluster
${CLUSTER_DIR}/admin create_cluster
# Convert Metastore DB name to be lowercase
export METASTORE_DB=`echo $METASTORE_DB | tr '[A-Z]' '[a-z]'`
export CURRENT_USER=`whoami`
CONFIG_DIR=${IMPALA_HOME}/fe/src/test/resources
echo "Config dir: ${CONFIG_DIR}"
echo "Current user: ${CURRENT_USER}"
echo "Metastore DB: hive_${METASTORE_DB}"
pushd ${CONFIG_DIR}
# Cleanup any existing files
rm -f {core,hdfs,hbase,hive}-site.xml
rm -f authz-provider.ini
# TODO: Throw an error if the template references an undefined environment variable
if [ $CREATE_METASTORE -eq 1 ]; then
echo "Creating postgresql database for Hive metastore"
set +o errexit
dropdb -U hiveuser hive_$METASTORE_DB
set -e
createdb -U hiveuser hive_$METASTORE_DB
psql -U hiveuser -d hive_$METASTORE_DB \
-f ${HIVE_HOME}/scripts/metastore/upgrade/postgres/hive-schema-0.12.0.postgres.sql
fi
set +e
echo "Creating Sentry Policy Server DB"
createdb -U hiveuser sentry_policy
set -e
function generate_config {
# Search for strings like ${FOO}, if FOO is defined in the environment then replace
# "${FOO}" with the environment value.
perl -wpl -e 's/\$\{([^}]+)\}/defined $ENV{$1} ? $ENV{$1} : $&/eg' $1 > $2
}
echo "Linking core-site.xml from local cluster"
CLUSTER_HADOOP_CONF_DIR=$(${CLUSTER_DIR}/admin get_hadoop_client_conf_dir)
ln -s ${CLUSTER_HADOOP_CONF_DIR}/core-site.xml
echo "Linking hdfs-site.xml from local cluster"
ln -s ${CLUSTER_HADOOP_CONF_DIR}/hdfs-site.xml
echo "Generating hive-site.xml using postgresql for metastore"
generate_config postgresql-hive-site.xml.template hive-site.xml
echo "Generating hive-log4j.properties"
generate_config hive-log4j.properties.template hive-log4j.properties
echo "Generating hbase-site.xml"
generate_config hbase-site.xml.template hbase-site.xml
echo "Generating authorization policy file"
generate_config authz-policy.ini.template authz-policy.ini
echo "Generating sentry policy server config file"
generate_config sentry-site.xml.template sentry-site.xml
popd
echo "Completed config generation"
# Creates a symlink in TARGET_DIR to all subdirectories under SOURCE_DIR
function symlink_subdirs {
SOURCE_DIR=$1
TARGET_DIR=$2
if [ -d "${SOURCE_DIR}" ]; then
find ${SOURCE_DIR}/ -maxdepth 1 -mindepth 1 -type d -exec ln -f -s {} ${TARGET_DIR} \;
else
echo "No auxiliary tests found at: ${SOURCE_DIR}"
fi
}
# The Impala test framework support running additional tests outside of the main repo.
# This is an optional feature that can be enabled by setting the IMPALA_AUX_* environment
# variables to valid locations.
echo "Searching for auxiliary tests, workloads, and datasets (if any exist)."
symlink_subdirs ${IMPALA_AUX_WORKLOAD_DIR} ${IMPALA_WORKLOAD_DIR}
symlink_subdirs ${IMPALA_AUX_DATASET_DIR} ${IMPALA_DATASET_DIR}
symlink_subdirs ${IMPALA_AUX_TEST_HOME}/tests ${IMPALA_HOME}/tests