mirror of
https://github.com/apache/impala.git
synced 2026-01-22 00:01:21 -05:00
bin/bootstrap_system.sh was looking for a strict combination of OS platform / pg_hba.conf location / default authN method when setting up the default Impala build environment. This combo proved to be too strict for CentOS 7.4 running with PostreSQL 9.2, and failed to set up this environment correctly, which lead to a failure in the dataload phase. The patch fixes the permission tweak for this combination by relaxing the initial condition where the tweak may be applied. Change-Id: I31cbee6c22f9c27b1ff2b9200bb356be14429f39 Reviewed-on: http://gerrit.cloudera.org:8080/15243 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
435 lines
16 KiB
Bash
Executable File
435 lines
16 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
# This script bootstraps a system for Impala development from almost nothing; it is known
|
|
# to work on Ubuntu 16.04. It clobbers some local environment and system
|
|
# configurations, so it is best to run this in a fresh install. It also sets up the
|
|
# ~/.bashrc for the calling user and impala-config-local.sh with some environment
|
|
# variables to make Impala compile and run after this script is complete.
|
|
# When IMPALA_HOME is set, the script will bootstrap Impala development in the
|
|
# location specified.
|
|
#
|
|
# The intended user is a person who wants to start contributing code to Impala. This
|
|
# script serves as an executable reference point for how to get started.
|
|
#
|
|
# To run this in a Docker container:
|
|
#
|
|
# 1. Run with --privileged
|
|
# 2. Give the container a non-root sudoer wih NOPASSWD:
|
|
# apt-get update
|
|
# apt-get install sudo
|
|
# adduser --disabled-password --gecos '' impdev
|
|
# echo 'impdev ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
|
|
# 3. Run this script as that user: su - impdev -c /bootstrap_development.sh
|
|
#
|
|
# This script has some specializations for CentOS/Redhat 6/7 and Ubuntu.
|
|
# Of note, inside of Docker, Redhat 7 doesn't allow you to start daemons
|
|
# with systemctl, so sshd and postgresql are started manually in those cases.
|
|
|
|
set -eu -o pipefail
|
|
|
|
: ${IMPALA_HOME:=~/Impala}
|
|
|
|
if [[ -t 1 ]] # if on an interactive terminal
|
|
then
|
|
echo "This script will clobber some system settings. Are you sure you want to"
|
|
echo -n "continue? "
|
|
while true
|
|
do
|
|
read -p "[yes/no] " ANSWER
|
|
ANSWER=$(echo "$ANSWER" | tr /a-z/ /A-Z/)
|
|
if [[ $ANSWER = YES ]]
|
|
then
|
|
break
|
|
elif [[ $ANSWER = NO ]]
|
|
then
|
|
echo "OK, Bye!"
|
|
exit 1
|
|
fi
|
|
done
|
|
fi
|
|
|
|
set -x
|
|
|
|
# Determine whether we're running on redhat or ubuntu
|
|
REDHAT=
|
|
REDHAT6=
|
|
REDHAT7=
|
|
UBUNTU=
|
|
UBUNTU16=
|
|
UBUNTU18=
|
|
IN_DOCKER=
|
|
if [[ -f /etc/redhat-release ]]; then
|
|
REDHAT=true
|
|
echo "Identified redhat system."
|
|
if grep 'release 7\.' /etc/redhat-release; then
|
|
REDHAT7=true
|
|
echo "Identified redhat7 system."
|
|
fi
|
|
if grep 'release 6\.' /etc/redhat-release; then
|
|
REDHAT6=true
|
|
echo "Identified redhat6 system."
|
|
fi
|
|
# TODO: restrict redhat versions
|
|
else
|
|
source /etc/lsb-release
|
|
if [[ $DISTRIB_ID = Ubuntu ]]
|
|
then
|
|
UBUNTU=true
|
|
echo "Identified Ubuntu system."
|
|
# Kerberos setup would pop up dialog boxes without this
|
|
export DEBIAN_FRONTEND=noninteractive
|
|
if [[ $DISTRIB_RELEASE = 16.04 ]]
|
|
then
|
|
UBUNTU16=true
|
|
echo "Identified Ubuntu 16.04 system."
|
|
elif [[ $DISTRIB_RELEASE = 18.04 ]]
|
|
then
|
|
UBUNTU18=true
|
|
echo "Identified Ubuntu 18.04 system."
|
|
else
|
|
echo "This script only supports 16.04 or 18.04 of Ubuntu" >&2
|
|
exit 1
|
|
fi
|
|
else
|
|
echo "This script only supports Ubuntu or RedHat" >&2
|
|
exit 1
|
|
fi
|
|
fi
|
|
if grep docker /proc/1/cgroup; then
|
|
IN_DOCKER=true
|
|
echo "Identified we are running inside of Docker."
|
|
fi
|
|
|
|
# Helper function to execute following command only on Ubuntu
|
|
function ubuntu {
|
|
if [[ "$UBUNTU" == true ]]; then
|
|
"$@"
|
|
fi
|
|
}
|
|
|
|
# Helper function to execute following command only on Ubuntu 16.04
|
|
function ubuntu16 {
|
|
if [[ "$UBUNTU16" == true ]]; then
|
|
"$@"
|
|
fi
|
|
}
|
|
|
|
# Helper function to execute following command only on Ubuntu 18.04
|
|
function ubuntu18 {
|
|
if [[ "$UBUNTU18" == true ]]; then
|
|
"$@"
|
|
fi
|
|
}
|
|
|
|
# Helper function to execute following command only on RedHat
|
|
function redhat {
|
|
if [[ "$REDHAT" == true ]]; then
|
|
"$@"
|
|
fi
|
|
}
|
|
|
|
# Helper function to execute following command only on RedHat6
|
|
function redhat6 {
|
|
if [[ "$REDHAT6" == true ]]; then
|
|
"$@"
|
|
fi
|
|
}
|
|
# Helper function to execute following command only on RedHat7
|
|
function redhat7 {
|
|
if [[ "$REDHAT7" == true ]]; then
|
|
"$@"
|
|
fi
|
|
}
|
|
# Helper function to execute following command only in docker
|
|
function indocker {
|
|
if [[ "$IN_DOCKER" == true ]]; then
|
|
"$@"
|
|
fi
|
|
}
|
|
# Helper function to execute following command only outside of docker
|
|
function notindocker {
|
|
if [[ "$IN_DOCKER" != true ]]; then
|
|
"$@"
|
|
fi
|
|
}
|
|
|
|
# Note that yum has its own retries; see yum.conf(5).
|
|
REAL_APT_GET=$(ubuntu which apt-get)
|
|
function apt-get {
|
|
for ITER in $(seq 1 20); do
|
|
echo "ATTEMPT: ${ITER}"
|
|
if sudo -E "${REAL_APT_GET}" "$@"
|
|
then
|
|
return 0
|
|
fi
|
|
sleep "${ITER}"
|
|
done
|
|
echo "NO MORE RETRIES"
|
|
return 1
|
|
}
|
|
|
|
echo ">>> Installing build tools"
|
|
ubuntu apt-get update
|
|
ubuntu apt-get --yes install ccache g++ gcc libffi-dev liblzo2-dev libkrb5-dev \
|
|
krb5-admin-server krb5-kdc krb5-user libsasl2-dev libsasl2-modules \
|
|
libsasl2-modules-gssapi-mit libssl-dev make ninja-build ntp \
|
|
ntpdate python-dev python-setuptools postgresql ssh wget vim-common psmisc \
|
|
lsof openjdk-8-jdk openjdk-8-source openjdk-8-dbg apt-utils git ant
|
|
|
|
if [[ "$UBUNTU" == true ]]; then
|
|
# Don't use openjdk-8-jdk 8u181-b13-1ubuntu0.16.04.1 which is known to break the
|
|
# surefire tests. If we detect that version, we downgrade to the last known good one.
|
|
# See https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=911925 for details.
|
|
JDK_BAD_VERSION="8u181-b13-1ubuntu0.16.04.1"
|
|
if dpkg -l openjdk-8-jdk | grep -q $JDK_BAD_VERSION; then
|
|
JDK_TARGET_VERSION="8u181-b13-0ubuntu0.16.04.1"
|
|
DEB_DIR=$(mktemp -d)
|
|
pushd $DEB_DIR
|
|
wget --no-verbose \
|
|
"https://launchpadlibrarian.net/380913637/openjdk-8-jdk_8u181-b13-0ubuntu0.16.04.1_amd64.deb" \
|
|
"https://launchpadlibrarian.net/380913636/openjdk-8-jdk-headless_8u181-b13-0ubuntu0.16.04.1_amd64.deb" \
|
|
"https://launchpadlibrarian.net/380913641/openjdk-8-jre_8u181-b13-0ubuntu0.16.04.1_amd64.deb" \
|
|
"https://launchpadlibrarian.net/380913638/openjdk-8-jre-headless_8u181-b13-0ubuntu0.16.04.1_amd64.deb" \
|
|
"https://launchpadlibrarian.net/380913642/openjdk-8-source_8u181-b13-0ubuntu0.16.04.1_all.deb" \
|
|
"https://launchpadlibrarian.net/380913633/openjdk-8-dbg_8u181-b13-0ubuntu0.16.04.1_amd64.deb"
|
|
sudo dpkg -i *.deb
|
|
popd
|
|
rm -rf $DEB_DIR
|
|
fi
|
|
fi
|
|
|
|
# Ubuntu 18.04 installs OpenJDK 11 and configures it as the default Java version.
|
|
# Impala is currently tested with OpenJDK 8, so configure that version as the default.
|
|
ARCH_NAME=$(uname -p)
|
|
if [[ $ARCH_NAME == 'aarch64' ]]; then
|
|
ubuntu18 sudo update-java-alternatives -s java-1.8.0-openjdk-arm64
|
|
else
|
|
ubuntu18 sudo update-java-alternatives -s java-1.8.0-openjdk-amd64
|
|
fi
|
|
|
|
redhat sudo yum install -y curl gcc gcc-c++ git krb5-devel krb5-server krb5-workstation \
|
|
libevent-devel libffi-devel make ntp ntpdate ntp-perl openssl-devel cyrus-sasl \
|
|
cyrus-sasl-gssapi cyrus-sasl-devel cyrus-sasl-plain \
|
|
python-devel python-setuptools postgresql postgresql-server \
|
|
wget vim-common nscd cmake lzo-devel fuse-devel snappy-devel zlib-devel \
|
|
psmisc lsof openssh-server redhat-lsb java-1.8.0-openjdk-devel \
|
|
java-1.8.0-openjdk-src python-argparse
|
|
|
|
# CentOS repos don't contain ccache, so install from EPEL
|
|
redhat sudo yum install -y epel-release
|
|
redhat sudo yum install -y ccache
|
|
|
|
# Clean up yum caches
|
|
redhat sudo yum clean all
|
|
|
|
# Download ant for centos
|
|
redhat sudo wget -nv \
|
|
https://downloads.apache.org/ant/binaries/apache-ant-1.9.14-bin.tar.gz
|
|
redhat sha512sum -c - <<< '487dbd1d7f678a92924ba884a57e910ccb4fe565c554278795a8fdfc80c4e88d81ebc2ccecb5a8f353f0b2076572bb921499a2cadb064e0f44fc406a3c31da20 apache-ant-1.9.14-bin.tar.gz'
|
|
redhat sudo tar -C /usr/local -xzf apache-ant-1.9.14-bin.tar.gz
|
|
redhat sudo ln -s /usr/local/apache-ant-1.9.14/bin/ant /usr/local/bin
|
|
|
|
# Download maven for all OSes, since the OS-packaged version can be
|
|
# pretty old.
|
|
if [ ! -d /usr/local/apache-maven-3.5.4 ]; then
|
|
sudo wget -nv \
|
|
https://downloads.apache.org/maven/maven-3/3.5.4/binaries/apache-maven-3.5.4-bin.tar.gz
|
|
sha512sum -c - <<< '2a803f578f341e164f6753e410413d16ab60fabe31dc491d1fe35c984a5cce696bc71f57757d4538fe7738be04065a216f3ebad4ef7e0ce1bb4c51bc36d6be86 apache-maven-3.5.4-bin.tar.gz'
|
|
sudo tar -C /usr/local -xzf apache-maven-3.5.4-bin.tar.gz
|
|
sudo ln -s /usr/local/apache-maven-3.5.4/bin/mvn /usr/local/bin
|
|
fi
|
|
|
|
if ! { service --status-all | grep -E '^ \[ \+ \] ssh$'; }
|
|
then
|
|
ubuntu sudo service ssh start
|
|
# TODO: CentOS/RH 7 uses systemd, and this doesn't work.
|
|
redhat6 sudo service sshd start
|
|
redhat7 notindocker sudo service sshd start
|
|
redhat7 indocker sudo /usr/bin/ssh-keygen -A
|
|
redhat7 indocker sudo /usr/sbin/sshd
|
|
fi
|
|
|
|
# TODO: config ccache to give it plenty of space
|
|
# TODO: check that there is enough space on disk to do a build and data load
|
|
# TODO: make this work with non-bash shells
|
|
|
|
echo ">>> Configuring system"
|
|
|
|
ubuntu sudo service ntp stop
|
|
redhat6 sudo service ntpd stop
|
|
redhat7 notindocker sudo service ntpd stop
|
|
sudo ntpdate us.pool.ntp.org
|
|
# If on EC2, use Amazon's ntp servers
|
|
# EC2 nodes expose this IP address internally as a way to gather instance metadata.
|
|
# The assumption is that only AWS nodes do this
|
|
if wget -q -T 1 -t 1 -o /dev/null http://169.254.169.254/latest/dynamic/instance-identity
|
|
then
|
|
sudo sed -i 's/ubuntu\.pool/amazon\.pool/' /etc/ntp.conf
|
|
fi
|
|
# While it is nice to have ntpd running to keep the clock in sync, that does not work in a
|
|
# --privileged docker container, and a non-privileged container cannot run ntpdate, which
|
|
# is strictly needed by Kudu.
|
|
# TODO: Make privileged docker start ntpd
|
|
ubuntu sudo service ntp start || grep docker /proc/1/cgroup
|
|
redhat6 sudo service ntpd start || grep docker /proc/1/cgroup
|
|
notindocker redhat7 sudo service ntpd start
|
|
|
|
# IMPALA-3932, IMPALA-3926
|
|
if [[ $UBUNTU = true && ( $DISTRIB_RELEASE = 16.04 || $DISTRIB_RELEASE = 18.04 ) ]]
|
|
then
|
|
SET_LD_LIBRARY_PATH="export LD_LIBRARY_PATH=/usr/lib/${ARCH_NAME}-linux-gnu/${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
|
|
echo -e "\n$SET_LD_LIBRARY_PATH" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
|
|
eval "$SET_LD_LIBRARY_PATH"
|
|
fi
|
|
|
|
redhat6 sudo service postgresql initdb
|
|
redhat6 sudo service postgresql stop
|
|
redhat7 notindocker sudo service postgresql initdb
|
|
redhat7 notindocker sudo service postgresql stop
|
|
redhat7 indocker sudo -u postgres PGDATA=/var/lib/pgsql/data pg_ctl init
|
|
ubuntu sudo service postgresql stop
|
|
|
|
# These configurations expose connectiong to PostgreSQL via md5-hashed
|
|
# passwords over TCP to localhost, and the local socket is trusted
|
|
# widely.
|
|
ubuntu sudo sed -ri 's/local +all +all +peer/local all all trust/g' \
|
|
/etc/postgresql/*/main/pg_hba.conf
|
|
redhat sudo sed -ri 's/local +all +all +(ident|peer)/local all all trust/g' \
|
|
/var/lib/pgsql/data/pg_hba.conf
|
|
# Accept md5 passwords from localhost
|
|
redhat sudo sed -i -e 's,\(host.*\)ident,\1md5,' /var/lib/pgsql/data/pg_hba.conf
|
|
|
|
ubuntu sudo service postgresql start
|
|
redhat6 sudo service postgresql start
|
|
redhat7 notindocker sudo service postgresql start
|
|
# Important to redirect pg_ctl to a logfile, lest it keep the stdout
|
|
# file descriptor open, preventing the shell from exiting.
|
|
redhat7 indocker sudo -u postgres PGDATA=/var/lib/pgsql/data bash -c \
|
|
"pg_ctl start -w --timeout=120 >> /var/lib/pgsql/pg.log 2>&1"
|
|
|
|
# Set up postgress for HMS
|
|
if ! [[ 1 = $(sudo -u postgres psql -At -c "SELECT count(*) FROM pg_roles WHERE rolname = 'hiveuser';") ]]
|
|
then
|
|
sudo -u postgres psql -c "CREATE ROLE hiveuser LOGIN PASSWORD 'password';"
|
|
fi
|
|
sudo -u postgres psql -c "ALTER ROLE hiveuser WITH CREATEDB;"
|
|
sudo -u postgres psql -c "SELECT * FROM pg_roles WHERE rolname = 'hiveuser';"
|
|
|
|
# Setup ssh to ssh to localhost
|
|
mkdir -p ~/.ssh
|
|
chmod go-rwx ~/.ssh
|
|
if ! [[ -f ~/.ssh/id_rsa ]]
|
|
then
|
|
ssh-keygen -t rsa -N '' -q -f ~/.ssh/id_rsa
|
|
fi
|
|
|
|
{ echo "" | cat - ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys; } && chmod 0600 ~/.ssh/authorized_keys
|
|
echo -e "\nNoHostAuthenticationForLocalhost yes" >> ~/.ssh/config && chmod 0600 ~/.ssh/config
|
|
ssh localhost whoami
|
|
|
|
# Workarounds for HDFS networking issues: On the minicluster, tests that rely
|
|
# on WebHDFS may fail with "Connection refused" errors because the namenode
|
|
# will return a "Location:" redirect to the hostname, but the datanode is only
|
|
# listening on localhost. See also HDFS-13797. To reproduce this, the following
|
|
# snippet may be useful:
|
|
#
|
|
# $impala-python
|
|
# >>> import logging
|
|
# >>> logging.basicConfig(level=logging.DEBUG)
|
|
# >>> logging.getLogger("requests.packages.urllib3").setLevel(logging.DEBUG)
|
|
# >>> from pywebhdfs.webhdfs import PyWebHdfsClient
|
|
# >>> PyWebHdfsClient(host='localhost',port='5070', user_name='hdfs').read_file(
|
|
# "/test-warehouse/tpch.region/region.tbl")
|
|
# INFO:...:Starting new HTTP connection (1): localhost
|
|
# DEBUG:...:"GET /webhdfs/v1//t....tbl?op=OPEN&user.name=hdfs HTTP/1.1" 307 0
|
|
# INFO:...:Starting new HTTP connection (1): HOSTNAME.DOMAIN
|
|
# Traceback (most recent call last):
|
|
# ...
|
|
# ...ConnectionError: ('Connection aborted.', error(111, 'Connection refused'))
|
|
echo -e "\n127.0.0.1 $(hostname -s) $(hostname)" | sudo tee -a /etc/hosts
|
|
#
|
|
# In Docker, one can change /etc/hosts as above but not with sed -i. The error message is
|
|
# "sed: cannot rename /etc/sedc3gPj8: Device or resource busy". The following lines are
|
|
# basically sed -i but with cp instead of mv for -i part.
|
|
NEW_HOSTS=$(mktemp)
|
|
sed 's/127.0.1.1/127.0.0.1/g' /etc/hosts > "${NEW_HOSTS}"
|
|
diff -u /etc/hosts "${NEW_HOSTS}" || true
|
|
sudo cp "${NEW_HOSTS}" /etc/hosts
|
|
rm "${NEW_HOSTS}"
|
|
|
|
sudo mkdir -p /var/lib/hadoop-hdfs
|
|
sudo chown $(whoami) /var/lib/hadoop-hdfs/
|
|
|
|
# TODO: restrict this to only the users it is needed for
|
|
echo -e "\n* - nofile 1048576" | sudo tee -a /etc/security/limits.conf
|
|
|
|
# Default on CentOS limits a user to 1024 or 4096 processes (threads) , which isn't
|
|
# enough for minicluster with all of its friends.
|
|
redhat sudo sed -i 's,\*\s*soft\s*nproc\s*[0-9]*$,* soft nproc unlimited,' \
|
|
/etc/security/limits.d/*-nproc.conf
|
|
|
|
echo ">>> Checking out Impala"
|
|
|
|
# If there is no Impala git repo, get one now
|
|
if ! [[ -d "$IMPALA_HOME" ]]
|
|
then
|
|
time -p git clone https://gitbox.apache.org/repos/asf/impala.git "$IMPALA_HOME"
|
|
fi
|
|
cd "$IMPALA_HOME"
|
|
SET_IMPALA_HOME="export IMPALA_HOME=$(pwd)"
|
|
echo -e "\n$SET_IMPALA_HOME" >> ~/.bashrc
|
|
eval "$SET_IMPALA_HOME"
|
|
|
|
# Ubuntu and RH install JDK's in slightly different paths.
|
|
if [[ $UBUNTU == true ]]; then
|
|
# Assert that there's only one glob match.
|
|
[ 1 == $(compgen -G "/usr/lib/jvm/java-8-openjdk-*" | wc -l) ]
|
|
SET_JAVA_HOME="export JAVA_HOME=$(compgen -G '/usr/lib/jvm/java-8-openjdk-*')"
|
|
else
|
|
# Assert that there's only one glob match.
|
|
[ 1 == $(compgen -G "/usr/lib/jvm/java-1.8.0-openjdk-*" | wc -l) ]
|
|
SET_JAVA_HOME="export JAVA_HOME=$(compgen -G '/usr/lib/jvm/java-1.8.0-openjdk-*')"
|
|
fi
|
|
|
|
echo -e "\n$SET_JAVA_HOME" >> "${IMPALA_HOME}/bin/impala-config-local.sh"
|
|
eval "$SET_JAVA_HOME"
|
|
|
|
# Assert that we have a java available
|
|
test -f $JAVA_HOME/bin/java
|
|
|
|
# LZO is not needed to compile or run Impala, but it is needed for the data load
|
|
echo ">>> Checking out Impala-lzo"
|
|
: ${IMPALA_LZO_HOME:="${IMPALA_HOME}/../Impala-lzo"}
|
|
if ! [[ -d "$IMPALA_LZO_HOME" ]]
|
|
then
|
|
git clone --branch master https://github.com/cloudera/impala-lzo.git "$IMPALA_LZO_HOME"
|
|
fi
|
|
|
|
echo ">>> Checking out and building hadoop-lzo"
|
|
|
|
: ${HADOOP_LZO_HOME:="${IMPALA_HOME}/../hadoop-lzo"}
|
|
if ! [[ -d "$HADOOP_LZO_HOME" ]]
|
|
then
|
|
git clone https://github.com/cloudera/hadoop-lzo.git "$HADOOP_LZO_HOME"
|
|
fi
|
|
cd "$HADOOP_LZO_HOME"
|
|
time -p ant package
|
|
cd "$IMPALA_HOME"
|