mirror of
https://github.com/apache/impala.git
synced 2025-12-19 09:58:28 -05:00
This patch stops setting up and building impala-shell for Python 2. A more thorough cleanup will be done in the future. Testing: Passed build and test/shell/ on RHEL8. Change-Id: Ic7d59b283f4e2f011880ff6221d550b52714a538 Reviewed-on: http://gerrit.cloudera.org:8080/23750 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
579 lines
21 KiB
Bash
Executable File
579 lines
21 KiB
Bash
Executable File
#!/bin/bash
|
|
|
|
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
# This script bootstraps a system for Impala development from almost nothing; it is known
|
|
# to work on Ubuntu 16.04. It clobbers some local environment and system
|
|
# configurations, so it is best to run this in a fresh install. It also sets up the
|
|
# ~/.bashrc for the calling user and impala-config-local.sh with some environment
|
|
# variables to make Impala compile and run after this script is complete.
|
|
# When IMPALA_HOME is set, the script will bootstrap Impala development in the
|
|
# location specified.
|
|
#
|
|
# The intended user is a person who wants to start contributing code to Impala. This
|
|
# script serves as an executable reference point for how to get started.
|
|
#
|
|
# To run this in a Docker container:
|
|
#
|
|
# 1. Run with --privileged
|
|
# 2. Give the container a non-root sudoer with NOPASSWD:
|
|
# apt-get update
|
|
# apt-get install sudo
|
|
# adduser --disabled-password --gecos '' impdev
|
|
# echo 'impdev ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
|
|
# 3. Run this script as that user: su - impdev -c /bootstrap_development.sh
|
|
#
|
|
# This script has some specializations for CentOS/Redhat 6/7 and Ubuntu.
|
|
# Of note, inside of Docker, Redhat 7 doesn't allow you to start daemons
|
|
# with systemctl, so sshd and postgresql are started manually in those cases.
|
|
|
|
set -eu -o pipefail
|
|
|
|
# Default IMPALA_HOME to the parent of the directory containing this script unless the
# caller already set it. "&&" keeps a failed cd from silently yielding the current
# directory, and the quoting keeps paths with spaces or glob characters intact.
: "${IMPALA_HOME:=$(cd "$(dirname "$0")"/.. && pwd)}"
export IMPALA_HOME
|
|
|
|
# Ask for confirmation before touching system settings, but only when stdout is a
# terminal; runs without a terminal on stdout proceed without prompting.
if [[ -t 1 ]]; then
  echo "This script will clobber some system settings. Are you sure you want to"
  echo -n "continue? "
  while true; do
    # -r keeps backslashes literal. A failed read (EOF on stdin) can never produce an
    # answer, so stop instead of prompting forever.
    if ! read -r -p "[yes/no] " ANSWER; then
      echo "No answer received, aborting." >&2
      exit 1
    fi
    # Upper-case the reply so the comparison is case-insensitive.
    ANSWER=$(echo "$ANSWER" | tr 'a-z' 'A-Z')
    if [[ "$ANSWER" == YES ]]; then
      break
    elif [[ "$ANSWER" == NO ]]; then
      echo "OK, Bye!"
      exit 1
    fi
  done
fi
|
|
|
|
set -x
|
|
|
|
# Determine whether we're running on redhat or ubuntu. Every flag stays empty unless
# the matching platform is detected, in which case it is set to "true".
REDHAT=
REDHAT7=
REDHAT8=
REDHAT9=
UBUNTU=
UBUNTU16=
UBUNTU18=
UBUNTU20=
UBUNTU22=
UBUNTU24=
IN_DOCKER=
if [[ -f /etc/redhat-release ]]; then
  REDHAT=true
  echo "Identified redhat system."
  # grep also echoes the matching release line, which ends up in the log.
  if grep 'release 9\.' /etc/redhat-release; then
    REDHAT9=true
    echo "Identified redhat9 system."
  fi
  if grep 'release 8\.' /etc/redhat-release; then
    REDHAT8=true
    echo "Identified redhat8 system."
  fi
  if grep 'release 7\.' /etc/redhat-release; then
    REDHAT7=true
    echo "Identified redhat7 system."
  fi
  # TODO: restrict redhat versions
else
  # /etc/lsb-release may be missing on other distributions. Default the fields and
  # only source the file when it exists, so such systems reach the "only supports"
  # message below instead of aborting on a failed source or an unbound variable.
  DISTRIB_ID=
  DISTRIB_RELEASE=
  if [[ -f /etc/lsb-release ]]; then
    source /etc/lsb-release
  fi
  if [[ "${DISTRIB_ID}" != Ubuntu ]]; then
    echo "This script only supports Ubuntu or RedHat" >&2
    exit 1
  fi
  UBUNTU=true
  echo "Identified Ubuntu system."
  # Kerberos setup would pop up dialog boxes without this
  export DEBIAN_FRONTEND=noninteractive
  case "${DISTRIB_RELEASE}" in
    16.04) UBUNTU16=true ;;
    18.04) UBUNTU18=true ;;
    20.04) UBUNTU20=true ;;
    22.04) UBUNTU22=true ;;
    24.04) UBUNTU24=true ;;
    *)
      echo "This script supports Ubuntu versions 16.04, 18.04, 20.04, 22.04, or 24.04" >&2
      exit 1
      ;;
  esac
  echo "Identified Ubuntu ${DISTRIB_RELEASE} system."
fi
|
|
# Detect whether we are running inside a Docker container. With cgroup v1, PID 1's
# cgroup paths contain "docker". On cgroup v2 hosts that file typically does not
# mention docker at all, so also accept the /.dockerenv marker file that Docker
# creates in the container's root filesystem.
if [[ -f /.dockerenv ]] || grep docker /proc/1/cgroup; then
  IN_DOCKER=true
  echo "Identified we are running inside of Docker."
fi
|
|
|
|
# Helper function to execute following command only on Ubuntu.
# Arguments: the command and its arguments, passed through unchanged.
# Returns: the command's exit status, or 0 without running anything elsewhere.
function ubuntu {
  # ${UBUNTU:-} keeps "set -u" from aborting if the flag was never initialized.
  if [[ "${UBUNTU:-}" == true ]]; then
    "$@"
  fi
}
|
|
|
|
# Helper function to execute following command only on Ubuntu 16.04.
# Arguments: the command and its arguments, passed through unchanged.
# Returns: the command's exit status, or 0 without running anything elsewhere.
function ubuntu16 {
  # ${UBUNTU16:-} keeps "set -u" from aborting if the flag was never initialized.
  if [[ "${UBUNTU16:-}" == true ]]; then
    "$@"
  fi
}
|
|
|
|
# Helper function to execute following command only on Ubuntu 18.04.
# Arguments: the command and its arguments, passed through unchanged.
# Returns: the command's exit status, or 0 without running anything elsewhere.
function ubuntu18 {
  # ${UBUNTU18:-} keeps "set -u" from aborting if the flag was never initialized.
  if [[ "${UBUNTU18:-}" == true ]]; then
    "$@"
  fi
}
|
|
|
|
# Helper function to execute following command only on Ubuntu 20.04.
# Arguments: the command and its arguments, passed through unchanged.
# Returns: the command's exit status, or 0 without running anything elsewhere.
function ubuntu20 {
  # ${UBUNTU20:-} keeps "set -u" from aborting if the flag was never initialized.
  if [[ "${UBUNTU20:-}" == true ]]; then
    "$@"
  fi
}
|
|
|
|
# Helper function to execute following command only on Ubuntu 22.04.
# Arguments: the command and its arguments, passed through unchanged.
# Returns: the command's exit status, or 0 without running anything elsewhere.
function ubuntu22 {
  # ${UBUNTU22:-} keeps "set -u" from aborting if the flag was never initialized.
  if [[ "${UBUNTU22:-}" == true ]]; then
    "$@"
  fi
}
|
|
|
|
# Helper function to execute following command only on Ubuntu 24.04.
# Arguments: the command and its arguments, passed through unchanged.
# Returns: the command's exit status, or 0 without running anything elsewhere.
function ubuntu24 {
  # ${UBUNTU24:-} keeps "set -u" from aborting if the flag was never initialized.
  if [[ "${UBUNTU24:-}" == true ]]; then
    "$@"
  fi
}
|
|
|
|
# Helper function to execute following command only on RedHat.
# Arguments: the command and its arguments, passed through unchanged.
# Returns: the command's exit status, or 0 without running anything elsewhere.
function redhat {
  # ${REDHAT:-} keeps "set -u" from aborting if the flag was never initialized.
  if [[ "${REDHAT:-}" == true ]]; then
    "$@"
  fi
}
|
|
|
|
# Helper function to execute following command only on RedHat7.
# Arguments: the command and its arguments, passed through unchanged.
# Returns: the command's exit status, or 0 without running anything elsewhere.
function redhat7 {
  # ${REDHAT7:-} keeps "set -u" from aborting if the flag was never initialized.
  if [[ "${REDHAT7:-}" == true ]]; then
    "$@"
  fi
}
|
|
# Helper function to execute following command only on RedHat8.
# Arguments: the command and its arguments, passed through unchanged.
# Returns: the command's exit status, or 0 without running anything elsewhere.
function redhat8 {
  # ${REDHAT8:-} keeps "set -u" from aborting if the flag was never initialized.
  if [[ "${REDHAT8:-}" == true ]]; then
    "$@"
  fi
}
|
|
# Helper function to execute following command only on RedHat9.
# Arguments: the command and its arguments, passed through unchanged.
# Returns: the command's exit status, or 0 without running anything elsewhere.
function redhat9 {
  # ${REDHAT9:-} keeps "set -u" from aborting if the flag was never initialized.
  if [[ "${REDHAT9:-}" == true ]]; then
    "$@"
  fi
}
|
|
# Helper function to execute following command only in docker.
# Arguments: the command and its arguments, passed through unchanged.
# Returns: the command's exit status, or 0 without running anything outside docker.
function indocker {
  # ${IN_DOCKER:-} keeps "set -u" from aborting if the flag was never initialized.
  if [[ "${IN_DOCKER:-}" == true ]]; then
    "$@"
  fi
}
|
|
# Helper function to execute following command only outside of docker.
# Arguments: the command and its arguments, passed through unchanged.
# Returns: the command's exit status, or 0 without running anything inside docker.
function notindocker {
  # ${IN_DOCKER:-} keeps "set -u" from aborting if the flag was never initialized;
  # an unset or empty flag counts as "not in docker".
  if [[ "${IN_DOCKER:-}" != true ]]; then
    "$@"
  fi
}
|
|
|
|
# X permission on home directory is needed for some uses of postgresql (IMPALA-13693)
|
|
chmod o+X ~
|
|
|
|
# Note that yum has its own retries; see yum.conf(5).
|
|
REAL_APT_GET=$(ubuntu which apt-get)
|
|
# Wrapper that shadows apt-get: runs the real binary through sudo (keeping the
# environment) and retries up to 30 times, sleeping one second longer after each
# failed attempt.
# Globals: REAL_APT_GET (read) - path of the real apt-get binary.
# Arguments: passed through unchanged to apt-get.
# Outputs: "ATTEMPT: <n>" on stdout per attempt; "NO MORE RETRIES" on stderr on failure.
# Returns: 0 as soon as an attempt succeeds, 1 once every attempt has failed.
function apt-get {
  # Keep the loop counter local so it does not leak into the rest of the script.
  local ITER
  for ITER in {1..30}; do
    echo "ATTEMPT: ${ITER}"
    if sudo -E "${REAL_APT_GET}" "$@"; then
      return 0
    fi
    sleep "${ITER}"
  done
  echo "NO MORE RETRIES" >&2
  return 1
}
|
|
|
|
echo ">>> Installing build tools"
|
|
if [[ "$UBUNTU" == true ]]; then
|
|
while sudo fuser /var/lib/dpkg/lock-frontend; do
|
|
sleep 1
|
|
done
|
|
fi
|
|
|
|
# Set UBUNTU_JAVA_VERSION, UBUNTU_PACKAGE_ARCH, REDHAT_JAVA_VERSION
|
|
source "$IMPALA_HOME/bin/impala-config-java.sh"
|
|
|
|
ubuntu apt-get update
|
|
ubuntu apt-get --yes install ccache curl file gawk g++ gcc apt-utils git libffi-dev \
|
|
libkrb5-dev krb5-admin-server krb5-kdc krb5-user libsasl2-dev \
|
|
libsasl2-modules libsasl2-modules-gssapi-mit libssl-dev make ninja-build \
|
|
python3-dev python3-setuptools python3-venv postgresql \
|
|
ssh wget vim-common psmisc lsof net-tools language-pack-en libxml2-dev \
|
|
libxslt-dev openjdk-${UBUNTU_JAVA_VERSION}-jdk \
|
|
openjdk-${UBUNTU_JAVA_VERSION}-source openjdk-${UBUNTU_JAVA_VERSION}-dbg
|
|
|
|
# Regular python packages don't exist on Ubuntu 22. Everything is Python 3.
|
|
ubuntu16 apt-get --yes install python python-dev python-setuptools
|
|
ubuntu18 apt-get --yes install python python-dev python-setuptools
|
|
ubuntu20 apt-get --yes install python python-dev python-setuptools
|
|
|
|
# Ubuntu 20's Python 2.7.18-1~20.04.5 version has a bug in its tarfile support.
# If we detect the affected tarfile.py, download a patched version and overwrite it.
if [[ "${UBUNTU20:-}" == true && -f /usr/lib/python2.7/tarfile.py ]]; then
  TARFILE_PY_HASH=$(sha1sum /usr/lib/python2.7/tarfile.py | cut -d' ' -f1)
  if [[ "${TARFILE_PY_HASH}" == "6e1a6d9ea2a535cbb17fe266ed9ac76eb5e27b89" ]]; then
    TMP_DIR=$(mktemp -d)
    # Download straight into the scratch directory instead of changing into it, and
    # quote the path so a TMPDIR containing spaces cannot split the rm -rf arguments.
    wget -nv -O "${TMP_DIR}/tarfile.py" https://launchpadlibrarian.net/759546541/tarfile.py
    sudo cp "${TMP_DIR}/tarfile.py" /usr/lib/python2.7/tarfile.py
    rm -rf "${TMP_DIR}"
  fi
fi
|
|
|
|
# Required by Kudu in the minicluster. Older Kudu versions depend on libtinfo5,
|
|
# versions that can be compiled for Ubuntu 24.04 depend on libtinfo6.
|
|
ubuntu20 apt-get --yes install libtinfo5 libtinfo6
|
|
ubuntu22 apt-get --yes install libtinfo5 libtinfo6
|
|
ubuntu24 apt-get --yes install libtinfo6
|
|
|
|
ARCH_NAME=$(uname -p)
|
|
if [[ $ARCH_NAME == 'aarch64' ]]; then
|
|
ubuntu apt-get --yes install unzip pkg-config flex maven python3-pip build-essential \
|
|
texinfo bison autoconf automake libtool libz-dev libncurses-dev \
|
|
libncurses5-dev libreadline-dev
|
|
fi
|
|
|
|
ubuntu sudo update-java-alternatives -l || true
|
|
|
|
# Configure the default Java version to be the version we selected.
|
|
ubuntu sudo update-java-alternatives -v -s \
|
|
java-1.${UBUNTU_JAVA_VERSION}.0-openjdk-${UBUNTU_PACKAGE_ARCH}
|
|
|
|
redhat sudo yum install -y file gawk gcc gcc-c++ git krb5-devel krb5-server \
|
|
krb5-workstation libevent-devel libffi-devel make openssl-devel cyrus-sasl \
|
|
cyrus-sasl-gssapi cyrus-sasl-devel cyrus-sasl-plain \
|
|
postgresql postgresql-server rpm-build \
|
|
wget vim-common nscd cmake zlib-devel \
|
|
procps psmisc lsof openssh-server python3-devel python3-setuptools \
|
|
net-tools langpacks-en glibc-langpack-en libxml2-devel libxslt-devel \
|
|
java-${REDHAT_JAVA_VERSION}-openjdk-src java-${REDHAT_JAVA_VERSION}-openjdk-devel
|
|
|
|
redhat sudo alternatives --set java java-${REDHAT_JAVA_VERSION}-openjdk.${ARCH_NAME}
|
|
redhat sudo alternatives --set javac java-${REDHAT_JAVA_VERSION}-openjdk.${ARCH_NAME}
|
|
redhat sudo alternatives --set java_sdk_openjdk java-${REDHAT_JAVA_VERSION}-openjdk.${ARCH_NAME}
|
|
redhat sudo alternatives --set jre_openjdk java-${REDHAT_JAVA_VERSION}-openjdk.${ARCH_NAME}
|
|
|
|
# update-java-alternatives may not take effect if there is a Java in PATH
|
|
which java
|
|
java -version
|
|
which javac
|
|
javac -version
|
|
|
|
# fuse-devel doesn't exist for Redhat 9
|
|
redhat7 sudo yum install -y fuse-devel curl
|
|
redhat8 sudo yum install -y fuse-devel curl
|
|
# Redhat9 can have curl-minimal preinstalled, which can conflict with curl.
|
|
# Adding --allowerasing allows curl to replace curl-minimal.
|
|
redhat9 sudo yum install -y --allowerasing curl
|
|
|
|
# RedHat / CentOS 8 exposes only specific versions of Python.
|
|
# Set up unversioned default Python 2.x for older CentOS versions
|
|
redhat7 sudo yum install -y python-devel python-setuptools python-argparse
|
|
|
|
# Install Python 2.x explicitly for CentOS 8 and make it the default "python".
# If "python" already resolves to a 2.x interpreter only the dev package is added.
# Otherwise python2 is installed when missing, pip is tied to the python alternative,
# and python2 becomes the default. All changes go through sudo (dnf / alternatives).
function setup_python2() {
  # Python 2 prints its version on stderr, hence the 2>&1. The match is anchored:
  # an unanchored '2\.' also matches Python 3 versions such as "3.12.2".
  if command -v python && [[ $(python --version 2>&1 | cut -d ' ' -f 2) =~ ^2\. ]]; then
    echo "We have Python 2.x";
  else
    if ! command -v python2; then
      # Python2 needs to be installed
      sudo dnf install -y python2
    fi
    # Here Python2 is installed, but is not the default Python.
    # 1. Link pip's version to Python's version
    sudo alternatives --add-slave python /usr/bin/python2 /usr/bin/pip pip /usr/bin/pip2
    sudo alternatives --add-slave python /usr/libexec/no-python /usr/bin/pip pip \
        /usr/libexec/no-python
    # 2. Set Python2 (with pip2) to be the system default.
    sudo alternatives --set python /usr/bin/python2
  fi
  # Here the Python2 runtime is already installed, add the dev package
  sudo dnf -y install python2-devel
}
|
|
|
|
# IMPALA-14606: Stop building using Python 2 and always run with
|
|
# IMPALA_USE_PYTHON3_TESTS=true.
|
|
# redhat8 setup_python2
|
|
redhat8 pip install --user argparse
|
|
|
|
# Point Python to Python 3 for Redhat 9 and Ubuntu 22, or newer.
# Does nothing when a "python" command already exists or when there is no python3.
# Otherwise registers /usr/bin/python3 as the "python" alternative through sudo.
# Exits the script with status 1 if neither alternatives tool is available.
function setup_python3() {
  # If python is already set, then use it.
  if command -v python > /dev/null; then
    return 0
  fi
  # Without python3 there is nothing to point python at. When it is found, its
  # location is echoed, which records it in the log.
  if ! command -v python3 ; then
    return 0
  fi
  # Newer OSes (e.g. Redhat 9 and equivalents) make it harder to get Python 2, and we
  # need to start using Python 3 by default.
  # For these new OSes (Ubuntu 22+, Redhat 9), there is no alternative entry for
  # python, so we need to create one from scratch.
  if command -v alternatives > /dev/null; then
    # Only an entry named exactly "python" counts: a bare "grep python" would also
    # match entries such as "python3" and then fail on the --set below.
    # grep reads the whole list (no -q) so the pipeline cannot fail under pipefail
    # just because grep stopped reading early.
    if sudo alternatives --list | grep '^python[[:space:]]' > /dev/null ; then
      sudo alternatives --set python /usr/bin/python3
    else
      # The alternative doesn't exist, create it
      sudo alternatives --install /usr/bin/python python /usr/bin/python3 20
    fi
  elif command -v update-alternatives > /dev/null; then
    # This is what Ubuntu 20/22+ does. There is no official python alternative,
    # so we need to create one.
    sudo update-alternatives --install /usr/bin/python python /usr/bin/python3 20
  else
    echo "ERROR: trying to set python to point to python3"
    echo "ERROR: alternatives/update-alternatives don't exist, so giving up..."
    exit 1
  fi
}
|
|
|
|
redhat9 setup_python3
|
|
ubuntu22 setup_python3
|
|
ubuntu24 setup_python3
|
|
|
|
# CentOS repos don't contain ccache, so install from EPEL
|
|
redhat sudo yum install -y epel-release
|
|
redhat sudo yum install -y ccache
|
|
|
|
# Clean up yum caches
|
|
redhat sudo yum clean all
|
|
|
|
# Download Maven since the packaged version is pretty old.
: "${IMPALA_TOOLCHAIN_HOST:=native-toolchain.s3.amazonaws.com}"
MVN_VERSION="3.9.8"
MAVEN_DIRECTORY="/usr/local/apache-maven-${MVN_VERSION}"
if [[ ! -d "${MAVEN_DIRECTORY}" ]]; then
  MVN_TARBALL="apache-maven-${MVN_VERSION}-bin.tar.gz"
  # Download into a fresh scratch directory with an explicit output name. Without
  # -O, a stale tarball left in the current directory makes wget save the new
  # download under a ".1" suffix while the checksum below validates the stale file.
  # This also avoids leaving a root-owned tarball behind in the current directory.
  MVN_TMP_DIR=$(mktemp -d)
  wget -nv -O "${MVN_TMP_DIR}/${MVN_TARBALL}" \
    "https://${IMPALA_TOOLCHAIN_HOST}/maven/${MVN_TARBALL}"
  # Verify the download before unpacking it as root.
  sha512sum -c - <<< "7d171def9b85846bf757a2cec94b7529371068a0670df14682447224e57983528e97a6d1b850327e4ca02b139abaab7fcb93c4315119e6f0ffb3f0cbc0d0b9a2  ${MVN_TMP_DIR}/${MVN_TARBALL}"
  sudo tar -C /usr/local -xzf "${MVN_TMP_DIR}/${MVN_TARBALL}"
  rm -rf "${MVN_TMP_DIR}"
  # Ensure that Impala's preferred version is installed locally,
  # even if a previous version exists there.
  sudo ln -s -f "${MAVEN_DIRECTORY}/bin/mvn" "/usr/local/bin"

  # reset permissions on redhat8
  # TODO: figure out why this is necessary for redhat8
  redhat8 indocker sudo chmod 0755 "${MAVEN_DIRECTORY}"
  redhat8 indocker sudo chmod 0755 "${MAVEN_DIRECTORY}"/{bin,boot}
  redhat9 indocker sudo chmod 0755 "${MAVEN_DIRECTORY}"
  redhat9 indocker sudo chmod 0755 "${MAVEN_DIRECTORY}"/{bin,boot}
fi
|
|
|
|
if ! { service --status-all | grep -E '^ \[ \+ \] ssh$'; }
|
|
then
|
|
ubuntu sudo service ssh start
|
|
redhat notindocker sudo service sshd start
|
|
redhat indocker sudo /usr/bin/ssh-keygen -A
|
|
redhat indocker sudo /usr/sbin/sshd
|
|
# The CentOS 8.1 image includes /var/run/nologin by mistake; this file prevents
|
|
# SSH logins. See https://github.com/CentOS/sig-cloud-instance-images/issues/60
|
|
redhat8 indocker sudo rm -f /var/run/nologin
|
|
fi
|
|
|
|
# TODO: config ccache to give it plenty of space
|
|
# TODO: check that there is enough space on disk to do a build and data load
|
|
# TODO: make this work with non-bash shells
|
|
|
|
echo ">>> Configuring system"
|
|
|
|
# Initialize, configure and start the local PostgreSQL server, then make sure the
# "hiveuser" role used by the Hive Metastore exists and may create databases.
# Uses the redhat / ubuntu / indocker / notindocker helpers to pick the commands for
# the current platform, and reads ARCH_NAME. Takes no arguments.
function setup_postgresql() {
  echo ">>> Configuring postgresql. This can fail if postgres is already initialized"

  # initdb can fail if it was run before on this host - ignore this error
  redhat notindocker sudo service postgresql initdb || true
  redhat notindocker sudo service postgresql stop
  redhat indocker sudo -u postgres PGDATA=/var/lib/pgsql/data pg_ctl init
  ubuntu sudo service postgresql stop

  # These configurations expose connecting to PostgreSQL via md5-hashed
  # passwords over TCP to localhost, and the local socket is trusted
  # widely.
  ubuntu sudo sed -ri 's/local +all +all +peer/local all all trust/g' \
    /etc/postgresql/*/main/pg_hba.conf
  # Accept remote connections from the hosts in the same subnet.
  ubuntu sudo sed -ri "s/#listen_addresses = 'localhost'/listen_addresses = '0.0.0.0'/g" \
    /etc/postgresql/*/main/postgresql.conf
  # The dots of the address are escaped so they only match literal dots.
  ubuntu sudo sed -ri 's/host +all +all +127\.0\.0\.1\/32/host all all samenet/g' \
    /etc/postgresql/*/main/pg_hba.conf
  redhat sudo sed -ri 's/local +all +all +(ident|peer)/local all all trust/g' \
    /var/lib/pgsql/data/pg_hba.conf
  # Accept md5 passwords from localhost
  redhat sudo sed -i -e 's,\(host.*\)ident,\1md5,' /var/lib/pgsql/data/pg_hba.conf
  # Accept remote connections from the hosts in the same subnet.
  redhat sudo sed -ri "s/#listen_addresses = 'localhost'/listen_addresses = '0.0.0.0'/g" \
    /var/lib/pgsql/data/postgresql.conf
  redhat sudo sed -ri 's/host +all +all +127\.0\.0\.1\/32/host all all samenet/g' \
    /var/lib/pgsql/data/pg_hba.conf

  ubuntu sudo service postgresql start
  redhat notindocker sudo service postgresql start
  # Important to redirect pg_ctl to a logfile, lest it keep the stdout
  # file descriptor open, preventing the shell from exiting.
  redhat indocker sudo -u postgres PGDATA=/var/lib/pgsql/data bash -c \
    "pg_ctl start -w --timeout=120 >> /var/lib/pgsql/pg.log 2>&1"

  # Set up postgres for HMS: create the role only when it does not exist yet.
  if ! [[ 1 = $(sudo -u postgres psql -At -c "SELECT count(*) FROM pg_roles WHERE rolname = 'hiveuser';") ]]
  then
    sudo -u postgres psql -c "CREATE ROLE hiveuser LOGIN PASSWORD 'password';"
  fi
  sudo -u postgres psql -c "ALTER ROLE hiveuser WITH CREATEDB;"
  # On Ubuntu 18.04 aarch64 version, the sql 'select * from pg_roles' blocked,
  # because output of 'select *' is too long to display in 1 line.
  # So here just change it to 'select count(*)' as a work around.
  if [[ $ARCH_NAME == 'aarch64' ]]; then
    sudo -u postgres psql -c "SELECT count(*) FROM pg_roles WHERE rolname = 'hiveuser';"
  else
    sudo -u postgres psql -c "SELECT * FROM pg_roles WHERE rolname = 'hiveuser';"
  fi
  echo ">>> Configuring postgresql finished."
}
|
|
|
|
# Setup ssh to ssh to localhost
mkdir -p ~/.ssh
chmod go-rwx ~/.ssh
if ! [[ -f ~/.ssh/id_rsa ]]; then
  ssh-keygen -t rsa -N '' -q -f ~/.ssh/id_rsa
fi

# Authorize our own key and skip host authentication for localhost. Only append what
# is missing, so running the script again does not pile up duplicate entries.
touch ~/.ssh/authorized_keys ~/.ssh/config
# -x and -F compare whole lines literally; "--" protects the key text from being
# parsed as an option.
if ! grep -qxF -- "$(cat ~/.ssh/id_rsa.pub)" ~/.ssh/authorized_keys; then
  # The leading empty line guards against a file that lacks a final newline.
  { echo ""; cat ~/.ssh/id_rsa.pub; } >> ~/.ssh/authorized_keys
fi
chmod 0600 ~/.ssh/authorized_keys
if ! grep -qxF -- "NoHostAuthenticationForLocalhost yes" ~/.ssh/config; then
  echo -e "\nNoHostAuthenticationForLocalhost yes" >> ~/.ssh/config
fi
chmod 0600 ~/.ssh/config
# Smoke test: fails here if logging in to localhost still does not work.
ssh localhost whoami
|
|
|
|
# Workarounds for HDFS networking issues: On the minicluster, tests that rely
|
|
# on WebHDFS may fail with "Connection refused" errors because the namenode
|
|
# will return a "Location:" redirect to the hostname, but the datanode is only
|
|
# listening on localhost. See also HDFS-13797. To reproduce this, the following
|
|
# snippet may be useful:
|
|
#
|
|
# $impala-python3
|
|
# >>> import logging
|
|
# >>> logging.basicConfig(level=logging.DEBUG)
|
|
# >>> logging.getLogger("requests.packages.urllib3").setLevel(logging.DEBUG)
|
|
# >>> from pywebhdfs.webhdfs import PyWebHdfsClient
|
|
# >>> PyWebHdfsClient(host='localhost',port='5070', user_name='hdfs').read_file(
|
|
# "/test-warehouse/tpch.region/region.tbl")
|
|
# INFO:...:Starting new HTTP connection (1): localhost
|
|
# DEBUG:...:"GET /webhdfs/v1//t....tbl?op=OPEN&user.name=hdfs HTTP/1.1" 307 0
|
|
# INFO:...:Starting new HTTP connection (1): HOSTNAME.DOMAIN
|
|
# Traceback (most recent call last):
|
|
# ...
|
|
# ...ConnectionError: ('Connection aborted.', error(111, 'Connection refused'))
|
|
# Prefer the FQDN first for rpc-mgr-kerberized-test as newer krb5 requires FQDN.
|
|
# Append an entry to a file, through sudo, unless the file already contains it.
# Arguments: $1 - file to update; $2 - text to look for and, if absent, append.
# Outputs: echoes the entry on stdout when it is appended (via tee).
# Returns: 0 if the entry was already present, otherwise the append's status.
add_if_not_there() {
  # -F matches the entry literally; as a regex the dots in addresses and host names
  # would match any character. "--" protects entries that start with a dash.
  if ! grep -qF -- "$2" "$1"; then
    printf '%s\n' "$2" | sudo tee -a "$1"
  fi
}
|
|
add_if_not_there "/etc/hosts" "127.0.0.1 $(hostname) $(hostname -s)"
|
|
|
|
# Add hostnames with multiple labels to allow matching wildcard TLS certificates.
|
|
# Create names that map to v4/v6/dual localhost to help ipv6 testing.
|
|
add_if_not_there "/etc/hosts" "127.0.0.1 ip4.impala.test ip46.impala.test"
|
|
add_if_not_there "/etc/hosts" "::1 ip6.impala.test ip46.impala.test"
|
|
|
|
#
|
|
# In Docker, one can change /etc/hosts as above but not with sed -i. The error message is
|
|
# "sed: cannot rename /etc/sedc3gPj8: Device or resource busy". The following lines are
|
|
# basically sed -i but with cp instead of mv for -i part.
|
|
NEW_HOSTS=$(mktemp)
|
|
sed 's/127.0.1.1/127.0.0.1/g' /etc/hosts > "${NEW_HOSTS}"
|
|
diff -u /etc/hosts "${NEW_HOSTS}" || true
|
|
sudo cp "${NEW_HOSTS}" /etc/hosts
|
|
rm "${NEW_HOSTS}"
|
|
|
|
sudo mkdir -p /var/lib/hadoop-hdfs
|
|
sudo chown $(whoami) /var/lib/hadoop-hdfs/
|
|
|
|
# TODO: restrict this to only the users it is needed for
|
|
echo -e "\n* - nofile 1048576" | sudo tee -a /etc/security/limits.conf
|
|
|
|
# Increase memlock for HDFS caching. On RedHat systems this defaults to 64 (KB). Ubuntu
|
|
# uses systemd, which sets its own default. With Ubuntu 18.04 that default is 16 KB,
|
|
# 20.04+ defaults to 64 MB. Set all to 64 MB for the current user; Impala test systems
|
|
# require 10s of GBs of memory, so this setting should not be a problem.
|
|
USER=${USER-$(id -un)}
|
|
echo -e "${USER} - memlock 65536" | sudo tee /etc/security/limits.d/10-memlock.conf
|
|
|
|
# Default on CentOS limits a user to 1024 or 4096 processes (threads) , which isn't
|
|
# enough for minicluster with all of its friends.
|
|
redhat7 sudo sed -i 's,\*\s*soft\s*nproc\s*[0-9]*$,* soft nproc unlimited,' \
|
|
/etc/security/limits.d/*-nproc.conf
|
|
redhat8 echo -e "* soft nproc unlimited" | sudo tee -a /etc/security/limits.conf
|
|
redhat9 echo -e "* soft nproc unlimited" | sudo tee -a /etc/security/limits.conf
|
|
|
|
echo ">>> Checking out Impala"

# If there is no Impala git repo, get one now
if ! [[ -d "$IMPALA_HOME" ]]; then
  time -p git clone https://gitbox.apache.org/repos/asf/impala.git "$IMPALA_HOME"
fi
cd "$IMPALA_HOME"
# Persist the checkout location for future shells and apply it to this one.
# %q shell-quotes the path, so a directory containing spaces or shell metacharacters
# survives both the eval below and being sourced from ~/.bashrc.
SET_IMPALA_HOME=$(printf 'export IMPALA_HOME=%q' "$(pwd)")
printf '\n%s\n' "$SET_IMPALA_HOME" >> ~/.bashrc
eval "$SET_IMPALA_HOME"
|
|
|
|
# Try to prepopulate the m2 directory to save time
|
|
if [[ "${PREPOPULATE_M2_REPOSITORY:-true}" == true ]] ; then
|
|
echo ">>> Populating m2 directory..."
|
|
if ! bin/jenkins/populate_m2_directory.py ; then
|
|
echo "Failed to prepopulate the m2 directory. Continuing..."
|
|
fi
|
|
else
|
|
echo ">>> Skip populating m2 directory"
|
|
fi
|
|
|
|
setup_postgresql
|
|
# Be careful about adding code after postgres initialization, it may fail (IMPALA-13802).
|