mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
An initial implementation of KUDU-1261 (array column type) was recently merged into the upstream Apache Kudu repository. This patch adds initial Impala support for working with Kudu tables having array type columns. Unlike rows, the elements of a Kudu array are stored in a different format than Impala. Instead of a per-row bit flag for NULL info, values and NULL bits are stored in separate arrays. The following types of queries are not supported in this patch: - (IMPALA-14538) Queries that reference an array column as a table, e.g. ```sql SELECT item FROM kudu_array.array_int; ``` - (IMPALA-14539) Queries that create duplicate collection slots, e.g. ```sql SELECT array_int FROM kudu_array AS t, t.array_int AS unnested; ``` Testing: - Add some FE tests in AnalyzeDDLTest and AnalyzeKuduDDLTest. - Add EE test test_kudu.py::TestKuduArray. Since Impala does not support inserting complex types, including array, the data insertion part of the test is achieved through custom C++ code kudu-array-inserter.cc that inserts into Kudu via the Kudu C++ client. It would be great if we could migrate it to Python so that it can be moved to the same file as the test (IMPALA-14537). - Pass core tests. Co-authored-by: Riza Suminto Change-Id: I9282aac821bd30668189f84b2ed8fff7047e7310 Reviewed-on: http://gerrit.cloudera.org:8080/23493 Reviewed-by: Alexey Serbin <alexey@apache.org> Reviewed-by: Michael Smith <michael.smith@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
126 lines
4.2 KiB
Bash
Executable File
126 lines
4.2 KiB
Bash
Executable File
#!/bin/bash
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
# This script runs Impala's tests with a dockerised minicluster.
# It has been tested on Ubuntu 16.04.
# Trace every command so the CI log shows exactly what was executed.
set -x
# Strict mode: abort on unhandled errors, on unset variables, and on a
# failure in any stage of a pipeline.
set -eu -o pipefail

# Name of the docker network used by the dockerised minicluster. It is
# created by docker/configure_test_network.sh and removed again on exit.
DOCKER_NETWORK="test-impala-cluster"

# Helper to source impala-config.sh, which may have unbound variables.
#
# nounset is switched off while the file is sourced and afterwards put back
# to whatever the caller had (this script always calls it with -u enabled).
# All output of impala-config.sh is discarded.
#
# Must be called from the directory that contains bin/impala-config.sh,
# because the path is relative.
source_impala_config() {
  local nounset_was_on=false
  case "$-" in
    *u*) nounset_was_on=true ;;
  esac

  set +u
  . ./bin/impala-config.sh > /dev/null 2>&1
  if [[ "${nounset_was_on}" == true ]]; then
    set -u
  fi
}

# Load the Impala environment up front; the exit handler below reads
# IMPALA_HOME.
source_impala_config

# EXIT handler: saves the logs of every docker container on the host and then
# removes the containers and the test network.
#
# Every step is best-effort so that a failure while collecting logs can never
# prevent the cleanup that follows (the handler runs with 'set -e' active).
#
# Globals:
#   IMPALA_HOME      (read)    logs go to ${IMPALA_HOME}/logs/docker_logs
#   DOCKER_NETWORK   (read)    network to remove
#   DOCKER_LOGS_DIR  (written) directory the container logs were written to
onexit() {
  local container
  local container_ids

  # Get the logs from all docker containers
  DOCKER_LOGS_DIR="${IMPALA_HOME}/logs/docker_logs"
  mkdir -p "${DOCKER_LOGS_DIR}" \
    || echo "Could not create ${DOCKER_LOGS_DIR}; container logs will be lost" >&2

  # Container IDs contain no whitespace, so the list is deliberately
  # word-split below.
  container_ids=$(docker ps -a -q) || container_ids=""
  for container in ${container_ids}; do
    docker logs "${container}" > "${DOCKER_LOGS_DIR}/${container}.log" 2>&1 || true
  done

  # Clean up docker containers and networks that may have been created by
  # these tests. 'docker rm' rejects an empty argument list, so skip it when
  # there is nothing to remove.
  if [[ -n "${container_ids}" ]]; then
    # shellcheck disable=SC2086 # intentional word splitting of the ID list
    docker rm -f ${container_ids} || true
  fi
  docker network rm "${DOCKER_NETWORK}" || true
}
# Collect container logs and tear everything down on any exit path.
trap onexit EXIT

# Check that docker is running and that our user can interact with it.
docker run hello-world

# Set up the test network.
./docker/configure_test_network.sh "${DOCKER_NETWORK}"

# Pick up the new variables.
source_impala_config

# Dump diagnostics for networks and check connectivity.
# The interface dump is informational only, so a host without net-tools
# falls back to 'ip addr' instead of aborting the whole run.
ifconfig || ip addr || true
ping -c 1 "${INTERNAL_LISTEN_HOST}"

# Check that ssh to localhost via Docker gateway works.
# 'ssh -n' keeps ssh from reading this script's stdin.
if ! ssh -n "${INTERNAL_LISTEN_HOST}" "echo 'SSH success!'"; then
  echo "Failed to ssh, will try to add docker network gateway to known hosts"
  ssh-keyscan "${INTERNAL_LISTEN_HOST}" >> ~/.ssh/known_hosts
  # Second attempt is not guarded: if it still fails, 'set -e' ends the run.
  ssh -n "${INTERNAL_LISTEN_HOST}" "echo 'SSH success!'"
fi

# Build Impala and load data with the non-dockerised path.
# TODO: this is a bit awkward. It would be better to have a single invocation
# to run everything with docker.
# Skip building backend tests, which aren't used.
# stdin is redirected from /dev/null -- presumably so that nothing in the
# build can block waiting for input.
time -p ./buildall.sh -format -testdata -notests < /dev/null
# Kill non-dockerised cluster.
start-impala-cluster.py --kill

# Build the docker images required to start the cluster.
# parquet-reader and impala-profile-tool are needed for e2e tests but not built for
# non-test build.
# The image make target is docker_debug_images, or docker_debug_java<N>_images
# when IMPALA_DOCKER_JAVA is 11 or 17. Any other value (default: 8) selects
# the plain docker_debug images.
IMAGE_TYPE=docker_debug
case "${IMPALA_DOCKER_JAVA:-8}" in
  11 | 17)
    IMAGE_TYPE="${IMAGE_TYPE}_java${IMPALA_DOCKER_JAVA}"
    ;;
  *)
    ;;
esac
make -j "${IMPALA_BUILD_THREADS}" \
  "${IMAGE_TYPE}_images" parquet-reader impala-profile-tool kudu-array-inserter

# Re-read the Impala environment after the build.
source_impala_config

# Configuration file paths handed to the impalads via --impalad_args.
FAIR_SCHED_CONF=/opt/impala/conf/minicluster-fair-scheduler.xml
LLAMA_CONF=/opt/impala/conf/minicluster-llama-site.xml

# Arguments exported for the test run as TEST_START_CLUSTER_ARGS. The variable
# is exported once; the appends below keep the export attribute.
export TEST_START_CLUSTER_ARGS="--docker_network=${DOCKER_NETWORK}"
TEST_START_CLUSTER_ARGS+=" --data_cache_dir=/tmp --data_cache_size=500m"
TEST_START_CLUSTER_ARGS+=" --impalad_args=--disk_spill_compression_codec=lz4"
TEST_START_CLUSTER_ARGS+=" --impalad_args=--disk_spill_punch_holes=true"
TEST_START_CLUSTER_ARGS+=" --impalad_args=-fair_scheduler_allocation_path=${FAIR_SCHED_CONF}"
TEST_START_CLUSTER_ARGS+=" --impalad_args=-llama_site_path=${LLAMA_CONF}"
export MAX_PYTEST_FAILURES=0
# Assign and export separately so that a failing nproc is not masked by the
# exit status of 'export'.
NUM_CONCURRENT_TESTS=$(nproc)
export NUM_CONCURRENT_TESTS
# Frontend tests fail because of localhost hardcoded everywhere
export FE_TEST=false
# No need to run backend tests - they are identical with non-docker build.
export BE_TEST=false
# TODO: custom cluster tests may provide some useful coverage but require work
# to make them start up dockerised clusters and will probably make more assumptions
# about the cluster being tested.
export CLUSTER_TEST=false

# Remember a test failure instead of exiting immediately, so that the
# minicluster is still shut down below.
RET_CODE=0
if ! ./bin/run-all-tests.sh; then
  RET_CODE=1
fi

# Shutdown minicluster at the end
./testdata/bin/kill-all.sh

exit "${RET_CODE}"