IMPALA-8534: data cache for dockerised tests

This adds support for the data cache in dockerised clusters started by
start-impala-cluster.py. The data cache is handled the same way as the
log directories: a separate data cache directory is created on the host
for each container and bind-mounted at /opt/impala/cache inside the
container.

The data cache is then enabled by default for the dockerised tests
(--data_cache_dir=/tmp --data_cache_size=500m).

Testing:
Did a dockerised test run.

Change-Id: I2c75d4a5c1eea7a540d051bb175537163dec0e29
Reviewed-on: http://gerrit.cloudera.org:8080/13934
Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Tim Armstrong
2019-07-26 18:39:36 -07:00
committed by Impala Public Jenkins
parent b6b45c0665
commit 88da6fd421
2 changed files with 26 additions and 4 deletions

View File

@@ -77,6 +77,7 @@ make -j ${IMPALA_BUILD_THREADS} docker_images parquet-reader
source_impala_config
export TEST_START_CLUSTER_ARGS="--docker_network=${DOCKER_NETWORK}"
TEST_START_CLUSTER_ARGS+=" --data_cache_dir=/tmp --data_cache_size=500m"
export MAX_PYTEST_FAILURES=0
export NUM_CONCURRENT_TESTS=$(nproc)
# Frontend tests fail because of localhost hardcoded everywhere

View File

@@ -138,6 +138,8 @@ IMPALA_HOME = os.environ["IMPALA_HOME"]
CORE_SITE_PATH = os.path.join(IMPALA_HOME, "fe/src/test/resources/core-site.xml")
KNOWN_BUILD_TYPES = ["debug", "release", "latest"]
IMPALA_LZO = os.environ["IMPALA_LZO"]
# The location in the container where the cache is always mounted.
DATA_CACHE_CONTAINER_PATH = "/opt/impala/cache"
# Kills have a timeout to prevent automated scripts from hanging indefinitely.
# It is set to a high value to avoid failing if processes are slow to shut down.
@@ -348,8 +350,15 @@ def build_impalad_arg_lists(cluster_size, num_coordinators, use_exclusive_coordi
# Try creating the directory if it doesn't exist already. May raise exception.
if not os.path.exists(data_cache_path):
os.mkdir(data_cache_path)
if options.docker_network is None:
data_cache_path_arg = data_cache_path
else:
# The data cache directory will always be mounted at the same path inside the
# container.
data_cache_path_arg = DATA_CACHE_CONTAINER_PATH
args = "-data_cache={dir}:{quota} {args}".format(
dir=data_cache_path, quota=options.data_cache_size, args=args)
dir=data_cache_path_arg, quota=options.data_cache_size, args=args)
# Appended at the end so they can override previous args.
if i < len(per_impalad_args):
@@ -526,7 +535,7 @@ class DockerMiniClusterOperations(object):
DEFAULT_HS2_HTTP_PORT: chosen_ports['hs2_http_port'],
DEFAULT_IMPALAD_WEBSERVER_PORT: chosen_ports['webserver_port']}
self.__run_container__("impalad_coord_exec", impalad_arg_lists[i], port_map, i,
mem_limit=mem_limit)
mem_limit=mem_limit, supports_data_cache=True)
def __gen_container_name__(self, daemon, instance=None):
"""Generate the name for the container, which should be unique among containers
@@ -541,7 +550,8 @@ class DockerMiniClusterOperations(object):
return daemon
return "{0}-{1}".format(daemon, instance)
def __run_container__(self, daemon, args, port_map, instance=None, mem_limit=None):
def __run_container__(self, daemon, args, port_map, instance=None, mem_limit=None,
supports_data_cache=False):
"""Launch a container with the daemon - impalad, catalogd, or statestored. If there
are multiple impalads in the cluster, a unique instance number must be specified.
'args' are command-line arguments to be appended to the end of the daemon command
@@ -549,7 +559,9 @@ class DockerMiniClusterOperations(object):
--docker_auto_ports was set on the command line, 'port_map' is ignored and Docker
will automatically choose the mapping. If there is an existing running or stopped
container with the same name, it will be destroyed. If provided, mem_limit is
passed to "docker run" as a string to set the memory limit for the container."""
passed to "docker run" as a string to set the memory limit for the container.
If 'supports_data_cache' is true and the data cache is enabled via --data_cache_dir,
mount the data cache inside the container."""
self.__destroy_container__(daemon, instance)
if options.docker_auto_ports:
port_args = ["-P"]
@@ -578,6 +590,15 @@ class DockerMiniClusterOperations(object):
os.makedirs(log_dir)
mount_args += ["--mount", "type=bind,src={0},dst=/opt/impala/logs".format(log_dir)]
# Create a data cache subdirectory for each daemon and mount at /opt/impala/cache
# in the container.
if options.data_cache_dir and supports_data_cache:
data_cache_dir = os.path.join(options.data_cache_dir, host_name + "_cache")
if not os.path.isdir(data_cache_dir):
os.makedirs(data_cache_dir)
mount_args += ["--mount", "type=bind,src={0},dst={1}".format(
data_cache_dir, DATA_CACHE_CONTAINER_PATH)]
# Run the container as the current user.
user_args = ["--user", "{0}:{1}".format(os.getuid(), os.getgid())]