mirror of
https://github.com/apache/impala.git
synced 2025-12-23 21:08:39 -05:00
IMPALA-12727: Reduce IO threads for non-TARGET_FILESYSTEM filesystems
The DiskIoMgr starts a large number of threads for each different type of object store, most of which are idle. For development, this slows down processing minidumps and debugging with gdb. This adds an option "reduce_disk_io_threads" to bin/start-impala-cluster.py that sets the thread count startup parameter to one for any filesystem that is not the TARGET_FILESYSTEM. On a typical development setup running against HDFS, this reduces the number of DiskIoMgr threads by 150 and the HDFS monitoring threads by 150 as well. This option is enabled by default. It can be disabled by setting --reduce_disk_io_threads=False for bin/start-impala-cluster.py. Separately, DiskIoMgr should be modified to reduce the number of threads it spawns in general. Testing: - Hand tested this on my local development system Change-Id: Ic8ee1fb1f9b9fe65d542d024573562b3bb120b76 Reviewed-on: http://gerrit.cloudera.org:8080/20920 Reviewed-by: Michael Smith <michael.smith@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
@@ -53,6 +53,7 @@ LOG.setLevel(level=logging.DEBUG)
|
||||
KUDU_MASTER_HOSTS = os.getenv("KUDU_MASTER_HOSTS", "127.0.0.1")
|
||||
DEFAULT_IMPALA_MAX_LOG_FILES = os.environ.get("IMPALA_MAX_LOG_FILES", 10)
|
||||
INTERNAL_LISTEN_HOST = os.getenv("INTERNAL_LISTEN_HOST", "localhost")
|
||||
TARGET_FILESYSTEM = os.getenv("TARGET_FILESYSTEM") or "hdfs"
|
||||
|
||||
# Options
|
||||
parser = OptionParser()
|
||||
@@ -168,6 +169,10 @@ parser.add_option("--enable_statestored_ha", dest="enable_statestored_ha",
|
||||
action="store_true", default=False,
|
||||
help="If true, enables StatestoreD HA - the cluster will be launched "
|
||||
"with two statestored instances as Active-Passive HA pair.")
|
||||
parser.add_option("--reduce_disk_io_threads", default="True", type="choice",
|
||||
choices=["true", "True", "false", "False"],
|
||||
help="If true, reduce the number of disk io mgr threads for "
|
||||
"filesystems that are not the TARGET_FILESYSTEM.")
|
||||
|
||||
# For testing: list of comma-separated delays, in milliseconds, that delay impalad catalog
|
||||
# replica initialization. The ith delay is applied to the ith impalad.
|
||||
@@ -577,6 +582,34 @@ def build_impalad_arg_lists(cluster_size, num_coordinators, use_exclusive_coordi
|
||||
args=args, state_store_port=state_store_port,
|
||||
state_store_2_port=state_store_2_port)
|
||||
|
||||
if options.reduce_disk_io_threads.lower() == 'true':
|
||||
# This leaves the default value for the TARGET_FILESYSTEM, but it reduces the thread
|
||||
# count for every other filesystem that is not the TARGET_FILESYSTEM.
|
||||
if TARGET_FILESYSTEM != 'abfs':
|
||||
args = "{args} -num_abfs_io_threads=1".format(args=args)
|
||||
if TARGET_FILESYSTEM != 'adls':
|
||||
args = "{args} -num_adls_io_threads=1".format(args=args)
|
||||
if TARGET_FILESYSTEM != 'cosn':
|
||||
args = "{args} -num_cos_io_threads=1".format(args=args)
|
||||
if TARGET_FILESYSTEM != 'gs':
|
||||
args = "{args} -num_gcs_io_threads=1".format(args=args)
|
||||
if TARGET_FILESYSTEM != 'hdfs':
|
||||
args = "{args} -num_remote_hdfs_file_oper_io_threads=1".format(args=args)
|
||||
args = "{args} -num_remote_hdfs_io_threads=1".format(args=args)
|
||||
if TARGET_FILESYSTEM != 'obs':
|
||||
args = "{args} -num_obs_io_threads=1".format(args=args)
|
||||
if TARGET_FILESYSTEM != 'oss':
|
||||
args = "{args} -num_oss_io_threads=1".format(args=args)
|
||||
if TARGET_FILESYSTEM != 'ozone':
|
||||
args = "{args} -num_ozone_io_threads=1".format(args=args)
|
||||
if TARGET_FILESYSTEM != 's3':
|
||||
args = "{args} -num_s3_io_threads=1".format(args=args)
|
||||
args = "{args} -num_s3_file_oper_io_threads=1".format(args=args)
|
||||
|
||||
# SFS (single-file system) doesn't have a corresponding TARGET_FILESYSTEM, and
|
||||
# it can always be restricted.
|
||||
args = "{args} -num_sfs_io_threads=1".format(args=args)
|
||||
|
||||
if "geospatial_library" not in args:
|
||||
args = "{args} -geospatial_library={geospatial_library}".format(
|
||||
args=args, geospatial_library=options.geospatial_library)
|
||||
|
||||
Reference in New Issue
Block a user