IMPALA-6042: Allow Impala shell to use a global impalarc config

Currently, impalarc files can be specified on a per-user basis
(stored in ~/.impalarc), and they aren't created by default. The
Impala shell should pick up /etc/impalarc as well, in addition
to the user-specific configurations.

The intent here is to allow a "global" configuration of the shell
by a system administrator. The default path of the global config
file can be changed by setting the $IMPALA_SHELL_GLOBAL_CONFIG_FILE
environment variable.

Note that the options set in the user config file take precedence
over those in the global config file.

Change-Id: I3a3179b6d9c9e3b2b01d6d3c5847cadb68782816
Reviewed-on: http://gerrit.cloudera.org:8080/13313
Reviewed-by: Bikramjeet Vig <bikramjeet.vig@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Ethan Xue
2019-05-09 14:21:42 -07:00
committed by Impala Public Jenkins
parent b1cb879577
commit 487547ec44
9 changed files with 112 additions and 36 deletions

View File

@@ -133,6 +133,7 @@ testdata/tzdb_tiny/*
tests/pytest.ini
tests/shell/bad_impalarc
tests/shell/good_impalarc
tests/shell/good_impalarc2
tests/shell/impalarc_with_error
tests/shell/impalarc_with_query_options
tests/shell/impalarc_with_warnings

View File

@@ -153,6 +153,8 @@ class ImpalaShell(object, cmd.Cmd):
# Minimum time in seconds between two calls to get the exec summary.
PROGRESS_UPDATE_INTERVAL = 1.0
# Environment variable used to source a global config file
GLOBAL_CONFIG_FILE = "IMPALA_SHELL_GLOBAL_CONFIG_FILE"
def __init__(self, options, query_options):
cmd.Cmd.__init__(self)
@@ -1608,34 +1610,58 @@ def get_intro(options):
if __name__ == "__main__":
"""
There are two types of options: shell options and query_options. Both can be set in the
command line, which override the options set in config file (.impalarc). The default
shell options come from impala_shell_config_defaults.py. Query options have no defaults
within the impala-shell, but they do have defaults on the server. Query options can be
also changed in impala-shell with the 'set' command.
There are two types of options: shell options and query_options. Both can be set on the
command line, which override default options. Specifically, if there exists a global
config file (default path: /etc/impalarc) then options are loaded from that file. If
there exists a user config file (~/.impalarc), then options are loaded in from that
file and override any options already loaded from the global impalarc. The default shell
options come from impala_shell_config_defaults.py. Query options have no defaults within
the impala-shell, but they do have defaults on the server. Query options can be also
changed in impala-shell with the 'set' command.
"""
# pass defaults into option parser
parser = get_option_parser(impala_shell_defaults)
options, args = parser.parse_args()
# use path to file specified by user in config_file option
user_config = os.path.expanduser(options.config_file);
# by default, use the .impalarc in the home directory
config_to_load = impala_shell_defaults.get("config_file")
# verify user_config, if found
if os.path.isfile(user_config) and user_config != config_to_load:
if options.verbose:
print_to_stderr("Loading in options from config file: %s \n" % user_config)
# Command line overrides loading ~/.impalarc
config_to_load = user_config
elif user_config != config_to_load:
print_to_stderr('%s not found.\n' % user_config)
sys.exit(1)
# default shell options loaded in from impala_shell_config_defaults.py
# options defaults overwritten by those in config file
# by default, use the impalarc in the user's home directory
# and superimpose it on the global impalarc config
global_config = os.path.expanduser(
os.environ.get(ImpalaShell.GLOBAL_CONFIG_FILE,
impala_shell_defaults['global_config_default_path']))
if os.path.isfile(global_config):
# Always output the source of the global config if verbose
if options.verbose:
print_to_stderr(
"Loading in options from global config file: %s \n" % global_config)
elif global_config != impala_shell_defaults['global_config_default_path']:
print_to_stderr('%s not found.\n' % global_config)
sys.exit(1)
# Override the default user config by a custom config if necessary
user_config = impala_shell_defaults.get("config_file")
input_config = os.path.expanduser(options.config_file)
# verify input_config, if found
if input_config != user_config:
if os.path.isfile(input_config):
if options.verbose:
print_to_stderr("Loading in options from config file: %s \n" % input_config)
# command line overrides loading ~/.impalarc
user_config = input_config
else:
print_to_stderr('%s not found.\n' % input_config)
sys.exit(1)
configs_to_load = [global_config, user_config]
# load shell and query options from the list of config files
# in ascending order of precedence
try:
loaded_shell_options, query_options = get_config_from_file(config_to_load,
parser.option_list)
loaded_shell_options = {}
query_options = {}
for config_file in configs_to_load:
s_options, q_options = get_config_from_file(config_file,
parser.option_list)
loaded_shell_options.update(s_options)
query_options.update(q_options)
impala_shell_defaults.update(loaded_shell_options)
except Exception, e:
print_to_stderr(e)

View File

@@ -17,7 +17,7 @@
# specific language governing permissions and limitations
# under the License.
# defaults for OptionParser options stored in dict
# default options used by the Impala shell stored in a dict
import getpass
import os
@@ -52,4 +52,5 @@ impala_shell_defaults = {
'version': False,
'write_delimited': False,
'client_connect_timeout_ms': 60000,
}
'global_config_default_path': '/etc/impalarc',
}

View File

@@ -30,7 +30,7 @@
import ConfigParser
import sys
from impala_shell_config_defaults import impala_shell_defaults
from optparse import OptionParser
from optparse import OptionParser, SUPPRESS_HELP
class ConfigFileFormatError(Exception):
@@ -285,8 +285,8 @@ def get_option_parser(defaults):
# (print quiet is false since verbose is true)
if option == parser.get_option('--quiet'):
option.help += " [default: %s]" % (not defaults['verbose'])
elif option != parser.get_option('--help'):
# don't want to print default value for help
elif option != parser.get_option('--help') and option.help is not SUPPRESS_HELP:
# don't want to print default value for help or options without help text
option.help += " [default: %default]"
parser.set_defaults(**defaults)

View File

@@ -0,0 +1,7 @@
[impala]
query=select 2
keyval=msg1=test
verbose=true
Q=DEFAULT_FILE_FORMAT=avro
[impala.query_options]
DEFAULT_FILE_FORMAT=text

View File

@@ -1,6 +1,9 @@
[impala]
Q=DEFAULT_FILE_FORMAT=avro
[impala.query_options]
EXPLAIN_LEVEL=1
explain_LEVEL=2
MT_DOP=2
invalid_query_option=1
DEFAULT_FILE_FORMAT=parquet

View File

@@ -24,6 +24,8 @@ import signal
import socket
import tempfile
from shell.impala_shell import ImpalaShell as ImpalaShellClass
from subprocess import call, Popen
from tests.common.impala_service import ImpaladService
from tests.common.impala_test_suite import ImpalaTestSuite, IMPALAD_HS2_HOST_PORT
@@ -474,15 +476,49 @@ class TestImpalaShell(ImpalaTestSuite):
assert 'UnicodeDecodeError' not in result.stderr
assert RUSSIAN_CHARS.encode('utf-8') in result.stdout
@pytest.mark.execute_serially # This tests invalidates metadata, and must run serially
def test_global_config_file(self, vector):
"""Test global and user configuration files."""
args = []
# shell uses shell options in global config
env = {
ImpalaShellClass.GLOBAL_CONFIG_FILE: '{0}/good_impalarc2'.format(QUERY_FILE_PATH)}
result = run_impala_shell_cmd(vector, args, env=env)
assert 'WARNING:' not in result.stderr, \
"A valid config file should not trigger any warning: {0}".format(result.stderr)
assert 'Query: select 2' in result.stderr
# shell uses query options in global config
args = ['-q', 'set;']
result = run_impala_shell_cmd(vector, args, env=env)
assert 'DEFAULT_FILE_FORMAT: avro' in result.stdout
# shell options and query options in global config get overriden
# by options in user config
args = ['--config_file={0}/good_impalarc'.format(QUERY_FILE_PATH),
"""--query=select '${VAR:msg1}'; set"""]
result = run_impala_shell_cmd(vector, args, env=env)
assert 'Query: select \'hello\'' in result.stderr
assert 'DEFAULT_FILE_FORMAT: parquet' in result.stdout
# command line options override options in global config
args = ['--query_option=DEFAULT_FILE_FORMAT=text',
"""--query=select '${VAR:msg1}'; set"""]
result = run_impala_shell_cmd(vector, args, env=env)
assert 'Query: select \'test\'' in result.stderr
assert 'DEFAULT_FILE_FORMAT: text' in result.stdout
# specified global config file does not exist
env = {ImpalaShellClass.GLOBAL_CONFIG_FILE: '/does_not_exist'}
run_impala_shell_cmd(vector, args, env=env, expect_success=False)
def test_config_file(self, vector):
"""Test the optional configuration file."""
# Positive tests
args = ['--config_file=%s/good_impalarc' % QUERY_FILE_PATH]
result = run_impala_shell_cmd(vector, args)
assert 'WARNING:' not in result.stderr
assert 'WARNING:' not in result.stderr, \
"A valid config file should not trigger any warning: {0}".format(result.stderr)
assert 'Query: select 1' in result.stderr
# override option in config file through command line
args = ['--config_file={0}/good_impalarc'.format(QUERY_FILE_PATH), '--query=select 2']
result = run_impala_shell_cmd(vector, args)
@@ -773,7 +809,7 @@ class TestImpalaShell(ImpalaTestSuite):
try:
args = ['-q', '-f', sql_path, '-d', unique_database]
start_time = time()
run_impala_shell_cmd(vector, args, False)
run_impala_shell_cmd(vector, args, expect_success=False)
end_time = time()
time_limit_s = 10
actual_time_s = end_time - start_time

View File

@@ -431,6 +431,8 @@ class TestImpalaShellInteractive(ImpalaTestSuite):
assert "\tEXPLAIN_LEVEL: 2" in result.stdout
assert "INVALID_QUERY_OPTION is not supported for the impalad being connected to, "\
"ignoring." in result.stdout
# Verify that query options under [impala] override those under [impala.query_options]
assert "\tDEFAULT_FILE_FORMAT: avro" in result.stdout
def test_source_file(self, vector):
cwd = os.getcwd()

View File

@@ -98,14 +98,14 @@ def assert_pattern(pattern, result, text, message):
assert m and m.group(0) == result, message
def run_impala_shell_cmd(vector, shell_args, expect_success=True, stdin_input=None,
wait_until_connected=True):
def run_impala_shell_cmd(vector, shell_args, env=None, expect_success=True,
stdin_input=None, wait_until_connected=True):
"""Runs the Impala shell on the commandline.
'shell_args' is a string which represents the commandline options.
Returns a ImpalaShellResult.
"""
result = run_impala_shell_cmd_no_expect(vector, shell_args, stdin_input,
result = run_impala_shell_cmd_no_expect(vector, shell_args, env, stdin_input,
expect_success and wait_until_connected)
if expect_success:
assert result.rc == 0, "Cmd %s was expected to succeed: %s" % (shell_args,
@@ -115,7 +115,7 @@ def run_impala_shell_cmd(vector, shell_args, expect_success=True, stdin_input=No
return result
def run_impala_shell_cmd_no_expect(vector, shell_args, stdin_input=None,
def run_impala_shell_cmd_no_expect(vector, shell_args, env=None, stdin_input=None,
wait_until_connected=True):
"""Runs the Impala shell on the commandline.
@@ -124,7 +124,7 @@ def run_impala_shell_cmd_no_expect(vector, shell_args, stdin_input=None,
Does not assert based on success or failure of command.
"""
p = ImpalaShell(vector, shell_args, wait_until_connected=wait_until_connected)
p = ImpalaShell(vector, shell_args, env=env, wait_until_connected=wait_until_connected)
result = p.get_result(stdin_input)
return result