Files
impala/tests/performance/query_executor.py
Thomas Tauber-Marshall e6e2baea33 IMPALA-4372: 'Describe formatted' returns types in upper case
A recent change caused 'describe formatted' to display the types
in all upper case, but we want 'describe formatted' to match Hive's
'describe' output, which displays the types in lower case.

This patch also fixes several problems with test_describe_formatted,
which was encountering an error but reporting success.

Change-Id: I274b97d4d1247244247fb38a5ca7f4c10bba8d22
Reviewed-on: http://gerrit.cloudera.org:8080/4861
Reviewed-by: Dimitris Tsirogiannis <dtsirogiannis@cloudera.com>
Tested-by: Internal Jenkins
2016-11-15 05:38:12 +00:00

229 lines
7.2 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#
# Module used for executing queries and gathering results.
# The QueryExecutor is meant to be generic and doesn't
# have the knowledge of how to actually execute a query. It takes a query and its config
# and executes is against a executor function.
# For example (in pseudo-code):
#
# def exec_func(query, config):
# ...
#
# config = ImpalaBeeswaxQueryExecConfig()
# executor = QueryExecutor('beeswax', query, config, exec_func)
# executor.run()
# result = executor.result
import logging
import os
# Setup logging for this module.
logging.basicConfig(level=logging.INFO, format='[%(name)s] %(threadName)s: %(message)s')
LOG = logging.getLogger('query_executor')
LOG.setLevel(level=logging.INFO)
# globals.
hive_result_regex = 'Time taken: (\d*).(\d*) seconds'
## TODO: Split executors into their own modules.
class QueryExecConfig(object):
"""Base Class for Execution Configs
Attributes:
plugin_runner (PluginRunner?)
"""
def __init__(self, plugin_runner=None):
self.plugin_runner = plugin_runner
class ImpalaQueryExecConfig(QueryExecConfig):
"""Base class for Impala query execution config
Attributes:
impalad (str): address of impalad <host>:<port>
"""
def __init__(self, plugin_runner=None, impalad='localhost:21000'):
super(ImpalaQueryExecConfig, self).__init__(plugin_runner=plugin_runner)
self._impalad = impalad
@property
def impalad(self):
return self._impalad
@impalad.setter
def impalad(self, value):
self._impalad = value
class JdbcQueryExecConfig(ImpalaQueryExecConfig):
"""Impala query execution config for jdbc
Attributes:
tranport (?): ?
"""
JDBC_CLIENT_PATH = os.path.join(os.environ['IMPALA_HOME'], 'bin/run-jdbc-client.sh')
def __init__(self, plugin_runner=None, impalad='localhost:21050', transport=None):
super(JdbcQueryExecConfig, self).__init__(plugin_runner=plugin_runner,
impalad=impalad)
self.transport = transport
@property
def jdbc_client_cmd(self):
"""The args to run the jdbc client.
Constructed on the fly, since the impalad it points to can change.
"""
assert self.transport is not None
return JdbcQueryExecConfig.JDBC_CLIENT_PATH + ' -i "%s" -t %s' % (self._impalad,
self.transport)
class ImpalaHS2QueryConfig(ImpalaQueryExecConfig):
def __init__(self, use_kerberos=False, impalad="localhost:21050", plugin_runner=None):
super(ImpalaHS2QueryConfig, self).__init__(plugin_runner=plugin_runner,
impalad=impalad)
# TODO Use a config dict for query execution options similar to HS2
self.use_kerberos = use_kerberos
class HiveHS2QueryConfig(QueryExecConfig):
def __init__(self,
plugin_runner=None,
exec_options = None,
use_kerberos=False,
user=None,
hiveserver='localhost'):
super(HiveHS2QueryConfig, self).__init__()
self.exec_options = dict()
self._build_options(exec_options)
self.use_kerberos = use_kerberos
self.user = user
self.hiveserver = hiveserver
def _build_options(self, exec_options):
"""Read the exec_options into self.exec_options
Args:
exec_options (str): String formatted as "command1;command2"
"""
if exec_options:
# exec_options are seperated by ; on the command line
options = exec_options.split(';')
for option in options:
key, value = option.split(':')
# The keys in HiveService QueryOptions are lower case.
self.exec_options[key.lower()] = value
class BeeswaxQueryExecConfig(ImpalaQueryExecConfig):
"""Impala query execution config for beeswax
Args:
use_kerberos (boolean)
exec_options (str): String formatted as "opt1:val1;opt2:val2"
impalad (str): address of impalad <host>:<port>
plugin_runner (?): ?
Attributes:
use_kerberos (boolean)
exec_options (dict str -> str): execution options
"""
def __init__(self, use_kerberos=False, exec_options=None, impalad='localhost:21000',
plugin_runner=None):
super(BeeswaxQueryExecConfig, self).__init__(plugin_runner=plugin_runner,
impalad=impalad)
self.use_kerberos = use_kerberos
self.exec_options = dict()
self._build_options(exec_options)
def _build_options(self, exec_options):
"""Read the exec_options into self.exec_options
Args:
exec_options (str): String formatted as "opt1:val1;opt2:val2"
"""
if exec_options:
# exec_options are seperated by ; on the command line
options = exec_options.split(';')
for option in options:
key, value = option.split(':')
# The keys in ImpalaService QueryOptions are upper case.
self.exec_options[key.upper()] = value
class QueryExecutor(object):
"""Executes a query.
Args:
name (str): eg. "hive"
query (str): string containing SQL query to be executed
func (function): Function that accepts a QueryExecOption parameter and returns a
ImpalaQueryResult. Eg. execute_using_impala_beeswax
config (QueryExecOption)
exit_on_error (boolean): Exit right after an error encountered.
Attributes:
exec_func (function): Function that accepts a QueryExecOption parameter and returns a
ImpalaQueryResult.
exec_config (QueryExecOption)
query (str): string containing SQL query to be executed
exit_on_error (boolean): Exit right after an error encountered.
executor_name (str): eg. "hive"
result (ImpalaQueryResult): Contains the result after execute method is called.
"""
def __init__(self, name, query, func, config, exit_on_error):
self.exec_func = func
self.exec_config = config
self.query = query
self.exit_on_error = exit_on_error
self.executor_name = name
self._result = None
def prepare(self, impalad):
"""Prepare the query to be run.
For now, this sets the impalad that the query connects to. If the executor is hive,
it's a no op.
"""
if 'hive' not in self.executor_name:
self.exec_config.impalad = impalad
def execute(self):
"""Execute the query using the given execution function"""
LOG.debug('Executing %s' % self.query)
self._result = self.exec_func(self.query, self.exec_config)
if not self._result.success:
if self.exit_on_error:
raise RuntimeError(self._result.query_error)
else:
LOG.info("Continuing execution")
@property
def result(self):
"""Getter for the result of the query execution.
A result is a ImpalaQueryResult object that contains the details of a single run of
the query.
"""
return self._result