Mirror of https://github.com/apache/impala.git, synced 2025-12-30 12:02:10 -05:00

Added execution summary, modified benchmark to handle JSON

- Added execution summary to the beeswax client and QueryResult
- Modified report-benchmark-results to handle JSON and perform execution summary
  comparison between runs
- Added comments to the new workload runner

Change-Id: I9c3c5f2fdc5d8d1e70022c4077334bc44e3a2d1d
Reviewed-on: http://gerrit.ent.cloudera.com:8080/3598
Reviewed-by: Taras Bobrovytsky <tbobrovytsky@cloudera.com>
Tested-by: jenkins
(cherry picked from commit fd0b1406be2511c202e02fa63af94fbbe5e18eee)
Reviewed-on: http://gerrit.ent.cloudera.com:8080/3618

Committed by: jenkins
Parent: 3bed0be1df
Commit: e94de02469
@@ -17,10 +17,16 @@ import shlex
 import traceback
 import getpass
 import re
+import prettytable
 
 from beeswaxd import BeeswaxService
 from beeswaxd.BeeswaxService import QueryState
+from datetime import datetime
+try:
+  # If Exec Summary is not implemented in Impala, this cannot be imported
+  from ExecStats.ttypes import TExecStats
+except ImportError:
+  pass
 from ImpalaService import ImpalaService
 from ImpalaService.ImpalaService import TImpalaQueryOptions, TResetTableReq
 from tests.util.thrift_util import create_transport
@@ -57,6 +63,7 @@ class ImpalaBeeswaxResult(object):
     self.summary = kwargs.get('summary', str())
     self.schema = kwargs.get('schema', None)
     self.runtime_profile = kwargs.get('runtime_profile', str())
+    self.exec_summary = kwargs.get('exec_summary', None)
 
   def get_data(self):
     return self.__format_data()
@@ -154,7 +161,7 @@ class ImpalaBeeswaxClient(object):
     start = time.time()
+    start_time = datetime.now()
     handle = self.__execute_query(query_string.strip())
     result = self.fetch_results(query_string, handle)
     result.time_taken = time.time() - start
+    result.start_time = start_time
     # Don't include the time it takes to get the runtime profile in the execution time
@@ -164,8 +171,125 @@ class ImpalaBeeswaxClient(object):
     # the handle twice.
     if self.__get_query_type(query_string) != 'insert':
       self.close_query(handle)
+    result.exec_summary = self.get_exec_summary(handle)
     return result
 
+  def get_exec_summary(self, handle):
+    """Calls GetExecSummary() for the last query handle"""
+    try:
+      summary = self.__do_rpc(lambda: self.imp_service.GetExecSummary(handle))
+    except ImpalaBeeswaxException:
+      summary = None
+
+    if summary is None or summary.nodes is None:
+      return None
+    # If exec summary is not implemented in Impala, this function returns above, so
+    # we never reach __build_summary_table, which requires TExecStats to be imported.
+
+    output = []
+    self.__build_summary_table(summary, 0, False, 0, output)
+    return output
+
+  def __build_summary_table(self, summary, idx, is_fragment_root, indent_level, output):
+    """NOTE: This was taken from impala_shell.py. This method will be placed in a
+    library that is shared between impala_shell and this file.
+
+    Direct translation of Coordinator::PrintExecSummary() to recursively build a list
+    of rows of summary statistics, one per exec node.
+
+    summary: the TExecSummary object that contains all the summary data
+
+    idx: the index of the node to print
+
+    is_fragment_root: true if the node to print is the root of a fragment (and
+    therefore feeds into an exchange)
+
+    indent_level: the number of spaces to print before writing the node's label, to
+    give the appearance of a tree. The 0th child of a node has the same indent_level
+    as its parent. All other children have an indent_level of one greater than their
+    parent.
+
+    output: the list of rows into which to append the rows produced for this node and
+    its children.
+
+    Returns the index of the next exec node in summary.exec_nodes that should be
+    processed, used internally to this method only.
+    """
+    attrs = ["latency_ns", "cpu_time_ns", "cardinality", "memory_used"]
+
+    # Initialise aggregate and maximum stats
+    agg_stats, max_stats = TExecStats(), TExecStats()
+    for attr in attrs:
+      setattr(agg_stats, attr, 0)
+      setattr(max_stats, attr, 0)
+
+    node = summary.nodes[idx]
+    for stats in node.exec_stats:
+      for attr in attrs:
+        val = getattr(stats, attr)
+        if val is not None:
+          setattr(agg_stats, attr, getattr(agg_stats, attr) + val)
+          setattr(max_stats, attr, max(getattr(max_stats, attr), val))
+
+    if len(node.exec_stats) > 0:
+      avg_time = agg_stats.latency_ns / len(node.exec_stats)
+    else:
+      avg_time = 0
+
+    # If the node is a broadcast-receiving exchange node, the cardinality of rows
+    # produced is the max over all instances (which should all have received the same
+    # number of rows). Otherwise, the cardinality is the sum over all instances which
+    # process disjoint partitions.
+    if node.is_broadcast and is_fragment_root:
+      cardinality = max_stats.cardinality
+    else:
+      cardinality = agg_stats.cardinality
+
+    est_stats = node.estimated_stats
+
+    label_prefix = ""
+    if indent_level > 0:
+      label_prefix = "|"
+      if is_fragment_root:
+        label_prefix += " " * indent_level
+      else:
+        label_prefix += "--" * indent_level
+
+    row = {}
+    row["prefix"] = label_prefix
+    row["operator"] = node.label
+    row["num_hosts"] = len(node.exec_stats)
+    row["avg_time"] = avg_time
+    row["max_time"] = max_stats.latency_ns
+    row["num_rows"] = cardinality
+    row["est_num_rows"] = est_stats.cardinality
+    row["peak_mem"] = max_stats.memory_used
+    row["est_peak_mem"] = est_stats.memory_used
+    row["detail"] = node.label_detail
+    output.append(row)
+
+    try:
+      sender_idx = summary.exch_to_sender_map[idx]
+      # This is an exchange node, so the sender is a fragment root, and should be
+      # printed next.
+      self.__build_summary_table(summary, sender_idx, True, indent_level, output)
+    except (KeyError, TypeError):
+      # Fall through if idx not in map, or if exch_to_sender_map itself is not set
+      pass
+
+    idx += 1
+    if node.num_children > 0:
+      first_child_output = []
+      idx = \
+        self.__build_summary_table(summary, idx, False, indent_level, first_child_output)
+      for child_idx in xrange(1, node.num_children):
+        # All other children are indented (we only have 0, 1 or 2 children for every
+        # exec node at the moment)
+        idx = self.__build_summary_table(summary, idx, False, indent_level + 1, output)
+      output += first_child_output
+    return idx
+
   def get_runtime_profile(self, handle):
     return self.__do_rpc(lambda: self.imp_service.GetRuntimeProfile(handle))
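Note that get_exec_summary() returns the summary as a list of row dicts rather than a formatted string, leaving rendering to the caller. A minimal rendering sketch (not part of this commit; the helper name and column titles are assumptions) using the prettytable import added at the top of this diff:

import prettytable

def format_exec_summary(rows):
  # Column set mirrors the keys written by __build_summary_table(). Times are
  # in nanoseconds and memory in bytes; a real caller would pretty-print them.
  table = prettytable.PrettyTable(
      ['Operator', '#Hosts', 'Avg Time', 'Max Time', '#Rows', 'Est. #Rows',
       'Peak Mem', 'Est. Peak Mem', 'Detail'])
  table.align = 'l'
  for row in rows:
    table.add_row([row['prefix'] + row['operator'], row['num_hosts'],
                   row['avg_time'], row['max_time'], row['num_rows'],
                   row['est_num_rows'], row['peak_mem'], row['est_peak_mem'],
                   row['detail']])
  return str(table)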
@@ -20,9 +20,9 @@ class PerfResultDataStore(object):
       (username, password, host, database_name)
     self.connection = MySQLdb.connect(host, username, password, database_name)
 
-  def get_file_format_id(self, file_format, compression):
+  def get_file_format_id(self, file_format, compression_codec, compression_type):
     """ Gets the file_format_id for the given file_format/compression codec"""
-    return self.__get_file_format_id(file_format, compression)
+    return self.__get_file_format_id(file_format, compression_codec, compression_type)
 
   def get_query_id(self, query_name, query):
     """ Gets the query_id for the given query name and query text """

@@ -70,12 +70,8 @@ class PerfResultDataStore(object):
 
   # Internal methods
   @cursor_wrapper
-  def __get_file_format_id(self, file_format, compression, cursor):
+  def __get_file_format_id(self, file_format, compression_codec, compression_type, cursor):
     """ Gets the file_format_id for the given file_format/compression codec"""
-    if compression == 'none':
-      compression_codec, compression_type = ['none', 'none']
-    else:
-      compression_codec, compression_type = compression.split('/')
     result = cursor.execute("select file_type_id from FileType where format=%s and "\
         "compression_codec=%s and compression_type=%s",
         (file_format, compression_codec, compression_type))
File diff suppressed because it is too large
@@ -18,7 +18,18 @@ from tests.util.test_file_parser import QueryTestSectionReader
 # TODO: This interface needs to be more robust; At the moment, it has two users with
 # completely different uses (the benchmark suite and the impala test suite)
 class Query(object):
-  """Represents a query and all the information needed to execute it"""
+  """Represents a query and all the information needed to execute it
+
+  Attributes:
+    query_str (str): The SQL query string.
+    name (str): query name?
+    scale_factor (str): for example 300gb, used to determine the database.
+    test_vector (?): Specifies some parameters.
+    results (list of ?): ?
+    workload_name (str): for example tpch, tpcds, visa (used to determine directory)
+    db (str): ? represents the database
+    table_format_str (str): ?
+  """
   def __init__(self, **kwargs):
     self.query_str = kwargs.get('query_str')
     self.name = kwargs.get('name')

@@ -41,6 +52,7 @@ class Query(object):
             self.db == other.db)
 
   def __build_query(self):
+    """Populates db, query_str, table_format_str"""
     self.db = QueryTestSectionReader.get_db_name(self.test_vector, self.scale_factor)
     self.query_str = QueryTestSectionReader.build_query(self.query_str.strip())
     self.table_format_str = '%s/%s/%s' % (self.test_vector.file_format,
@@ -56,16 +68,27 @@ class Query(object):
 class QueryResult(object):
   """Contains the results of a query execution.
 
-  A query execution result contains the following fields:
-  query - The query object
-  time_taken - Time taken to execute the query
-  start_time - The time at which the client submits the query.
-  data - Query results
-  client_name - The thread id
-  runtime_profile - Saved runtime profile of the query's execution.
-  query_error - Empty string if the query succeeded. Error returned by the client if
-  it failed.
+  Parameters:
+    Required:
+      query (Query): The query object associated with this result.
+      start_time (datetime): Timestamp at the start of execution.
+      query_config (BeeswaxQueryExecConfig)
+      client_name (int): The thread id
+
+    Optional:
+      time_taken (float): Time taken to execute the query.
+      summary (str): query execution summary (ex. returned 10 rows)
+      data (list of str): Query results returned by Impala.
+      runtime_profile (str): Saved runtime profile of the query's execution.
+      exec_summary (TExecSummary)
+      success (bool): True if the execution was successful.
+
+  Attributes - these are modified by another class:
+    query_error (str): Empty string if the query succeeded. Error returned by the
+      client if it failed.
+    executor_name (str)
   """
 
   def __init__(self, query, **kwargs):
     self.query = query
     self.time_taken = kwargs.get('time_taken', 0.0)

@@ -75,6 +98,7 @@ class QueryResult(object):
     self.query_config = kwargs.get('query_config')
     self.client_name = kwargs.get('client_name')
     self.runtime_profile = kwargs.get('runtime_profile', str())
+    self.exec_summary = kwargs.get('exec_summary', str())
     self.success = kwargs.get('success', False)
     self.query_error = str()
     self.executor_name = str()
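A brief usage sketch of the constructor documented above (illustrative only; `query` and `config` stand in for a Query and a BeeswaxQueryExecConfig built elsewhere):

result = QueryResult(query,
                     query_config=config,
                     client_name=0,
                     start_time=datetime.now(),
                     time_taken=2.5,
                     summary='Returned 10 row(s)',
                     success=True)
assert result.query_error == ''  # set later by the executor if the query fails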
@@ -50,13 +50,22 @@ hive_result_regex = 'Time taken: (\d*).(\d*) seconds'
 
 ## TODO: Split executors into their own modules.
 class QueryExecConfig(object):
-  """Base Class for Execution Configs"""
+  """Base Class for Execution Configs
+
+  Attributes:
+    plugin_runner (PluginRunner?)
+  """
   def __init__(self, plugin_runner=None):
     self.plugin_runner = plugin_runner
 
 
 class ImpalaQueryExecConfig(QueryExecConfig):
-  """Base class for Impala query execution config"""
+  """Base class for Impala query execution config
+
+  Attributes:
+    impalad (str): address of impalad <host>:<port>
+  """
 
   def __init__(self, plugin_runner=None, impalad='localhost:21000'):
     super(ImpalaQueryExecConfig, self).__init__(plugin_runner=plugin_runner)
     self._impalad = impalad

@@ -71,8 +80,14 @@ class ImpalaQueryExecConfig(QueryExecConfig):
 
 
 class JdbcQueryExecConfig(ImpalaQueryExecConfig):
-  """Impala query execution config for jdbc"""
+  """Impala query execution config for jdbc
+
+  Attributes:
+    transport (?): ?
+  """
 
   JDBC_CLIENT_PATH = os.path.join(os.environ['IMPALA_HOME'], 'bin/run-jdbc-client.sh')
 
   def __init__(self, plugin_runner=None, impalad='localhost:21050', transport=None):
     super(JdbcQueryExecConfig, self).__init__(plugin_runner=plugin_runner,
                                               impalad=impalad)

@@ -87,9 +102,20 @@ class JdbcQueryExecConfig(ImpalaQueryExecConfig):
     return JdbcQueryExecConfig.JDBC_CLIENT_PATH + ' -i "%s" -t %s' % (self._impalad,
                                                                       self.transport)
 
 
 class BeeswaxQueryExecConfig(ImpalaQueryExecConfig):
-  """Impala query execution config for beeswax"""
+  """Impala query execution config for beeswax
+
+  Args:
+    use_kerberos (boolean)
+    exec_options (str): String formatted as "opt1:val1;opt2:val2"
+    impalad (str): address of impalad <host>:<port>
+    plugin_runner (?): ?
+
+  Attributes:
+    use_kerberos (boolean)
+    exec_options (dict str -> str): execution options
+  """
 
   def __init__(self, use_kerberos=False, exec_options=None, impalad='localhost:21000',
                plugin_runner=None):
     super(BeeswaxQueryExecConfig, self).__init__(plugin_runner=plugin_runner,

@@ -99,7 +125,12 @@ class BeeswaxQueryExecConfig(ImpalaQueryExecConfig):
     self.__build_options(exec_options)
 
   def __build_options(self, exec_options):
-    """Read the exec_options into a dictionary"""
+    """Read the exec_options into self.exec_options
+
+    Args:
+      exec_options (str): String formatted as "opt1:val1;opt2:val2"
+    """
     if exec_options:
       # exec_options are separated by ; on the command line
       options = exec_options.split(';')
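The hunk is cut off before the parsing loop, but the documented "opt1:val1;opt2:val2" format implies a split on ';' followed by a split on ':'. A standalone sketch of that parsing (assumed, not the commit's exact code):

def parse_exec_options(exec_options):
  """Parses 'opt1:val1;opt2:val2' into {'opt1': 'val1', 'opt2': 'val2'}."""
  parsed = {}
  if exec_options:
    for option in exec_options.split(';'):
      key, value = option.split(':', 1)
      parsed[key] = value
  return parsed

assert parse_exec_options('mem_limit:1g;num_nodes:1') == \
    {'mem_limit': '1g', 'num_nodes': '1'}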
@@ -121,13 +152,27 @@ class HiveQueryExecConfig(QueryExecConfig):
 
 
 class QueryExecutor(object):
-  def __init__(self, name, query, func, config, exit_on_error):
-    """
-    Executes a query.
+  """Executes a query.
+
+  Args:
+    name (str): eg. "hive"
+    query (str): string containing SQL query to be executed
+    func (function): Function that accepts a QueryExecOption parameter and returns a
+      QueryResult. Eg. execute_using_impala_beeswax
+    config (QueryExecOption)
+    exit_on_error (boolean): Exit right after an error is encountered.
 
-    The query_exec_func needs to be a function that accepts a QueryExecOption parameter
-    and returns a QueryResult.
-    """
+  Attributes:
+    exec_func (function): Function that accepts a QueryExecOption parameter and
+      returns a QueryResult.
+    exec_config (QueryExecOption)
+    query (str): string containing SQL query to be executed
+    exit_on_error (boolean): Exit right after an error is encountered.
+    executor_name (str): eg. "hive"
+    result (QueryResult): Contains the result after execute method is called.
+  """
+
+  def __init__(self, name, query, func, config, exit_on_error):
     self.exec_func = func
     self.exec_config = config
     self.query = query
@@ -163,7 +208,15 @@ class QueryExecutor(object):
     return self.__result
 
 def establish_beeswax_connection(query, query_config):
-  """Establish a connection to the user specified impalad"""
+  """Establish a connection to the user-specified impalad.
+
+  Args:
+    query_config (QueryExecConfig)
+
+  Returns:
+    (boolean, ImpalaBeeswaxClient): True if successful
+  """
   # TODO: Make this generic, for hive etc.
   use_kerberos = query_config.use_kerberos
   client = ImpalaBeeswaxClient(query_config.impalad, use_kerberos=use_kerberos)
@@ -177,8 +230,16 @@ def establish_beeswax_connection(query, query_config):
 def execute_using_impala_beeswax(query, query_config):
   """Executes a query using beeswax.
 
-  A new client is created per query, then destroyed. Returns QueryResult()
+  A new client is created per query, then destroyed.
+
+  Args:
+    query (str): string containing the query to be executed.
+    query_config (QueryExecConfig)
+
+  Returns:
+    QueryResult
   """
   # Create a client object to talk to impalad
   exec_result = QueryResult(query, query_config=query_config)
   plugin_runner = query_config.plugin_runner
@@ -203,23 +264,52 @@ def execute_using_impala_beeswax(query, query_config):
   return construct_exec_result(result, exec_result)
 
 def build_context(query, query_config):
+  """Build context based on query config for plugin_runner.
+
+  Why not pass QueryExecConfig to plugins directly?
+
+  Args:
+    query (str)
+    query_config (QueryExecConfig)
+
+  Returns:
+    dict str -> str
+  """
   context = vars(query_config)
   context['query'] = query
   return context
 
 def construct_exec_result(result, exec_result):
-  """
-  Transform an ImpalaBeeswaxResult object to a QueryResult object.
-  """
+  """Transform an ImpalaBeeswaxResult object to a QueryResult object.
+
+  Args:
+    result (ImpalaBeeswaxResult): Transfers data from here.
+    exec_result (QueryResult): Transfers data to here.
+
+  Returns:
+    QueryResult
+  """
   # Return immediately if the query failed.
   if not result.success: return exec_result
   exec_result.success = True
-  for attr in ['data', 'runtime_profile', 'start_time', 'time_taken', 'summary']:
+  attrs = ['data', 'runtime_profile', 'start_time',
+           'time_taken', 'summary', 'exec_summary']
+  for attr in attrs:
     setattr(exec_result, attr, getattr(result, attr))
   return exec_result
 
 def execute_shell_cmd(cmd):
-  """Executes a command in the shell, pipes the output to local variables"""
+  """Executes a command in the shell, pipes the output to local variables.
+
+  Args:
+    cmd (str): Command to be executed.
+
+  Returns:
+    (str, str, str): return code, stdout, stderr
+  """
   LOG.debug('Executing: %s' % (cmd,))
   # Popen needs a list as its first parameter.
   # The first element is the command, with the rest being arguments.
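The two comments above describe the standard Popen idiom. A self-contained sketch of what execute_shell_cmd plausibly does next (assumed, since the hunk is cut off here):

import shlex
import subprocess

def run_cmd(cmd):
  args = shlex.split(cmd)  # 'ls -l /tmp' -> ['ls', '-l', '/tmp']
  p = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  stdout, stderr = p.communicate()
  return p.returncode, stdout, stderr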
@@ -34,12 +34,21 @@ LOG.setLevel(level=logging.DEBUG)
 class Scheduler(object):
   """Schedules the submission of workloads across one or more clients.
 
-  A workload execution expects the following arguments:
-  query_executors: A list of initialized query executor objects.
-  shuffle: Change the order of execution of queries in a workload. By default, the
-  queries are executed sorted by query name.
-  num_clients: The degree of parallelism.
-  impalads: A list of impalads to connect to. Ignored when the executor is hive.
+  Args:
+    query_executors (list of QueryExecutor): the objects should be initialized.
+    shuffle (boolean): If True, change the order of execution of queries in a
+      workload. By default, the queries are executed sorted by query name.
+    num_clients (int): Number of concurrent clients.
+    impalads (list of str): A list of impalads to connect to. Ignored when the
+      executor is hive.
+
+  Attributes:
+    query_executors (list of QueryExecutor): initialized query executors
+    shuffle (boolean): shuffle query executors
+    iterations (int): number of iterations ALL query executors will run
+    query_iterations (int): number of times each query executor will execute
+    impalads (list of str?): list of impalads for execution. It is rotated after
+      each execution.
+    num_clients (int): Number of concurrent clients
+  """
   def __init__(self, **kwargs):
     self.query_executors = kwargs.get('query_executors')

@@ -77,7 +86,12 @@ class Scheduler(object):
     return self.impalads[-1]
 
   def __run_queries(self, thread_num):
-    """Runs the list of query executors"""
+    """This method is run by every thread concurrently.
+
+    Args:
+      thread_num (int): Thread number. Used for setting the client name in the result.
+    """
     # each thread gets its own copy of query_executors
     query_executors = deepcopy(sorted(self.query_executors, key=lambda x: x.query.name))
     for j in xrange(self.iterations):
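The docstring above says the impalad list "is rotated after each execution", and the first hunk shows a method ending in `return self.impalads[-1]`. A hedged sketch of that round-robin pattern (the class and method names here are stand-ins, not the commit's code):

class ImpaladRotation(object):
  def __init__(self, impalads):
    self.impalads = list(impalads)

  def get_next_impalad(self):
    # Move the head to the tail, then hand out the tail; successive calls
    # walk through the list round-robin, spreading clients across the cluster.
    self.impalads.append(self.impalads.pop(0))
    return self.impalads[-1]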
@@ -27,7 +27,18 @@ class Workload(object):
 
   A workload is the internal representation for the set of queries on a dataset. It
   consists of the dataset name, and a mapping of query names to query strings.
 
+  Args:
+    name (str): workload name. (Eg. tpch)
+    query_name_filters (list of str): List of regular expressions used for matching
+      query names
+
+  Attributes:
+    name (str): workload name (Eg. tpch)
+    __query_map (dict): maps query name to query section
+      (ex. "TPCH-Q10" -> "select * from...")
+  """
 
   WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR']
 
   def __init__(self, name, query_name_filters=None):

@@ -82,7 +93,15 @@ class Workload(object):
 
   Transform all the queries in the workload's query map to query objects based on the
   input test vector and scale factor.
 
+  Args:
+    test_vector (?): query vector
+    scale_factor (str): eg. "300gb"
+
+  Returns:
+    (list of Query): these will be consumed by ?
+  """
   queries = list()
   for query_name, query_str in self.__query_map.iteritems():
     queries.append(Query(name=query_name,
@@ -37,6 +37,19 @@ class WorkloadRunner(object):
   Internally, for each workload, this module looks up and parses that workload's
   query files and reads the workload's test vector to determine what combination(s)
   of file format / compression to run with.
 
+  Args:
+    workload (Workload)
+    scale_factor (str): eg. "300gb"
+    config (WorkloadConfig)
+
+  Attributes:
+    workload (Workload)
+    scale_factor (str): eg. "300gb"
+    config (WorkloadConfig)
+    exit_on_error (boolean)
+    results (list of QueryResult)
+    __test_vectors (list of ?)
+  """
   def __init__(self, workload, scale_factor, config):
     self.workload = workload

@@ -106,7 +119,7 @@ class WorkloadRunner(object):
                                        self.exit_on_error)
       query_executors.append(query_executor)
     # Initialize the scheduler.
-    scheduler= Scheduler(query_executors=query_executors,
+    scheduler = Scheduler(query_executors=query_executors,
                          shuffle=self.config.shuffle_queries,
                          iterations=self.config.workload_iterations,
                          query_iterations=self.config.query_iterations,
@@ -36,38 +36,38 @@ class PluginRunner(object):
   '''
 
   def __init__(self, plugin_infos):
-    self._available_modules = self._get_plugin_modules()
-    self._get_plugins_from_modules(plugin_infos)
+    self.__available_modules = self.__get_plugin_modules()
+    self.__get_plugins_from_modules(plugin_infos)
 
   @property
   def plugins(self):
-    return self._plugins
+    return self.__plugins
 
   def __getstate__(self):
     state = self.__dict__.copy()
-    del state['_available_modules']
+    del state['__available_modules']
     return state
 
-  def _get_plugin_modules(self):
+  def __get_plugin_modules(self):
     ''' Gets all the modules in the directory and imports them'''
     modules = pkgutil.iter_modules(path=[PLUGIN_DIR])
     available_modules = []
     for loader, mod_name, ispkg in modules:
       yield __import__("tests.benchmark.plugins.%s" % mod_name, fromlist=[mod_name])
 
-  def _get_plugins_from_modules(self, plugin_infos):
+  def __get_plugins_from_modules(self, plugin_infos):
     '''Look for user specified plugins in the available modules.'''
-    self._plugins = []
+    self.__plugins = []
     plugin_names = []
-    for module in self._available_modules:
+    for module in self.__available_modules:
       for plugin_info in plugin_infos:
-        plugin_name, scope = self._get_plugin_info(plugin_info)
+        plugin_name, scope = self.__get_plugin_info(plugin_info)
         plugin_names.append(plugin_name)
         if hasattr(module, plugin_name):
-          self._plugins.append(getattr(module, plugin_name)(scope=scope.lower()))
+          self.__plugins.append(getattr(module, plugin_name)(scope=scope.lower()))
+    # The plugin(s) that could not be loaded are captured in the set difference
+    # between plugin_names and self.__plugins
-    plugins_found = [p.__name__ for p in self._plugins]
+    plugins_found = [p.__name__ for p in self.__plugins]
     LOG.debug("Plugins found: %s" % ', '.join(plugins_found))
     plugins_not_found = set(plugin_names).difference(plugins_found)
     # If the user has entered a plugin that does not exist, raise an error.
|
||||
msg = "Plugin(s) not found: %s" % (','.join(list(plugins_not_found)))
|
||||
raise RuntimeError, msg
|
||||
|
||||
def _get_plugin_info(self, plugin_info):
|
||||
def __get_plugin_info(self, plugin_info):
|
||||
info = plugin_info.split(':')
|
||||
if len(info) == 1:
|
||||
return info[0], 'query'
|
||||
@@ -85,20 +85,20 @@ class PluginRunner(object):
|
||||
raise ValueError("Plugin names specified in the form <plugin_name>[:<scope>]")
|
||||
|
||||
def print_plugin_names(self):
|
||||
for p in self._plugins:
|
||||
for p in self.__plugins:
|
||||
LOG.debug("Plugin: %s, Scope: %s" % (p.__name__, p.scope))
|
||||
|
||||
def run_plugins_pre(self, context=None, scope=None):
|
||||
if len(self._plugins) == 0: return
|
||||
if len(self.__plugins) == 0: return
|
||||
if context: context['scope'] = scope
|
||||
for p in self._plugins:
|
||||
for p in self.__plugins:
|
||||
if not scope or p.scope == scope.lower():
|
||||
LOG.debug('Running pre-hook for %s at scope %s' % (p.__name__, scope))
|
||||
p.run_pre_hook(context=context)
|
||||
|
||||
def run_plugins_post(self, context=None, scope=None):
|
||||
if len(self._plugins) == 0: return
|
||||
for p in self._plugins:
|
||||
if len(self.__plugins) == 0: return
|
||||
for p in self.__plugins:
|
||||
if not scope or p.scope == scope.lower():
|
||||
LOG.debug('Running post-hook for %s at scope %s' % (p.__name__, scope))
|
||||
p.run_post_hook(context=context)
|
||||
|
||||