mirror of
https://github.com/apache/impala.git
synced 2026-01-25 18:01:04 -05:00
Many of the test modules included calls to 'logging.basicConfig' at global scope in their implementation. This meant that by just importing one of these files, other tests would inherit their logging format. This is typically a bad idea in Python -- modules should not have side effects like this on import. The format was additionally inconsistent. In some cases we had a "--" prepended to the format, and in others we didn't. The "--" is very useful since it lets developers copy-paste query-test output back into the shell to reproduce an issue. This patch fixes the above by centralizing the logging configuration in a pytest hook that runs prior to all pytests. A few other non-pytest related tools now configure logging in their "main" code which is only triggered when the module is executed directly. I tested that, with this change, logs still show up properly in the .xml output files from 'run-tests.py' as well as when running tests manually from impala-py.test Change-Id: I55ef0214b43f87da2d71804913ba4caa964f789f Reviewed-on: http://gerrit.cloudera.org:8080/11225 Reviewed-by: Philip Zeyliger <philip@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
156 lines
5.5 KiB
Python
156 lines
5.5 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one
|
|
# or more contributor license agreements. See the NOTICE file
|
|
# distributed with this work for additional information
|
|
# regarding copyright ownership. The ASF licenses this file
|
|
# to you under the Apache License, Version 2.0 (the
|
|
# "License"); you may not use this file except in compliance
|
|
# with the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing,
|
|
# software distributed under the License is distributed on an
|
|
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
|
# KIND, either express or implied. See the License for the
|
|
# specific language governing permissions and limitations
|
|
# under the License.
|
|
|
|
import logging
|
|
|
|
from tests.common.test_dimensions import (
|
|
TableFormatInfo,
|
|
get_dataset_from_workload,
|
|
load_table_info_dimension)
|
|
from tests.performance.query_executor import (
|
|
BeeswaxQueryExecConfig,
|
|
HiveHS2QueryConfig,
|
|
ImpalaHS2QueryConfig,
|
|
JdbcQueryExecConfig,
|
|
QueryExecutor)
|
|
from tests.performance.query_exec_functions import (
|
|
execute_using_hive_hs2,
|
|
execute_using_impala_beeswax,
|
|
execute_using_impala_hs2,
|
|
execute_using_jdbc)
|
|
from tests.performance.scheduler import Scheduler
|
|
|
|
# Module-level logger for this file; WorkloadRunner raises its level to DEBUG
# when config.verbose is set (see WorkloadRunner.__init__).
LOG = logging.getLogger('workload_runner')
|
|
|
|
|
|
class WorkloadRunner(object):
  """Drives the query files of the given workload and collects their results.

  Typical usage:
    1) Construct a WorkloadRunner with the desired workload, scale factor and
       execution config.
    2) Call run().

  Internally, for each workload, the runner parses that workload's query files and
  consults the workload's test vector to decide which file format / compression
  combinations to execute against.

  Args:
    workload (Workload)
    scale_factor (str): eg. "300gb"
    config (WorkloadConfig)

  Attributes:
    workload (Workload)
    scale_factor (str): eg. "300gb"
    config (WorkloadConfig)
    exit_on_error (boolean)
    results (list of ImpalaQueryResult)
    _test_vectors (list of ?)
  """

  def __init__(self, workload, scale_factor, config):
    self.workload = workload
    self.scale_factor = scale_factor
    self.config = config
    # A query failure aborts the whole run unless the user asked to keep going.
    self.exit_on_error = not config.continue_on_query_error
    if config.verbose:
      LOG.setLevel(level=logging.DEBUG)
    self._generate_test_vectors()
    self._results = []

  @property
  def results(self):
    """All ImpalaQueryResult objects collected so far."""
    return self._results

  def _generate_test_vectors(self):
    """Build the list of table-format test vectors to run against.

    Table formats named explicitly in the config take precedence; otherwise every
    format produced by the configured exploration strategy is used.
    """
    if self.config.table_formats:
      dataset = get_dataset_from_workload(self.workload.name)
      self._test_vectors = [
          TableFormatInfo.create_from_string(dataset, table_format)
          for table_format in self.config.table_formats]
    else:
      dimension = load_table_info_dimension(self.workload.name,
                                            self.config.exploration_strategy)
      self._test_vectors = [vector.value for vector in dimension]

  def _create_executor(self, executor_name):
    """Return the (exec_function, exec_config) pair for executor_name.

    Raises KeyError if executor_name does not name a supported executor.
    """
    cfg = self.config
    # Dispatch table of lazily-built executor configurations; only the entry that
    # is looked up ever gets constructed.
    dispatch = {
        'impala_beeswax': lambda: (
            execute_using_impala_beeswax,
            BeeswaxQueryExecConfig(plugin_runner=cfg.plugin_runner,
                                   exec_options=cfg.exec_options,
                                   use_kerberos=cfg.use_kerberos)),
        'impala_jdbc': lambda: (
            execute_using_jdbc,
            JdbcQueryExecConfig(plugin_runner=cfg.plugin_runner)),
        'impala_hs2': lambda: (
            execute_using_impala_hs2,
            ImpalaHS2QueryConfig(plugin_runner=cfg.plugin_runner,
                                 use_kerberos=cfg.use_kerberos)),
        'hive_hs2': lambda: (
            execute_using_hive_hs2,
            HiveHS2QueryConfig(hiveserver=cfg.hiveserver,
                               plugin_runner=cfg.plugin_runner,
                               exec_options=cfg.exec_options,
                               user=cfg.user,
                               use_kerberos=cfg.use_kerberos)),
    }
    return dispatch[executor_name]()

  def _execute_queries(self, queries):
    """Execute a set of queries.

    Wraps each query in a QueryExecutor and hands the batch, together with the
    config knobs, to the scheduler.
    """
    executor_name = "{0}_{1}".format(self.config.exec_engine, self.config.client_type)
    exec_func, exec_config = self._create_executor(executor_name)
    # One executor per query; the scheduler fans them out across clients.
    query_executors = [
        QueryExecutor(executor_name, query, exec_func, exec_config,
                      self.exit_on_error)
        for query in queries]
    scheduler = Scheduler(query_executors=query_executors,
                          shuffle=self.config.shuffle_queries,
                          iterations=self.config.workload_iterations,
                          query_iterations=self.config.query_iterations,
                          impalads=self.config.impalads,
                          num_clients=self.config.num_clients,
                          plan_first=getattr(self.config, 'plan_first', False))
    scheduler.run()
    self._results.extend(scheduler.results)

  def run(self):
    """Run the workload against all test vectors serially, storing the results."""
    for vector in self._test_vectors:
      # Materialize Query objects for this scale factor / test vector combination.
      queries = self.workload.construct_queries(vector, self.scale_factor)
      self._execute_queries(queries)
|