# Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # from __future__ import absolute_import, division, print_function import os import fnmatch import re from tests.performance.query import Query from tests.util.test_file_parser import load_tpc_queries class Workload(object): """Represents a workload. A workload is the internal representation for the set of queries on a dataset. It consists of the dataset name, and a mapping of query names to query strings. Args: name (str): workload name. (Eg. tpch) query_name_filters (list of str): List of regular expressions used for matching query names Attributes: name (str): workload name (Eg. tpch) _query_map (dict): contains a query name -> string mapping; mapping of query name to section (ex. "TPCH-Q10" -> "select * from...") """ WORKLOAD_DIR = os.environ['IMPALA_WORKLOAD_DIR'] def __init__(self, name, query_name_filters=None): self._name = name self._query_map = dict() # Build the query name -> string mapping in the c'tor. We want to fail fast and early # if the user input is bad. self._query_map = load_tpc_queries(self._name, query_name_filters=query_name_filters) assert len(self._query_map) > 0, "No matching queries found for %s" % self._name @property def name(self): return self._name @property def query_map(self): return self._query_map def construct_queries(self, test_vector, scale_factor): """Transform a query map into a list of query objects. Transform all the queries in the workload's query map to query objects based on the input test vector and scale factor. Args: test_vector (?): query vector scale_factor (str): eg. "300gb" Returns: (list of Query): these will be consumed by ? """ queries = list() for query_name, query_str in self._query_map.items(): queries.append(Query(name=query_name, query_str=query_str, workload=self._name, scale_factor=scale_factor, test_vector=test_vector)) return queries