Files
impala/tests/common/kudu_test_suite.py
Csaba Ringhofer f98b697c7b IMPALA-13929: Make 'functional-query' the default workload in tests
This change adds get_workload() to ImpalaTestSuite and removes it
from all test suites that already returned 'functional-query'.
get_workload() is also removed from CustomClusterTestSuite which
used to return 'tpch'.

All other changes besides impala_test_suite.py and
custom_cluster_test_suite.py are just mass removals of
get_workload() functions.

The behavior is only changed in custom cluster tests that didn't
override get_workload(). By returning 'functional-query' instead
of 'tpch', exploration_strategy() will no longer return 'core' in
'exhaustive' test runs. See IMPALA-3947 for why the workload affected
exploration_strategy(). An example of an affected test is
TestCatalogHMSFailures, which was skipped in both core and exhaustive
runs before this change.

get_workload() functions that return a different workload than
'functional-query' are not changed - it is possible that some of
these also don't handle exploration_strategy() as expected, but
individually checking these tests is out of scope in this patch.

Change-Id: I9ec6c41ffb3a30e1ea2de773626d1485c69fe115
Reviewed-on: http://gerrit.cloudera.org:8080/22726
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
Reviewed-by: Daniel Becker <daniel.becker@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
2025-04-08 07:12:55 +00:00

203 lines
7.0 KiB
Python

# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
from __future__ import absolute_import, division, print_function
from builtins import range
import os
import pytest
import requests
import string
from contextlib import contextmanager
from kudu.schema import (
BOOL,
DOUBLE,
FLOAT,
INT16,
INT32,
INT64,
INT8,
SchemaBuilder,
STRING,
BINARY,
UNIXTIME_MICROS,
DATE)
from kudu.client import Partitioning
from random import choice, sample
from string import ascii_lowercase, digits
from tests.common.impala_test_suite import ImpalaTestSuite
from tests.common.test_dimensions import HS2, create_kudu_dimension
# Port of the Kudu master web UI, as a string. The KUDU_MASTER_WEBUI_PORT environment
# variable overrides the built-in default of '8051'.
DEFAULT_KUDU_MASTER_WEBUI_PORT = os.environ.get('KUDU_MASTER_WEBUI_PORT', '8051')
def get_kudu_master_webpage(page_name):
  """Fetch 'page_name' from the Kudu master web UI and return the response text.

  The master address comes from the 'kudu_master_hosts' pytest option. Any ":port"
  suffix on it is discarded and DEFAULT_KUDU_MASTER_WEBUI_PORT is used instead.

  Raises NotImplementedError if the option holds a comma-separated list of masters.
  """
  master_address = pytest.config.option.kudu_master_hosts
  if "," in master_address:
    raise NotImplementedError("Multi-master not supported yet")
  # The host is everything before the first ':'; when there is no ':' at all,
  # partition() hands back the whole address unchanged.
  host = master_address.partition(":")[0]
  page_url = "http://%s:%s/%s" % (host, DEFAULT_KUDU_MASTER_WEBUI_PORT, page_name)
  response = requests.get(page_url)
  return response.text
def get_kudu_master_flag(flag):
  """Return the value of a Kudu master flag as listed on the master's "varz" page.

  The page is scanned line by line for an entry of the form "<flag>=<value>".
  'flag' must match the text before the first "=" exactly, so it has to be spelled
  the way the page spells it (the caller in this file passes
  "--hive_metastore_uris"). The returned value is everything after that first "=",
  which means values that themselves contain "=" are returned intact.

  Raises AssertionError if no line on the page sets 'flag'.
  """
  varz = get_kudu_master_webpage("varz")
  for line in varz.split("\n"):
    # Split on the first "=" only: splitting on every "=" would make lines whose
    # value contains "=" look malformed and the flag would never be found.
    name, separator, value = line.partition("=")
    if separator and name == flag:
      return value
  # Raised explicitly instead of via 'assert False' so the failure is still reported
  # when Python runs with -O, which strips assert statements.
  raise AssertionError("Failed to find Kudu master flag: %s" % flag)
class KuduTestSuite(ImpalaTestSuite):
  """Test suite base class with helpers for tests that work with Kudu tables.

  On top of the parent suite it adds the Kudu test dimension, selects the HS2
  protocol by default, and offers helpers for naming, creating and dropping tables.
  """

  # Cached database name; filled in by get_db_name() on first use.
  __DB_NAME = None

  @classmethod
  def get_conn_timeout(cls):
    """Return the connection timeout: 5 minutes, expressed in seconds."""
    # Value chosen for IMPALA-5079 and IMPALA-4454.
    minutes = 5
    return minutes * 60

  @classmethod
  def setup_class(cls):
    """Delegate class-level setup to the parent suite."""
    super(KuduTestSuite, cls).setup_class()

  @classmethod
  def add_test_dimensions(cls):
    """Add the Kudu dimension for this suite's workload to the parent's dimensions."""
    super(KuduTestSuite, cls).add_test_dimensions()
    test_matrix = cls.ImpalaTestMatrix
    workload = cls.get_workload()
    test_matrix.add_dimension(create_kudu_dimension(workload))

  @classmethod
  def default_test_protocol(cls):
    """Return HS2 as the protocol these tests run with."""
    # In some tests run_test_case() can produce different result types depending on
    # whether beeswax or hs2 is used, so the tests are fixed to the hs2 protocol.
    return HS2

  @classmethod
  def auto_create_db(cls):
    """Return True, i.e. opt in to automatic database creation."""
    return True

  @classmethod
  def get_db_name(cls):
    """Return the database name for this test class, generating it on first use.

    The name is "<lowercased class name>_<pid>_<random salt>". py.test with the xdist
    plugin starts several processes that each run a partition of the tests, and more
    than one of them may call this method. The class name keeps different classes
    apart, the pid keeps the same class apart across processes, and the result is
    cached so that a single process always uses the same name for the class. No
    locking is done because multi-threading is never used.
    """
    cached_name = cls.__DB_NAME
    if cached_name:
      return cached_name
    # The salt is one random letter followed by five distinct letters/digits.
    first_char = choice(ascii_lowercase)
    remaining_chars = sample(ascii_lowercase + digits, 5)
    salt = first_char + "".join(remaining_chars)
    cls.__DB_NAME = "_".join([cls.__name__.lower(), str(os.getpid()), salt])
    return cls.__DB_NAME

  @classmethod
  def random_table_name(cls):
    """Return a random table name made of 10 lowercase ASCII letters."""
    alphabet = string.ascii_lowercase
    letters = [choice(alphabet) for _ in range(10)]
    return "".join(letters)

  @classmethod
  def to_kudu_table_name(cls, db_name, tbl_name):
    """Return the name of the Kudu table underlying an Impala database and table name.

    If the Kudu master's --hive_metastore_uris flag is non-empty the name is
    "<db>.<table>", otherwise it is "impala::<db>.<table>". This must be kept in sync
    with KuduUtil.getDefaultKuduTableName() in the FE.
    """
    hms_uris = get_kudu_master_flag("--hive_metastore_uris")
    name_format = "%s.%s" if hms_uris != "" else "impala::%s.%s"
    return name_format % (db_name, tbl_name)

  @classmethod
  def get_kudu_table_base_name(cls, name):
    """Return the part of 'name' after its last ".", or all of it if there is none."""
    return name.rpartition(".")[2]

  @contextmanager
  def temp_kudu_table(self, kudu, col_types, name=None, num_key_cols=1, col_names=None,
                      prepend_db_name=True, db_name=None, num_partitions=2):
    """Context manager that creates a Kudu table, yields it and deletes it on exit.

    'kudu' must be a kudu.client.Client and 'col_types' lists the column types. A
    random table name is used when 'name' is not given. When 'prepend_db_name' is
    True the name is prefixed with 'db_name' (or the lowercased get_db_name() if
    'db_name' is not given) followed by ".". Without 'col_names' the columns are
    called "a", "b", "c", ..., which limits the table to 26 columns. The first
    'num_key_cols' columns are non-nullable and make up the primary key; the table
    is hash partitioned on them into 'num_partitions' partitions.

    Example:
      with self.temp_kudu_table(kudu, [INT32]) as kudu_table:
        assert kudu.table_exists(kudu_table.name)
      assert not kudu.table_exists(kudu_table.name)
    """
    if not col_names:
      if len(col_types) > 26:
        raise Exception("Too many columns for default naming")
      # One lowercase letter per column, starting from "a".
      col_names = list(ascii_lowercase[:len(col_types)])

    builder = SchemaBuilder()
    for idx, col_type in enumerate(col_types):
      spec = builder.add_column(col_names[idx], type_=col_type)
      if idx < num_key_cols:
        # Key columns are marked non-nullable.
        spec.nullable(False)
    key_col_names = col_names[:num_key_cols]
    builder.set_primary_keys(key_col_names)
    table_schema = builder.build()

    table_name = name or self.random_table_name()
    if prepend_db_name:
      qualifier = db_name or self.get_db_name().lower()
      table_name = qualifier + "." + table_name

    hash_partitioning = Partitioning().add_hash_partitions(
        key_col_names, num_partitions)
    kudu.create_table(table_name, table_schema, partitioning=hash_partitioning)
    try:
      yield kudu.table(table_name)
    finally:
      # The body of the "with" block may already have removed the table.
      if kudu.table_exists(table_name):
        kudu.delete_table(table_name)

  @contextmanager
  def drop_impala_table_after_context(self, impala_client, table_name):
    """DEPRECATED: use unique_database fixture instead.

    Context manager that drops 'table_name' through 'impala_client' when the "with"
    block exits, whether or not the block raised.

      cursor.execute("CREATE TABLE foo ...")
      with self.drop_impala_table_after_context(impala_client, "foo"):
        ...
      # Now table foo no longer exists.
    """
    try:
      yield
    finally:
      drop_stmt = "DROP TABLE IF EXISTS %s" % table_name
      impala_client.execute(drop_stmt)

  def kudu_col_type_to_impala_col_type(self, col_type):
    """Return the Impala column type name that corresponds to a Kudu column type.

    Raises Exception if 'col_type' is not one of the types listed below.
    """
    impala_type_by_kudu_type = {
        BOOL: "BOOLEAN",
        DOUBLE: "DOUBLE",
        FLOAT: "FLOAT",
        INT16: "SMALLINT",
        INT32: "INT",
        INT64: "BIGINT",
        INT8: "TINYINT",
        STRING: "STRING",
        BINARY: "BINARY",
        UNIXTIME_MICROS: "TIMESTAMP",
        DATE: "DATE",
    }
    # Every value in the table is a non-empty string, so None can only mean "absent".
    impala_type = impala_type_by_kudu_type.get(col_type)
    if impala_type is None:
      raise Exception("Unexpected column type: %s" % col_type)
    return impala_type