IMPALA-14327: Update load-data.py and run-workload.py to use HS2

load-data.py is used for data loading while run-workload.py is used for
running perf-AB-test. This patch changes both scripts from using the
beeswax protocol to the HS2 protocol.

Testing:
Ran data loading and perf-AB-test-ub2004 with this patch applied.

Change-Id: I1c3727871b8b2e75c3f10ceabfbe9cb96e36ead3
Reviewed-on: http://gerrit.cloudera.org:8080/23309
Reviewed-by: Riza Suminto <riza.suminto@cloudera.com>
Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
Riza Suminto
2025-08-12 14:36:31 -07:00
committed by Impala Public Jenkins
parent 75ca356ed0
commit 9fc941b611
3 changed files with 9 additions and 5 deletions

View File

@@ -33,7 +33,8 @@ import time
import traceback
from optparse import OptionParser
from tests.beeswax.impala_beeswax import ImpalaBeeswaxClient
from tests.common.impala_connection import ImpylaHS2Connection
from tests.common.test_vector import HS2
from multiprocessing.pool import ThreadPool
LOG = logging.getLogger('load-data.py')
@@ -112,6 +113,8 @@ HIVE_ARGS = '-n %s -u "jdbc:hive2://%s/default;%s" --verbose=true'\
HADOOP_CMD = os.path.join(os.environ['HADOOP_HOME'], 'bin/hadoop')
HS2_HOST_PORT = "{}:{}".format(options.impalad, 21050)
def available_workloads(workload_dir):
return [subdir for subdir in os.listdir(workload_dir)
if os.path.isdir(os.path.join(workload_dir, subdir))]
@@ -181,7 +184,8 @@ def exec_impala_query_from_file(file_name):
LOG.info("Beginning execution of impala SQL on {0}: {1}".format(
options.impalad, file_name))
is_success = True
impala_client = ImpalaBeeswaxClient(options.impalad, use_kerberos=options.use_kerberos)
impala_client = ImpylaHS2Connection(HS2_HOST_PORT,
use_kerberos=options.use_kerberos)
output_file = file_name + ".log"
query = None
with open(output_file, 'w') as out_file:
@@ -234,7 +238,7 @@ def generate_schema_statements(workload):
generate_cmd += " --hive_warehouse_dir=%s" % options.hive_warehouse_dir
if options.hdfs_namenode is not None:
generate_cmd += " --hdfs_namenode=%s" % options.hdfs_namenode
generate_cmd += " --backend=%s" % options.impalad
generate_cmd += " --backend=%s" % HS2_HOST_PORT
LOG.info('Executing Generate Schema Command: ' + generate_cmd)
schema_cmd = os.path.join(TESTDATA_BIN_DIR, generate_cmd)
error_msg = 'Error generating schema statements for workload: ' + workload

View File

@@ -95,7 +95,7 @@ parser.add_option("--use_kerberos", dest="use_kerberos", action="store_true",
parser.add_option("--continue_on_query_error", dest="continue_on_query_error",
action="store_true", default=False,
help="If set, continue execution on each query error.")
parser.add_option("-c", "--client_type", dest="client_type", default='beeswax',
parser.add_option("-c", "--client_type", dest="client_type", default='hs2',
choices=['beeswax', 'jdbc', 'hs2'],
help="Client type. Valid options are 'beeswax' or 'jdbc' or 'hs2'")
parser.add_option("--plugin_names", dest="plugin_names", default=None,

View File

@@ -147,7 +147,7 @@ def run_workload(base_dir, workloads, options):
run_workload = ["{0}/bin/run-workload.py".format(IMPALA_HOME)]
impalads = ",".join(["localhost:{0}".format(21000 + i)
impalads = ",".join(["localhost:{0}".format(21050 + i)
for i in range(0, int(options.num_impalads))])
run_workload += ["--workloads={0}".format(workloads),