mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-14327: Update load-data.py and run-workload.py to use HS2
load-data.py is used for data loading, while run-workload.py is used for running perf-AB-test. This patch changes these scripts from using the Beeswax protocol to the HS2 protocol. Testing: Ran data loading and perf-AB-test-ub2004 based on this patch. Change-Id: I1c3727871b8b2e75c3f10ceabfbe9cb96e36ead3 Reviewed-on: http://gerrit.cloudera.org:8080/23309 Reviewed-by: Riza Suminto <riza.suminto@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
committed by
Impala Public Jenkins
parent
75ca356ed0
commit
9fc941b611
@@ -33,7 +33,8 @@ import time
|
|||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
from optparse import OptionParser
|
from optparse import OptionParser
|
||||||
from tests.beeswax.impala_beeswax import ImpalaBeeswaxClient
|
from tests.common.impala_connection import ImpylaHS2Connection
|
||||||
|
from tests.common.test_vector import HS2
|
||||||
from multiprocessing.pool import ThreadPool
|
from multiprocessing.pool import ThreadPool
|
||||||
|
|
||||||
LOG = logging.getLogger('load-data.py')
|
LOG = logging.getLogger('load-data.py')
|
||||||
@@ -112,6 +113,8 @@ HIVE_ARGS = '-n %s -u "jdbc:hive2://%s/default;%s" --verbose=true'\
|
|||||||
|
|
||||||
HADOOP_CMD = os.path.join(os.environ['HADOOP_HOME'], 'bin/hadoop')
|
HADOOP_CMD = os.path.join(os.environ['HADOOP_HOME'], 'bin/hadoop')
|
||||||
|
|
||||||
|
HS2_HOST_PORT = "{}:{}".format(options.impalad, 21050)
|
||||||
|
|
||||||
def available_workloads(workload_dir):
|
def available_workloads(workload_dir):
|
||||||
return [subdir for subdir in os.listdir(workload_dir)
|
return [subdir for subdir in os.listdir(workload_dir)
|
||||||
if os.path.isdir(os.path.join(workload_dir, subdir))]
|
if os.path.isdir(os.path.join(workload_dir, subdir))]
|
||||||
@@ -181,7 +184,8 @@ def exec_impala_query_from_file(file_name):
|
|||||||
LOG.info("Beginning execution of impala SQL on {0}: {1}".format(
|
LOG.info("Beginning execution of impala SQL on {0}: {1}".format(
|
||||||
options.impalad, file_name))
|
options.impalad, file_name))
|
||||||
is_success = True
|
is_success = True
|
||||||
impala_client = ImpalaBeeswaxClient(options.impalad, use_kerberos=options.use_kerberos)
|
impala_client = ImpylaHS2Connection(HS2_HOST_PORT,
|
||||||
|
use_kerberos=options.use_kerberos)
|
||||||
output_file = file_name + ".log"
|
output_file = file_name + ".log"
|
||||||
query = None
|
query = None
|
||||||
with open(output_file, 'w') as out_file:
|
with open(output_file, 'w') as out_file:
|
||||||
@@ -234,7 +238,7 @@ def generate_schema_statements(workload):
|
|||||||
generate_cmd += " --hive_warehouse_dir=%s" % options.hive_warehouse_dir
|
generate_cmd += " --hive_warehouse_dir=%s" % options.hive_warehouse_dir
|
||||||
if options.hdfs_namenode is not None:
|
if options.hdfs_namenode is not None:
|
||||||
generate_cmd += " --hdfs_namenode=%s" % options.hdfs_namenode
|
generate_cmd += " --hdfs_namenode=%s" % options.hdfs_namenode
|
||||||
generate_cmd += " --backend=%s" % options.impalad
|
generate_cmd += " --backend=%s" % HS2_HOST_PORT
|
||||||
LOG.info('Executing Generate Schema Command: ' + generate_cmd)
|
LOG.info('Executing Generate Schema Command: ' + generate_cmd)
|
||||||
schema_cmd = os.path.join(TESTDATA_BIN_DIR, generate_cmd)
|
schema_cmd = os.path.join(TESTDATA_BIN_DIR, generate_cmd)
|
||||||
error_msg = 'Error generating schema statements for workload: ' + workload
|
error_msg = 'Error generating schema statements for workload: ' + workload
|
||||||
|
|||||||
@@ -95,7 +95,7 @@ parser.add_option("--use_kerberos", dest="use_kerberos", action="store_true",
|
|||||||
parser.add_option("--continue_on_query_error", dest="continue_on_query_error",
|
parser.add_option("--continue_on_query_error", dest="continue_on_query_error",
|
||||||
action="store_true", default=False,
|
action="store_true", default=False,
|
||||||
help="If set, continue execution on each query error.")
|
help="If set, continue execution on each query error.")
|
||||||
parser.add_option("-c", "--client_type", dest="client_type", default='beeswax',
|
parser.add_option("-c", "--client_type", dest="client_type", default='hs2',
|
||||||
choices=['beeswax', 'jdbc', 'hs2'],
|
choices=['beeswax', 'jdbc', 'hs2'],
|
||||||
help="Client type. Valid options are 'beeswax' or 'jdbc' or 'hs2'")
|
help="Client type. Valid options are 'beeswax' or 'jdbc' or 'hs2'")
|
||||||
parser.add_option("--plugin_names", dest="plugin_names", default=None,
|
parser.add_option("--plugin_names", dest="plugin_names", default=None,
|
||||||
|
|||||||
@@ -147,7 +147,7 @@ def run_workload(base_dir, workloads, options):
|
|||||||
|
|
||||||
run_workload = ["{0}/bin/run-workload.py".format(IMPALA_HOME)]
|
run_workload = ["{0}/bin/run-workload.py".format(IMPALA_HOME)]
|
||||||
|
|
||||||
impalads = ",".join(["localhost:{0}".format(21000 + i)
|
impalads = ",".join(["localhost:{0}".format(21050 + i)
|
||||||
for i in range(0, int(options.num_impalads))])
|
for i in range(0, int(options.num_impalads))])
|
||||||
|
|
||||||
run_workload += ["--workloads={0}".format(workloads),
|
run_workload += ["--workloads={0}".format(workloads),
|
||||||
|
|||||||
Reference in New Issue
Block a user