mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
IMPALA-14512: Remove dependency on sh python package
This modifies bin/single_node_perf_run.py to stop using the sh python package. It replaces sh with calls to subprocess. It stops installing sh for both the Python 2 and 3 virtualenvs. Testing: - Ran perf-AB-test job with it and examined the logs Change-Id: Ic5f9316a5d83c5c0dc37d4a94c55b6a655765fe3 Reviewed-on: http://gerrit.cloudera.org:8080/23600 Reviewed-by: Riza Suminto <riza.suminto@cloudera.com> Reviewed-by: Jason Fehr <jfehr@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
This commit is contained in:
@@ -79,7 +79,6 @@ from tempfile import mkdtemp
|
|||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import pipes
|
import pipes
|
||||||
import sh
|
|
||||||
import shutil
|
import shutil
|
||||||
import subprocess
|
import subprocess
|
||||||
import sys
|
import sys
|
||||||
@@ -99,6 +98,16 @@ def configured_call(cmd):
|
|||||||
return subprocess.check_call(["bash", "-c", cmd])
|
return subprocess.check_call(["bash", "-c", cmd])
|
||||||
|
|
||||||
|
|
||||||
|
def run_git(args):
|
||||||
|
"""Runs git without capturing output (stdout passes through to stdout)"""
|
||||||
|
subprocess.check_call(["git"] + args, text=True)
|
||||||
|
|
||||||
|
|
||||||
|
def get_git_output(args):
|
||||||
|
"""Runs git, capturing the output and returning it"""
|
||||||
|
return subprocess.check_output(["git"] + args, text=True)
|
||||||
|
|
||||||
|
|
||||||
def load_data(db_to_load, table_formats, scale):
|
def load_data(db_to_load, table_formats, scale):
|
||||||
"""Loads a database with a particular scale factor."""
|
"""Loads a database with a particular scale factor."""
|
||||||
all_formats = ("text/none," + table_formats if "text/none" not in table_formats
|
all_formats = ("text/none," + table_formats if "text/none" not in table_formats
|
||||||
@@ -115,12 +124,12 @@ def load_data(db_to_load, table_formats, scale):
|
|||||||
|
|
||||||
|
|
||||||
def get_git_hash_for_name(name):
|
def get_git_hash_for_name(name):
|
||||||
return sh.git("rev-parse", name).strip()
|
return get_git_output(["rev-parse", name]).strip()
|
||||||
|
|
||||||
|
|
||||||
def build(git_hash, options):
|
def build(git_hash, options):
|
||||||
"""Builds Impala in release mode; doesn't build tests."""
|
"""Builds Impala in release mode; doesn't build tests."""
|
||||||
sh.git.checkout(git_hash)
|
run_git(["checkout", git_hash])
|
||||||
buildall = ["{0}/buildall.sh".format(IMPALA_HOME), "-notests", "-release", "-noclean"]
|
buildall = ["{0}/buildall.sh".format(IMPALA_HOME), "-notests", "-release", "-noclean"]
|
||||||
if options.ninja:
|
if options.ninja:
|
||||||
buildall += ["-ninja"]
|
buildall += ["-ninja"]
|
||||||
@@ -168,15 +177,20 @@ def run_workload(base_dir, workloads, options):
|
|||||||
|
|
||||||
def report_benchmark_results(file_a, file_b, description):
|
def report_benchmark_results(file_a, file_b, description):
|
||||||
"""Wrapper around report_benchmark_result.py."""
|
"""Wrapper around report_benchmark_result.py."""
|
||||||
|
performance_result = subprocess.check_output(
|
||||||
|
["{0}/tests/benchmark/report_benchmark_results.py".format(IMPALA_HOME),
|
||||||
|
"--reference_result_file={0}".format(file_a),
|
||||||
|
"--input_result_file={0}".format(file_b),
|
||||||
|
'--report_description="{0}"'.format(description)],
|
||||||
|
text=True)
|
||||||
|
|
||||||
|
# Output the performance result to stdout for convenience
|
||||||
|
print(performance_result)
|
||||||
|
|
||||||
|
# Dump the performance result to a file to preserve
|
||||||
result = os.path.join(IMPALA_PERF_RESULTS, "latest", "performance_result.txt")
|
result = os.path.join(IMPALA_PERF_RESULTS, "latest", "performance_result.txt")
|
||||||
with open(result, "w") as f:
|
with open(result, "w") as f:
|
||||||
subprocess.check_call(
|
f.write(performance_result)
|
||||||
["{0}/tests/benchmark/report_benchmark_results.py".format(IMPALA_HOME),
|
|
||||||
"--reference_result_file={0}".format(file_a),
|
|
||||||
"--input_result_file={0}".format(file_b),
|
|
||||||
'--report_description="{0}"'.format(description)],
|
|
||||||
stdout=f)
|
|
||||||
sh.cat(result, _out=sys.stdout)
|
|
||||||
|
|
||||||
|
|
||||||
def compare(base_dir, hash_a, hash_b, options):
|
def compare(base_dir, hash_a, hash_b, options):
|
||||||
@@ -190,19 +204,17 @@ def compare(base_dir, hash_a, hash_b, options):
|
|||||||
if options.split_profiles:
|
if options.split_profiles:
|
||||||
generate_profile_files(file_a, hash_a, base_dir)
|
generate_profile_files(file_a, hash_a, base_dir)
|
||||||
generate_profile_files(file_b, hash_b, base_dir)
|
generate_profile_files(file_b, hash_b, base_dir)
|
||||||
sh.diff("-u",
|
with open(os.path.join(IMPALA_HOME, "performance_result_profile_diff.txt"), "w") as f:
|
||||||
os.path.join(base_dir, hash_a + "_profiles"),
|
# This does not check that the diff command succeeds
|
||||||
os.path.join(base_dir, hash_b + "_profiles"),
|
subprocess.run(["diff", "-u", os.path.join(base_dir, hash_a + "_profiles"),
|
||||||
_out=os.path.join(IMPALA_HOME, "performance_result_profile_diff.txt"),
|
os.path.join(base_dir, hash_b + "_profiles")], stdout=f, text=True)
|
||||||
_ok_code=[0, 1])
|
|
||||||
else:
|
else:
|
||||||
generate_profile_file(file_a, hash_a, base_dir)
|
generate_profile_file(file_a, hash_a, base_dir)
|
||||||
generate_profile_file(file_b, hash_b, base_dir)
|
generate_profile_file(file_b, hash_b, base_dir)
|
||||||
sh.diff("-u",
|
with open(os.path.join(IMPALA_HOME, "performance_result_profile_diff.txt"), "w") as f:
|
||||||
os.path.join(base_dir, hash_a + "_profile.txt"),
|
# This does not check that the diff command succeeds
|
||||||
os.path.join(base_dir, hash_b + "_profile.txt"),
|
subprocess.run(["diff", "-u", os.path.join(base_dir, hash_a + "_profile.txt"),
|
||||||
_out=os.path.join(IMPALA_HOME, "performance_result_profile_diff.txt"),
|
os.path.join(base_dir, hash_b + "_profile.txt")], stdout=f, text=True)
|
||||||
_ok_code=[0, 1])
|
|
||||||
|
|
||||||
|
|
||||||
def generate_profile_file(name, hash, base_dir):
|
def generate_profile_file(name, hash, base_dir):
|
||||||
@@ -253,16 +265,17 @@ def backup_workloads():
|
|||||||
Used to keep workloads from being clobbered by git checkout.
|
Used to keep workloads from being clobbered by git checkout.
|
||||||
"""
|
"""
|
||||||
temp_dir = mkdtemp()
|
temp_dir = mkdtemp()
|
||||||
sh.cp(os.path.join(IMPALA_HOME, "testdata", "workloads"),
|
shutil.copytree(os.path.join(IMPALA_HOME, "testdata", "workloads"),
|
||||||
temp_dir, R=True, _out=sys.stdout, _err=sys.stderr)
|
os.path.join(temp_dir, "workloads"))
|
||||||
print("Backed up workloads to {0}".format(temp_dir))
|
print("Backed up workloads to {0}".format(temp_dir))
|
||||||
return temp_dir
|
return temp_dir
|
||||||
|
|
||||||
|
|
||||||
def restore_workloads(source):
|
def restore_workloads(source):
|
||||||
"""Restores the workload directory from source into the Impala tree."""
|
"""Restores the workload directory from source into the Impala tree."""
|
||||||
sh.cp(os.path.join(source, "workloads"), os.path.join(IMPALA_HOME, "testdata"),
|
# dirs_exist_ok=True allows this to overwrite the existing files
|
||||||
R=True, _out=sys.stdout, _err=sys.stderr)
|
shutil.copytree(os.path.join(source, "workloads"),
|
||||||
|
os.path.join(IMPALA_HOME, "testdata", "workloads"), dirs_exist_ok=True)
|
||||||
|
|
||||||
|
|
||||||
def perf_ab_test(options, args):
|
def perf_ab_test(options, args):
|
||||||
@@ -314,7 +327,7 @@ def perf_ab_test(options, args):
|
|||||||
hash_b = get_git_hash_for_name(args[1])
|
hash_b = get_git_hash_for_name(args[1])
|
||||||
# discard any changes created by the previous restore_workloads()
|
# discard any changes created by the previous restore_workloads()
|
||||||
shutil.rmtree("testdata/workloads")
|
shutil.rmtree("testdata/workloads")
|
||||||
sh.git.checkout("--", "testdata/workloads")
|
run_git(["checkout", "--", "testdata/workloads"])
|
||||||
build(hash_b, options)
|
build(hash_b, options)
|
||||||
restore_workloads(workload_dir)
|
restore_workloads(workload_dir)
|
||||||
start_impala(options.num_impalads, options)
|
start_impala(options.num_impalads, options)
|
||||||
@@ -399,17 +412,17 @@ def main():
|
|||||||
|
|
||||||
os.chdir(IMPALA_HOME)
|
os.chdir(IMPALA_HOME)
|
||||||
|
|
||||||
if sh.git("status", "--porcelain", "--untracked-files=no", _out=None).strip():
|
if get_git_output(["status", "--porcelain", "--untracked-files=no"]).strip():
|
||||||
sh.git("status", "--porcelain", "--untracked-files=no", _out=sys.stdout)
|
run_git(["status", "--porcelain", "--untracked-files=no"])
|
||||||
# Something went wrong, let's dump the actual diff to make it easier to
|
# Something went wrong, let's dump the actual diff to make it easier to
|
||||||
# track down
|
# track down
|
||||||
print("#### Working copy is dirty, dumping the diff #####")
|
print("#### Working copy is dirty, dumping the diff #####")
|
||||||
sh.git("--no-pager", "diff", _out=sys.stdout)
|
run_git(["--no-pager", "diff"])
|
||||||
print("#### End of diff #####")
|
print("#### End of diff #####")
|
||||||
raise Exception("Working copy is dirty. Consider 'git stash' and try again.")
|
raise Exception("Working copy is dirty. Consider 'git stash' and try again.")
|
||||||
|
|
||||||
# Save the current hash to be able to return to this place in the tree when done
|
# Save the current hash to be able to return to this place in the tree when done
|
||||||
current_hash = sh.git("rev-parse", "--abbrev-ref", "HEAD").strip()
|
current_hash = get_git_output(["rev-parse", "--abbrev-ref", "HEAD"]).strip()
|
||||||
if current_hash == "HEAD":
|
if current_hash == "HEAD":
|
||||||
current_hash = get_git_hash_for_name("HEAD")
|
current_hash = get_git_hash_for_name("HEAD")
|
||||||
|
|
||||||
@@ -419,8 +432,8 @@ def main():
|
|||||||
finally:
|
finally:
|
||||||
# discard any changes created by the previous restore_workloads()
|
# discard any changes created by the previous restore_workloads()
|
||||||
shutil.rmtree("testdata/workloads")
|
shutil.rmtree("testdata/workloads")
|
||||||
sh.git.checkout("--", "testdata/workloads")
|
run_git(["checkout", "--", "testdata/workloads"])
|
||||||
sh.git.checkout(current_hash)
|
run_git(["checkout", current_hash])
|
||||||
restore_workloads(workloads)
|
restore_workloads(workloads)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -54,7 +54,6 @@ requests == 2.21.0
|
|||||||
urllib3 == 1.24.2
|
urllib3 == 1.24.2
|
||||||
certifi == 2020.12.5
|
certifi == 2020.12.5
|
||||||
sasl == 0.2.1
|
sasl == 0.2.1
|
||||||
sh == 1.11
|
|
||||||
six == 1.14.0
|
six == 1.14.0
|
||||||
sqlparse == 0.3.1
|
sqlparse == 0.3.1
|
||||||
texttable == 0.8.3
|
texttable == 0.8.3
|
||||||
|
|||||||
Reference in New Issue
Block a user