mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
Fixes subprocess.check_output calls for Python 3 using universal_newlines=True. Change-Id: I3dae9113635cf23ae02f1f630de311e64119c456 Reviewed-on: http://gerrit.cloudera.org:8080/19812 Reviewed-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com> Tested-by: Impala Public Jenkins <impala-public-jenkins@cloudera.com>
293 lines
12 KiB
Python
Executable File
293 lines
12 KiB
Python
Executable File
#!/usr/bin/env python
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# Future imports must happen at the beginning of the file
|
|
from __future__ import absolute_import, division, print_function
|
|
|
|
HELP = '''
|
|
Compares two specified branches, using the Gerrit Change-Id as the
|
|
primary identifier. Ignored commits can be added via a JSON
|
|
configuration file or with a special string in the commit message.
|
|
Changes can be cherrypicked with the --cherry_pick argument.
|
|
|
|
This script can be used to keep two development branches
|
|
(by default, "master" and "2.x", in sync). It is equivalent
|
|
to cherry-picking commits one by one, but automates identifying
|
|
the commits to cherry-pick. Unlike "git cherry", it uses
|
|
the Gerrit Change-Id identifier in the commit message
|
|
as a key.
|
|
|
|
The ignored_commits.json configuration file is of the following
|
|
form. Note that commits are the full 20-byte git hashes.
|
|
|
|
[
|
|
{
|
|
"source": "master",
|
|
"target": "2.x",
|
|
"commits": [
|
|
{ "hash": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "comment": "..."},
|
|
{ "hash": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", "comment": "..."}
|
|
]
|
|
}
|
|
]
|
|
|
|
The --target_remote_name is optional. If not specified, the target remote is set to
|
|
the value of the --source_remote_name. Debug logging to stderr can be enabled with
|
|
--verbose.
|
|
|
|
Example:
|
|
|
|
$bin/compare_branches.py --source_branch master --target_branch 2.x
|
|
--------------------------------------------------------------------------------
|
|
Commits in asf-gerrit/master but not in asf-gerrit/2.x:
|
|
--------------------------------------------------------------------------------
|
|
35a3e186d61b8f365b0f7d1127be311758437e16 IMPALA-5478: Run TPCDS queries with decimal_v2 enabled (Thu Jan 18 03:28:51 2018 +0000) - Taras Bobrovytsky
|
|
d9b6fd073055b436c7404d49454dc215b2c7a369 IMPALA-6386: Invalidate metadata at table level for dataload (Wed Jan 17 22:52:58 2018 +0000) - Joe McDonnell
|
|
dcc7be0ed483b332dac22d6596f56ff2a6cfdaa3 IMPALA-4315: Allow USE and SHOW TABLES if the user has only column privileges (Wed Jan 17 22:40:13 2018 +0000) - Csaba Ringhofer
|
|
b6e43133e671773d2757612f72cfcdb0ff303226 IMPALA-6399: Increase timeout in test_observability to reduce flakiness (Wed Jan 17 22:31:33 2018 +0000) - Lars Volker
|
|
--------------------------------------------------------------------------------
|
|
Jira keys referenced (Note: not all commit messages will reference a jira key):
|
|
IMPALA-5478,IMPALA-6386,IMPALA-4315,IMPALA-6399
|
|
--------------------------------------------------------------------------------
|
|
'''
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import subprocess
|
|
import sys
|
|
|
|
from collections import defaultdict
|
|
from collections import OrderedDict
|
|
from pprint import pformat
|
|
|
|
def create_parser():
  """Build the argparse parser that defines this script's command line.

  The module-level HELP text is used verbatim as the description, and every
  option's default value is appended to its help string.
  """
  class _RawDescriptionWithDefaults(argparse.ArgumentDefaultsHelpFormatter,
                                    argparse.RawDescriptionHelpFormatter):
    """Formatter mix-in: keep the description as written, but show defaults."""

  # By default the ignore list lives next to this script.
  script_dir = os.path.dirname(os.path.abspath(__file__))
  default_ignored_commits_path = os.path.join(script_dir, 'ignored_commits.json')

  arg_parser = argparse.ArgumentParser(
      description=HELP,
      formatter_class=_RawDescriptionWithDefaults)
  add = arg_parser.add_argument

  add('--cherry_pick', action='store_true', default=False,
      help=('Cherry-pick mismatched commits to current branch. This '
            'must match (in the hash sense) the target branch.'))
  add('--partial_ok', action='store_true', default=False,
      help='Exit with success if at least one cherrypick succeeded.')
  add('--source_branch', default='master')
  add('--target_branch', default='2.x')
  add('--source_remote_name', default='asf-gerrit',
      help=('Name of the source git remote. If set to empty string, '
            'this remote is not fetched and branch names are used '
            ' as is; otherwise, the source ref is remote/branch.'))
  add('--target_remote_name', default=None,
      help=('Name of the target git remote; defaults to source remote. '
            'Empty strings are handled the same way as --source_remote_name.'))
  add('--ignored_commits_file', default=default_ignored_commits_path,
      help='JSON File that contains ignored commits as specified in the help')
  add('--skip_commits_matching',
      default="Cherry-pick.?:.?not (for|to) {branch}",
      help=('Regex searched for in commit messages that causes the commit to be ignored.'
            ' {branch} is replaced with target branch; the search is case-insensitive'))
  add('--verbose', '-v', action='store_true', default=False,
      help='Turn on DEBUG and INFO logging')
  return arg_parser
|
|
|
|
def read_ignored_commits(ignored_commits_file):
  """Load the commits that should be ignored from a JSON configuration file.

  Args:
    ignored_commits_file: path to a JSON file containing a list of objects of
      the form {"source": <branch>, "target": <branch>,
      "commits": [{"hash": <git hash>, ...}, ...]}.

  Returns:
    A defaultdict whose keys are (source_branch, target_branch) tuples and
    whose values are sets of git hashes. Branch pairs that do not appear in
    the file map to an empty set. If the same branch pair appears in several
    entries, their hashes are merged rather than the last entry winning.
  """
  ignored_commits = defaultdict(set)
  with open(ignored_commits_file) as f:
    json_data = json.load(f)
  for result_dict in json_data:
    logging.debug("Parsing result_dict: %s", result_dict)
    branch_pair = (result_dict['source'], result_dict['target'])
    # update() (rather than assignment) keeps hashes from earlier entries for
    # the same branch pair; indexing first also registers pairs with no commits.
    ignored_commits[branch_pair].update(
        commit["hash"] for commit in result_dict['commits'])
  return ignored_commits
|
|
|
|
def build_commit_map(branch, merge_base):
  """Map each Gerrit Change-Id on a branch to its commit details.

  Runs "git log" for the commits reachable from branch but not from
  merge_base and returns an OrderedDict, in git log output order, from
  change id to a (hash, subject, author, date, body) tuple. Commits whose
  body has no "Change-Id: " line are logged and left out; if a body has
  several, the first one is used.
  """
  # Make sure git never hands its output to a pager.
  os.environ['GIT_PAGER'] = ''

  # Fields are separated by the ASCII unit separator and commits by the
  # record separator, so free-form text (subject, body) splits unambiguously.
  field_sep = '\x1f'
  record_sep = '\x1e'
  log_format = field_sep.join(('%H', '%s', '%an', '%cd', '%b')) + record_sep
  git_log_cmd = ["git", "log", branch, "^" + merge_base, "--pretty=" + log_format,
                 "--color=never"]
  raw_log = subprocess.check_output(git_log_cmd, universal_newlines=True)

  commit_map = OrderedDict()
  for record in raw_log.split(record_sep):
    # "" means git log reported nothing; "\n" is the newline git log emits
    # after the final record.
    if record in ("", "\n"):
      continue
    sha, subject, author, date, body = (
        part.strip() for part in record.split(field_sep))
    found_ids = re.findall('Change-Id: (.*)', body)
    if not found_ids:
      logging.warning('Commit {0} ({1}...) has no Change-Id.'.format(
          sha, subject[:40]))
      continue
    if len(found_ids) > 1:
      logging.warning("Commit %s contains multiple change ids; using first one.",
                      sha)
    commit_map[found_ids[0]] = (sha, subject, author, date, body)
  logging.debug("Commit map for branch %s has size %d.", branch, len(commit_map))
  return commit_map
|
|
|
|
def cherrypick(cherry_pick_hashes, full_target_branch_name, partial_ok):
  """Cherry-pick the given commits onto the current HEAD.

  Args:
    cherry_pick_hashes: list of git hashes. They are applied in the REVERSE of
      the given order, so that a list in "git log" order (newest first) is
      picked oldest first. The list itself is left unmodified.
    full_target_branch_name: ref that must resolve to the same commit as HEAD;
      if it does not, a message is printed and the process exits with status 1.
    partial_ok: if true and at least one cherry-pick has already succeeded, a
      failing cherry-pick is aborted and the function returns normally.

  Raises:
    Exception: if a cherry-pick fails and either partial_ok is false or no
      cherry-pick has succeeded yet. The failed pick is not aborted in this case.

  Note that this function does not push to the remote.
  """
  print("Cherrypicking %d changes." % (len(cherry_pick_hashes),))

  if not cherry_pick_hashes:
    return

  # Cherrypicking only makes sense if we're on the equivalent of the target branch.
  head_sha = subprocess.check_output(
      ['git', 'rev-parse', 'HEAD'], universal_newlines=True).strip()
  target_branch_sha = subprocess.check_output(
      ['git', 'rev-parse', full_target_branch_name], universal_newlines=True).strip()
  if head_sha != target_branch_sha:
    print("Cannot cherrypick because %s (%s) and HEAD (%s) are divergent." % (
        full_target_branch_name, target_branch_sha, head_sha))
    sys.exit(1)

  # Iterate over a reversed view instead of reversing in place, so the
  # caller's list is not modified as a side effect.
  for i, cherry_pick_hash in enumerate(reversed(cherry_pick_hashes)):
    ret = subprocess.call(
        ['git', 'cherry-pick', '--keep-redundant-commits', cherry_pick_hash])
    if ret == 0:
      continue
    if partial_ok and i > 0:
      # Earlier picks succeeded: discard only the failed one and stop.
      subprocess.check_call(['git', 'cherry-pick', '--abort'])
      print("Failed to cherry-pick %s; stopping picks." % (cherry_pick_hash,))
      return
    raise Exception("Failed to cherry-pick: %s" % (cherry_pick_hash,))
|
|
|
|
def main():
  """Entry point: report (and optionally cherry-pick) commits missing from the target.

  Steps:
    1. Parse the command line and configure logging.
    2. Fetch the source and target branches, unless the corresponding remote
       name is the empty string.
    3. Build Change-Id -> commit maps for both branches relative to their
       merge base.
    4. Print every source commit whose Change-Id is not in the target and that
       is not ignored by the config file or by the skip regex, followed by the
       Jira keys found in the subjects of the printed commits.
    5. With --cherry_pick, cherry-pick the printed commits.
  """
  parser = create_parser()
  options = parser.parse_args()

  log_level = logging.DEBUG if options.verbose else logging.WARNING
  logging.basicConfig(level=log_level,
                      format='%(asctime)s %(threadName)s %(levelname)s: %(message)s')

  if options.target_remote_name is None:
    options.target_remote_name = options.source_remote_name

  # Ensure all branches are up to date, unless remotes are disabled
  # by specifying them with an empty string.
  if options.source_remote_name != "":
    subprocess.check_call(['git', 'fetch', options.source_remote_name,
                           options.source_branch])
    full_source_branch_name = options.source_remote_name + '/' + options.source_branch
  else:
    full_source_branch_name = options.source_branch
  if options.target_remote_name != "":
    # Skip the second fetch when the target is the very ref fetched above.
    if (options.source_remote_name != options.target_remote_name
        or options.source_branch != options.target_branch):
      subprocess.check_call(['git', 'fetch', options.target_remote_name,
                             options.target_branch])
    full_target_branch_name = options.target_remote_name + '/' + options.target_branch
  else:
    full_target_branch_name = options.target_branch

  merge_base = subprocess.check_output(
      ["git", "merge-base", full_source_branch_name, full_target_branch_name],
      universal_newlines=True).strip()
  source_commits = build_commit_map(full_source_branch_name, merge_base)
  target_commits = build_commit_map(full_target_branch_name, merge_base)

  ignored_commits = read_ignored_commits(options.ignored_commits_file)
  logging.debug("ignored commits from %s:\n%s",
                options.ignored_commits_file, pformat(ignored_commits))
  commits_ignored = []  # Track commits actually ignored for debug logging

  cherry_pick_hashes = []
  print('-' * 80)
  print('Commits in {0} but not in {1}:'.format(
      full_source_branch_name, full_target_branch_name))
  print('-' * 80)
  jira_keys = []
  jira_key_pat = re.compile(r'(IMPALA-\d+)')
  # The branch name is literal text, so escape it before substituting it into
  # the regex: otherwise '.' in "2.x" matches any character and a name such as
  # "c++" is not even a valid pattern. Compile once, outside the loop.
  skip_commits_pat = re.compile(
      options.skip_commits_matching.format(branch=re.escape(options.target_branch)),
      re.IGNORECASE)
  for change_id, (commit_hash, msg, author, date, body) in source_commits.items():
    change_in_target = change_id in target_commits
    ignore_by_config = commit_hash in ignored_commits[
        (options.source_branch, options.target_branch)]
    ignore_by_commit_message = skip_commits_pat.search("\n".join([msg, body]))
    # This conditional block just for debug logging of ignored commits
    if ignore_by_config or ignore_by_commit_message:
      if change_in_target:
        logging.debug("Not ignoring commit because change is already in target: %s",
                      commit_hash)
      else:
        if ignore_by_commit_message:
          logging.debug("Ignoring commit %s by commit message.", commit_hash)
        else:
          logging.debug("Ignoring commit %s by config file.", commit_hash)
        commits_ignored.append(commit_hash)
    else:
      logging.debug("NOT ignoring commit %s since not in ignored commits (%s,%s)",
                    commit_hash, options.source_branch, options.target_branch)
    if not change_in_target and not ignore_by_config and not ignore_by_commit_message:
      print('{0} {1} ({2}) - {3}'.format(commit_hash, msg, date, author))
      cherry_pick_hashes.append(commit_hash)
      jira_keys += jira_key_pat.findall(msg)

  print('-' * 80)

  print("Jira keys referenced (Note: not all commit messages will reference a jira key):")
  print(','.join(jira_keys))
  print('-' * 80)

  logging.debug("Commits actually ignored (change was not in target): %s",
                pformat(commits_ignored))

  if options.cherry_pick:
    cherrypick(cherry_pick_hashes, full_target_branch_name, options.partial_ok)
|
|
|
|
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
  main()
|