mirror of
https://github.com/apache/impala.git
synced 2025-12-19 18:12:08 -05:00
This script compares two branches and optionally cherry-picks changes across. It uses the Gerrit Change-Id as the key, and it supports a configuration file and a string to ignore commits. Change-Id: I6120ec2d6e914a1e5fda568178b32aafda8722a9 Reviewed-on: http://gerrit.cloudera.org:8080/9045 Reviewed-by: Jim Apple <jbapple-impala@apache.org> Tested-by: Impala Public Jenkins
274 lines
11 KiB
Python
Executable File
274 lines
11 KiB
Python
Executable File
#!/usr/bin/env python
|
|
#
|
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
# you may not use this file except in compliance with the License.
|
|
# You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
# Text shown as the argparse description (see create_parser). The JSON snippet
# below is meant to be copy-pasted as a starting point for ignored_commits.json,
# so it must itself be valid JSON.
HELP = '''
Compares two specified branches, using the Gerrit Change-Id as the
primary identifier. Ignored commits can be added via a JSON
configuration file or with a special string in the commit message.
Changes can be cherrypicked with the --cherry_pick argument.

This script can be used to keep two development branches
(by default, "master" and "2.x", in sync). It is equivalent
to cherry-picking commits one by one, but automates identifying
the commits to cherry-pick. Unlike "git cherry", it uses
the Gerrit Change-Id identifier in the commit message
as a key.

The ignored_commits.json configuration file is of the following
form. Note that commits are the full 20-byte git hashes.

[
  {
    "source": "master",
    "target": "2.x",
    "commits": [
      { "hash": "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", "comment": "..."},
      { "hash": "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb", "comment": "..."}
    ]
  }
]

The --target_remote_name is optional. If not specified, the target remote is set to
the value of the --source_remote_name. Debug logging to stderr can be enabled with
--verbose.

Example:

$bin/compare_branches.py --source_branch master --target_branch 2.x
--------------------------------------------------------------------------------
Commits in asf-gerrit/master but not in asf-gerrit/2.x:
--------------------------------------------------------------------------------
35a3e186d61b8f365b0f7d1127be311758437e16 IMPALA-5478: Run TPCDS queries with decimal_v2 enabled (Thu Jan 18 03:28:51 2018 +0000) - Taras Bobrovytsky
d9b6fd073055b436c7404d49454dc215b2c7a369 IMPALA-6386: Invalidate metadata at table level for dataload (Wed Jan 17 22:52:58 2018 +0000) - Joe McDonnell
dcc7be0ed483b332dac22d6596f56ff2a6cfdaa3 IMPALA-4315: Allow USE and SHOW TABLES if the user has only column privileges (Wed Jan 17 22:40:13 2018 +0000) - Csaba Ringhofer
b6e43133e671773d2757612f72cfcdb0ff303226 IMPALA-6399: Increase timeout in test_observability to reduce flakiness (Wed Jan 17 22:31:33 2018 +0000) - Lars Volker
--------------------------------------------------------------------------------
Jira keys referenced (Note: not all commit messages will reference a jira key):
IMPALA-5478,IMPALA-6386,IMPALA-4315,IMPALA-6399
--------------------------------------------------------------------------------
'''
|
|
|
|
import argparse
|
|
import json
|
|
import logging
|
|
import os
|
|
import re
|
|
import sh
|
|
import sys
|
|
|
|
from collections import defaultdict
|
|
try:
|
|
from collections import OrderedDict
|
|
except ImportError:
|
|
from ordereddict import OrderedDict
|
|
from pprint import pformat
|
|
|
|
def create_parser():
    """Builds the command-line argument parser for this script.

    The parser uses the module-level HELP text as its description and
    returns an argparse.ArgumentParser; nothing is parsed here.
    """

    class CustomFormatter(argparse.ArgumentDefaultsHelpFormatter,
                          argparse.RawDescriptionHelpFormatter):
        """Keeps the description verbatim while still listing option defaults."""

    # The default ignore list lives next to this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    ignored_commits_default = os.path.join(script_dir, 'ignored_commits.json')

    parser = argparse.ArgumentParser(description=HELP, formatter_class=CustomFormatter)

    parser.add_argument(
        '--cherry_pick', action='store_true', default=False,
        help=('Cherry-pick mismatched commits to current branch. This '
              'must match (in the hash sense) the target branch.'))
    parser.add_argument('--source_branch', default='master')
    parser.add_argument('--target_branch', default='2.x')
    parser.add_argument(
        '--source_remote_name', default='asf-gerrit',
        help=('Name of the source git remote. If set to empty string, '
              'this remote is not fetched and branch names are used '
              ' as is; otherwise, the source ref is remote/branch.'))
    parser.add_argument(
        '--target_remote_name', default=None,
        help=('Name of the target git remote; defaults to source remote. '
              'Empty strings are handled the same way as --source_remote_name.'))
    parser.add_argument(
        '--ignored_commits_file', default=ignored_commits_default,
        help='JSON File that contains ignored commits as specified in the help')
    parser.add_argument(
        '--skip_commits_matching', default="Cherry-picks: not for {branch}",
        help=('String in commit messages that causes the commit to be ignored. '
              ' {branch} is replaced with target branch; the search is case-insensitive'))
    parser.add_argument(
        '--verbose', '-v', action='store_true', default=False,
        help='Turn on DEBUG and INFO logging')
    return parser
|
|
|
|
def read_ignored_commits(ignored_commits_file):
    '''Returns a dictionary containing commits that should be ignored.

    ignored_commits_file is a path to a JSON file with schema
    specified at the top of this file.

    The returned dictionary is a defaultdict whose keys are
    (source_branch, target_branch) tuples and whose values are sets of
    git hashes. Looking up a pair that is not in the file yields an
    empty set. If the file lists the same (source, target) pair more
    than once, the hashes of all such entries are merged.

    Errors from opening the file, from parsing the JSON, or from missing
    "source"/"target"/"commits"/"hash" keys are not caught here.
    '''
    ignored_commits = defaultdict(set)
    with open(ignored_commits_file) as f:
        json_data = json.load(f)

    for result_dict in json_data:
        logging.debug("Parsing result_dict: %s", result_dict)
        branch_pair = (result_dict['source'], result_dict['target'])
        # update() rather than assignment, so a repeated pair extends the set
        # instead of silently replacing the hashes read earlier.
        ignored_commits[branch_pair].update(
            commit["hash"] for commit in result_dict['commits'])
    return ignored_commits
|
|
|
|
def build_commit_map(branch, merge_base):
    '''Creates a map from change id to (hash, subject, author, date, body).

    Runs "git log <branch> ^<merge_base>" and parses every commit it reports.

    branch is the ref whose commits are listed; merge_base is the commit
    that is excluded (together with its ancestors) from the listing.

    Returns an OrderedDict keyed by the Gerrit Change-Id found in the commit
    body, in the order git log emits the commits. Commits without a
    Change-Id are logged with a warning and left out. If a body has several
    Change-Id lines the first is used; if several commits share a Change-Id,
    the one emitted last by git log replaces the earlier value.

    Side effect: sets GIT_PAGER to the empty string in os.environ.
    '''
    # Disable git pager in order for the sh.git.log command to work
    os.environ['GIT_PAGER'] = ''

    # Fields are joined with the ASCII unit separator and each commit is
    # terminated with the record separator, so free-form text (subject, body)
    # can be split unambiguously.
    fields = ['%H', '%s', '%an', '%cd', '%b']
    pretty_format = '\x1f'.join(fields) + '\x1e'
    result = OrderedDict()
    log_output = sh.git.log(
        branch, "^" + merge_base, pretty=pretty_format, color='never')
    for record in log_output.split('\x1e'):
        # git log emits nothing when there are no changes and leaves a newline
        # after the final record; skip any such whitespace-only residue rather
        # than failing to unpack it below.
        if not record.strip():
            continue
        # Bound the split so a stray separator inside the body (the last
        # field) cannot produce extra fields.
        commit_hash, subject, author, date, body = [
            t.strip() for t in record.split('\x1f', len(fields) - 1)]
        change_id_matches = re.findall('Change-Id: (.*)', body)
        if not change_id_matches:
            logging.warning('Commit %s (%s...) has no Change-Id.',
                            commit_hash, subject[:40])
            continue
        if len(change_id_matches) > 1:
            logging.warning("Commit %s contains multiple change ids; using first one.",
                            commit_hash)
        change_id = change_id_matches[0]
        result[change_id] = (commit_hash, subject, author, date, body)
    logging.debug("Commit map for branch %s has size %d.", branch, len(result))
    return result
|
|
|
|
def cherrypick(cherry_pick_hashes, full_target_branch_name):
    """Cherrypicks the given commits onto the current HEAD.

    cherry_pick_hashes is a list of git hashes in the order collected by
    main(); they are applied in reverse of that order. The list itself is
    not modified.

    full_target_branch_name is the ref that HEAD must resolve to. If HEAD
    and that ref resolve to different hashes, a message is printed and the
    process exits with status 1 before anything is cherry-picked.

    Nothing is checked or cherry-picked when the list is empty.

    Note that this function does not push to the remote.
    """
    # Single-argument print(...) behaves the same under Python 2 and 3.
    print("Cherrypicking %d changes." % len(cherry_pick_hashes))

    if not cherry_pick_hashes:
        return

    # Cherrypicking only makes sense if we're on the equivalent of the target branch.
    head_sha = sh.git('rev-parse', 'HEAD').strip()
    target_branch_sha = sh.git('rev-parse', full_target_branch_name).strip()
    if head_sha != target_branch_sha:
        print("Cannot cherrypick because %s (%s) and HEAD (%s) are divergent." % (
            full_target_branch_name, target_branch_sha, head_sha))
        sys.exit(1)

    # Iterate over a reversed view instead of reversing in place, so the
    # caller's list keeps its original order.
    for cherry_pick_hash in reversed(cherry_pick_hashes):
        sh.git('cherry-pick', '--keep-redundant-commits', cherry_pick_hash)
|
|
|
|
|
|
def main():
    """Compares the source and target branches and reports missing commits.

    Steps:
      1. Parse the command line and configure logging (DEBUG with --verbose,
         WARNING otherwise).
      2. Fetch the configured remotes (skipped for a remote given as the
         empty string) and derive the full ref names.
      3. Build Change-Id -> commit maps for both branches, relative to their
         merge base.
      4. Print every source commit whose Change-Id is absent from the target
         and which is not ignored by the config file or by commit message,
         followed by the Jira keys found in the printed commit subjects.
      5. With --cherry_pick, cherry-pick the printed commits.
    """
    options = create_parser().parse_args()

    log_level = logging.DEBUG if options.verbose else logging.WARNING
    logging.basicConfig(level=log_level,
                        format='%(asctime)s %(threadName)s %(levelname)s: %(message)s')

    if options.target_remote_name is None:
        options.target_remote_name = options.source_remote_name

    # Ensure all branches are up to date, unless remotes are disabled
    # by specifying them with an empty string.
    if options.source_remote_name != "":
        sh.git.fetch(options.source_remote_name)
        full_source_branch_name = options.source_remote_name + '/' + options.source_branch
    else:
        full_source_branch_name = options.source_branch
    if options.target_remote_name != "":
        # The shared remote was already fetched above.
        if options.source_remote_name != options.target_remote_name:
            sh.git.fetch(options.target_remote_name)
        full_target_branch_name = options.target_remote_name + '/' + options.target_branch
    else:
        full_target_branch_name = options.target_branch

    merge_base = sh.git("merge-base",
                        full_source_branch_name, full_target_branch_name).strip()
    source_commits = build_commit_map(full_source_branch_name, merge_base)
    target_commits = build_commit_map(full_target_branch_name, merge_base)

    ignored_commits = read_ignored_commits(options.ignored_commits_file)
    logging.debug("ignored commits from {0}:\n{1}"
                  .format(options.ignored_commits_file, pformat(ignored_commits)))
    # The config file is keyed by plain branch names, not by remote/branch refs.
    # Looked up once here (after the debug dump above) since it is loop-invariant.
    ignored_hashes = ignored_commits[(options.source_branch, options.target_branch)]
    commits_ignored = []  # Track commits actually ignored for debug logging

    cherry_pick_hashes = []
    print('-' * 80)
    print('Commits in {0} but not in {1}:'.format(
        full_source_branch_name, full_target_branch_name))
    print('-' * 80)
    jira_keys = []
    jira_key_pat = re.compile(r'(IMPALA-\d+)')
    skip_commits_matching = options.skip_commits_matching.replace(
        "{branch}", options.target_branch).lower()
    for change_id, (commit_hash, msg, author, date, body) in source_commits.items():
        change_in_target = change_id in target_commits
        ignore_by_config = commit_hash in ignored_hashes
        # An empty pattern is a substring of every string; treat it as
        # "no message-based skipping" instead of ignoring every commit.
        ignore_by_commit_message = bool(skip_commits_matching) and (
            skip_commits_matching in msg.lower()
            or skip_commits_matching in body.lower())
        # This conditional block just for debug logging of ignored commits
        if ignore_by_config or ignore_by_commit_message:
            if change_in_target:
                logging.debug("Not ignoring commit because change is already in target: {0}"
                              .format(commit_hash))
            else:
                if ignore_by_commit_message:
                    logging.debug("Ignoring commit {0} by commit message."
                                  .format(commit_hash))
                else:
                    logging.debug("Ignoring commit {0} by config file."
                                  .format(commit_hash))
                commits_ignored.append(commit_hash)
        else:
            logging.debug("NOT ignoring commit {0} since not in ignored commits ({1},{2})"
                          .format(commit_hash, options.source_branch,
                                  options.target_branch))
        if not (change_in_target or ignore_by_config or ignore_by_commit_message):
            print(u'{0} {1} ({2}) - {3}'.format(commit_hash, msg, date, author))
            cherry_pick_hashes.append(commit_hash)
            jira_keys.extend(jira_key_pat.findall(msg))

    print('-' * 80)

    print("Jira keys referenced (Note: not all commit messages will reference a jira key):")
    print(','.join(jira_keys))
    print('-' * 80)

    logging.debug("Commits actually ignored (change was not in target): {0}"
                  .format(pformat(commits_ignored)))

    if options.cherry_pick:
        cherrypick(cherry_pick_hashes, full_target_branch_name)
|
|
|
|
# Run the comparison only when executed as a script, not when imported.
if '__main__' == __name__:
    main()
|