Merge "Add repo_diff tools"
am: 725af68e38
Change-Id: Ibb8f4de45c91ed6190d5d7ecce1e7129eeab2f6c
This commit is contained in:
1
tools/repo_diff/exclusions.txt
Normal file
1
tools/repo_diff/exclusions.txt
Normal file
@@ -0,0 +1 @@
|
||||
platform/prebuilts/.*
|
||||
150
tools/repo_diff/git_commits_not_upstreamed.py
Normal file
150
tools/repo_diff/git_commits_not_upstreamed.py
Normal file
@@ -0,0 +1,150 @@
|
||||
"""List downstream commits that are not upstream and are visible in the diff.
|
||||
|
||||
Only include changes that are visible when you diff
|
||||
the downstream and upstream branches.
|
||||
|
||||
This will naturally exclude changes that already landed upstream
|
||||
in some form but were not merged or cherry picked.
|
||||
|
||||
This will also exclude changes that were added then reverted downstream.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
|
||||
|
||||
def git(args):
    """Run a git command and return its standard output as text.

    Args:
      args: A list of arguments to be sent to the git command.

    Returns:
      The standard output of the git command as a text string.

    Raises:
      subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    command = ['git'] + list(args)
    # git's stderr is discarded; only stdout is returned to the caller.
    with open(os.devnull, 'w') as devnull:
        # universal_newlines=True makes check_output return text instead of
        # bytes on Python 3; callers in this file split the output with str
        # separators, which fails on bytes.
        return subprocess.check_output(
            command, stderr=devnull, universal_newlines=True)
|
||||
|
||||
|
||||
class CommitFinder(object):
    """Callable that collects the commits `git blame` reports for a file.

    The blame is restricted to the revision range `upstream..downstream` and
    is run inside `working_dir`.
    """

    def __init__(self, working_dir, upstream, downstream):
        # Directory passed to `git -C`; filenames are resolved against it.
        self.working_dir = working_dir
        # Revisions delimiting the blamed range (upstream..downstream).
        self.upstream = upstream
        self.downstream = downstream

    def __call__(self, filename):
        """Return the set of commit ids blamed for lines of `filename`.

        Args:
          filename: Path of the file, relative to the working directory.

        Returns:
          A set with the first field of every non-empty blame line. The set is
          empty when `filename` is not a regular file in the working directory.
        """
        if not os.path.isfile(os.path.join(self.working_dir, filename)):
            return set()

        blame_output = git(['-C', self.working_dir, 'blame', '-l',
                            '%s..%s' % (self.upstream, self.downstream),
                            '--', filename])

        insertion_commits = set()
        for line in blame_output.splitlines():
            # The commit is the first field of a line. Splitting on arbitrary
            # whitespace yields an empty list for an empty line, so the check
            # below really skips it (split(' ', 1) would return ['']).
            blame_fields = line.split(None, 1)
            if blame_fields:
                insertion_commits.add(blame_fields[0])

        return insertion_commits
|
||||
|
||||
|
||||
def find_insertion_commits(upstream, downstream, working_dir):
    """Collects every commit that git blame credits for lines in the diff.

    Args:
      upstream: Upstream branch to be used as a baseline.
      downstream: Downstream branch to search for commits missing upstream.
      working_dir: Run as if git was started in this directory.

    Returns:
      A set of commits that insert lines on top of the upstream baseline.
    """
    # List the files that differ between the two revisions; the lower-case
    # 'd' filter leaves deleted files out of the listing.
    changed_files = git(['-C', working_dir, 'diff',
                         '--name-only',
                         '--diff-filter=d',
                         upstream,
                         downstream]).splitlines()

    blame_file = CommitFinder(working_dir, upstream, downstream)

    found = set()
    for changed_file in changed_files:
        found.update(blame_file(changed_file))
    return found
|
||||
|
||||
|
||||
def find(upstream, downstream, working_dir):
    """Finds downstream commits that are not upstream and are visible in the diff.

    Args:
      upstream: Upstream branch to be used as a baseline.
      downstream: Downstream branch to search for commits missing upstream.
      working_dir: Run as if git was started in this directory.

    Returns:
      A set of downstream commits missing upstream.
    """
    revision_range = '%s..%s' % (upstream, downstream)
    downstream_only = set(
        git(['-C', working_dir, 'rev-list', '--no-merges',
             revision_range]).splitlines())

    # Without downstream-only commits there is nothing further to filter.
    if not downstream_only:
        return set()

    inserting = find_insertion_commits(upstream, downstream, working_dir)

    # Commits that are only downstream and also show up in 'git blame' are the
    # ones that insert lines in the diff between upstream and downstream.
    # TODO(diegowilson) add commits that deleted lines
    return downstream_only & inserting
|
||||
|
||||
|
||||
def main():
    """Parse the command line and print the commits not yet upstream.

    One commit id is printed per line. The ids are sorted so that the output
    is stable between runs (the result of find() is an unordered set).
    """
    parser = argparse.ArgumentParser(
        description='Finds commits yet to be applied upstream.')
    parser.add_argument(
        'upstream',
        help='Upstream branch to be used as a baseline.',
    )
    parser.add_argument(
        'downstream',
        help='Downstream branch to search for commits missing upstream.',
    )
    parser.add_argument(
        '-C',
        '--working_directory',
        help='Run as if git was started in this directory',
        default='.',)
    args = parser.parse_args()
    upstream = args.upstream
    downstream = args.downstream
    working_dir = os.path.abspath(args.working_directory)

    print('\n'.join(sorted(find(upstream, downstream, working_dir))))


if __name__ == '__main__':
    main()
|
||||
166
tools/repo_diff/repo_diff_android.py
Normal file
166
tools/repo_diff/repo_diff_android.py
Normal file
@@ -0,0 +1,166 @@
|
||||
#!/usr/bin/python
|
||||
"""Diff a repo (downstream) and its upstream.
|
||||
|
||||
This script:
|
||||
1. Downloads a repo source tree with specified manifest URL, branch
|
||||
and release tag.
|
||||
2. Retrieves the BUILD_ID from $downstream/build/core/build_id.mk.
|
||||
3. Downloads the upstream using the BUILD_ID.
|
||||
4. Diffs each project in these two repos.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import subprocess
|
||||
import repo_diff_downstream
|
||||
|
||||
# Description passed to the ArgumentParser in parse_args().
HELP_MSG = "Diff a repo (downstream) and its upstream"
|
||||
|
||||
# Directory names of the downstream and upstream checkouts; diff() resolves
# them with os.path.abspath.
WORKSPACE = "workspace"
|
||||
UPSTREAM_WORKSPACE = "upstream_workspace"
|
||||
|
||||
# Default values for the manifest options defined in parse_args().
DEFAULT_MANIFEST_URL = "https://android.googlesource.com/platform/manifest"
|
||||
DEFAULT_MANIFEST_BRANCH = "oreo-dev"
|
||||
DEFAULT_UPSTREAM_MANIFEST_URL = "https://android.googlesource.com/platform/manifest"
|
||||
DEFAULT_UPSTREAM_MANIFEST_BRANCH = "master"
|
||||
# Directory that contains this script.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
|
||||
# Default exclusions file: exclusions.txt located in SCRIPT_DIR.
DEFAULT_EXCLUSIONS_FILE = os.path.join(SCRIPT_DIR, "exclusions.txt")
|
||||
|
||||
|
||||
def parse_args():
    """Parse the command-line arguments of this script.

    Returns:
      The argparse namespace with manifest_url, manifest_branch,
      upstream_manifest_url, upstream_manifest_branch, exclusions_file and tag.
    """
    parser = argparse.ArgumentParser(description=HELP_MSG)

    parser.add_argument("-u", "--manifest-url",
                        help="manifest url",
                        default=DEFAULT_MANIFEST_URL)
    parser.add_argument("-b", "--manifest-branch",
                        help="manifest branch",
                        default=DEFAULT_MANIFEST_BRANCH)
    parser.add_argument("-r", "--upstream-manifest-url",
                        help="upstream manifest url",
                        default=DEFAULT_UPSTREAM_MANIFEST_URL)
    parser.add_argument("-a", "--upstream-manifest-branch",
                        help="upstream manifest branch",
                        default=DEFAULT_UPSTREAM_MANIFEST_BRANCH)
    parser.add_argument("-e", "--exclusions-file",
                        help="exclusions file",
                        default=DEFAULT_EXCLUSIONS_FILE)
    # The trailing space keeps the two adjacent literals from being joined
    # into "willsync".
    parser.add_argument("-t", "--tag",
                        help="release tag (optional). If not set then will "
                        "sync the latest in the branch.")

    return parser.parse_args()
|
||||
|
||||
|
||||
def repo_init(url, rev, workspace):
    """Repo init with specific url and rev.

    Args:
      url: manifest url
      rev: manifest branch, or rev
      workspace: the folder to init and sync code

    Raises:
      subprocess.CalledProcessError: If `repo init` exits with a non-zero
        status.
    """
    print("repo init:\n url: %s\n rev: %s\n workspace: %s" %
          (url, rev, workspace))

    # An argument list (no shell) passes url and rev to repo verbatim, so
    # spaces or shell metacharacters in them cannot alter the command.
    subprocess.check_output(["repo", "init",
                             "--manifest-url=%s" % url,
                             "--manifest-branch=%s" % rev],
                            cwd=workspace)
|
||||
|
||||
|
||||
def repo_sync(workspace, retry=5):
    """Run `repo sync` in a workspace, retrying when it fails.

    Args:
      workspace: the folder in which to run repo sync
      retry: maximum number of attempts

    A failing attempt prints the captured output and is retried. When every
    attempt fails the function still returns normally; no error is raised.
    """
    for attempt in range(1, retry + 1):
        print("repo sync (retry=%d/%d):\n workspace: %s" %
              (attempt, retry, workspace))

        try:
            subprocess.check_output(("repo sync --jobs=24 --current-branch --quiet "
                                     "--no-tags --no-clone-bundle"),
                                    cwd=workspace, shell=True)
        except subprocess.CalledProcessError as e:
            # print() call form: valid on Python 2 and 3 and consistent with
            # the other print calls in this file.
            print("Error: %s" % e.output)
        else:
            # Stop retrying if the repo sync was successful
            break
|
||||
|
||||
|
||||
def get_commit_with_keyword(project_path, keyword):
    """Get the latest commit in $project_path with the specific keyword.

    Args:
      project_path: path of the git project to search
      keyword: pattern handed to `git rev-list --grep`

    Returns:
      The output of `git rev-list --max-count=1 --grep=<keyword> HEAD` as text
      with trailing whitespace removed.

    Raises:
      subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    # An argument list (no shell) hands the path and the keyword to git
    # verbatim, whatever characters they contain. universal_newlines=True
    # returns text rather than bytes on Python 3.
    return subprocess.check_output(["git", "-C", project_path,
                                    "rev-list", "--max-count=1",
                                    "--grep=%s" % keyword,
                                    "HEAD"],
                                   universal_newlines=True).rstrip()
|
||||
|
||||
|
||||
def get_build_id(workspace):
    """Get BUILD_ID defined in $workspace/build/core/build_id.mk.

    The file is parsed directly rather than sourced by a shell: `source` is
    not provided by every /bin/sh, which is the shell used by shell=True.

    Args:
      workspace: root folder of the source tree

    Returns:
      The value of the last BUILD_ID assignment in the file, or an empty
      string when the file has no such assignment.

    Raises:
      IOError: If build_id.mk cannot be opened (OSError on Python 3).
    """
    path = os.path.join(workspace, "build", "core", "build_id.mk")

    build_id = ""
    with open(path) as build_id_file:
        for line in build_id_file:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            name, separator, value = line.partition("=")
            if not separator:
                continue
            # Accept "BUILD_ID=x" as well as the make forms ":=" and "?=",
            # optionally prefixed with "export".
            name = name.rstrip(":? \t")
            if name.startswith("export "):
                name = name[len("export "):].strip()
            if name == "BUILD_ID":
                # The last assignment wins, as it would when sourcing the file.
                build_id = value.strip().strip("\"'")
    return build_id
|
||||
|
||||
|
||||
def repo_sync_specific_release(url, branch, tag, workspace):
    """Initialise a workspace and sync it, optionally pinned to a release tag.

    Args:
      url: manifest url
      branch: manifest branch
      tag: release tag; when empty the latest of the branch is synced
      workspace: the folder to init and sync code; created when missing
    """
    workspace_missing = not os.path.exists(workspace)
    if workspace_missing:
        os.makedirs(workspace)

    # The first init fetches the manifest project for the requested branch.
    repo_init(url, branch, workspace)

    if tag:
        # Find the manifest commit mentioning the tag and re-init on it.
        manifests_dir = os.path.join(workspace, ".repo", "manifests")
        tagged_rev = get_commit_with_keyword(manifests_dir, tag)
        repo_init(url, tagged_rev, workspace)

    repo_sync(workspace)
|
||||
|
||||
|
||||
def diff(manifest_url, manifest_branch, tag, upstream_manifest_url,
         upstream_manifest_branch, exclusions_file):
    """Syncs a downstream and an upstream workspace, then diffs them.

    Args:
      manifest_url: downstream manifest url
      manifest_branch: downstream manifest branch
      tag: downstream release tag (may be empty)
      upstream_manifest_url: upstream manifest url
      upstream_manifest_branch: upstream manifest branch
      exclusions_file: path to the exclusions file

    The results are written to project.csv and commit.csv in the current
    directory.
    """
    downstream_root = os.path.abspath(WORKSPACE)
    upstream_root = os.path.abspath(UPSTREAM_WORKSPACE)

    # Downstream tree first: its BUILD_ID selects the upstream revision.
    repo_sync_specific_release(manifest_url, manifest_branch, tag,
                               downstream_root)
    build_id = get_build_id(downstream_root)

    # Upstream tree, pinned to the commit that mentions the BUILD_ID.
    repo_sync_specific_release(upstream_manifest_url,
                               upstream_manifest_branch,
                               build_id,
                               upstream_root)

    # Compare the two trees project by project.
    project_csv = os.path.abspath("project.csv")
    commit_csv = os.path.abspath("commit.csv")
    exclusions_path = os.path.abspath(exclusions_file)
    repo_diff_downstream.diff(upstream_root, downstream_root, project_csv,
                              commit_csv, exclusions_path)
|
||||
|
||||
|
||||
def main():
    """Entry point: parse the command line and run the diff."""
    options = parse_args()

    diff(options.manifest_url,
         options.manifest_branch,
         options.tag,
         options.upstream_manifest_url,
         options.upstream_manifest_branch,
         options.exclusions_file)


if __name__ == "__main__":
    main()
|
||||
491
tools/repo_diff/repo_diff_downstream.py
Normal file
491
tools/repo_diff/repo_diff_downstream.py
Normal file
@@ -0,0 +1,491 @@
|
||||
"""Diffs one repo source tree against an upstream repo source tree.
|
||||
|
||||
Matches the projects from a Gerrit repo workspace to the projects
|
||||
of an upstream workspace. After identifying projects that exist both in the
|
||||
downstream and the upstream workspace, it then diffs each project.
|
||||
|
||||
Finally, the results of the project matching and diffing are reported.
|
||||
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import
|
||||
from __future__ import division
|
||||
from __future__ import print_function
|
||||
import argparse
|
||||
import csv
|
||||
import datetime
|
||||
import multiprocessing
|
||||
import os
|
||||
import re
|
||||
import subprocess
|
||||
import xml.etree.ElementTree as et
|
||||
import git_commits_not_upstreamed
|
||||
|
||||
|
||||
def get_projects(source_tree):
    """Retrieve the dict of projects names and paths.

    Reads <source_tree>/.repo/manifest.xml and collects its <project>
    elements.

    Args:
      source_tree: A path to the source tree.

    Returns:
      A dict of absolute project paths keyed by project names.
    """
    manifest = os.path.join(source_tree, '.repo', 'manifest.xml')
    root = et.parse(manifest).getroot()

    projects = {}
    for project in root.findall('project'):
        # Ignore projects that are not synced by default. The groups are
        # compared as whole names (comma or whitespace separated) so that a
        # group whose name merely contains "notdefault" does not match.
        groups = project.get('groups', '').replace(',', ' ').split()
        if 'notdefault' in groups:
            continue
        name = project.get('name')
        # A project without an explicit path lives at its name.
        path = project.get('path', name)
        projects[name] = os.path.abspath(os.path.join(source_tree, path))

    return projects
|
||||
|
||||
|
||||
def git(args):
    """Run a git command and return its standard output as text.

    Args:
      args: A list of arguments to be sent to the git command.

    Returns:
      The standard output of the git command as a text string.

    Raises:
      subprocess.CalledProcessError: If git exits with a non-zero status.
    """
    command = ['git'] + list(args)
    # git's stderr is discarded; only stdout is returned to the caller.
    with open(os.devnull, 'w') as devnull:
        # universal_newlines=True makes check_output return text instead of
        # bytes on Python 3; callers in this file split the output with str
        # separators, which fails on bytes.
        return subprocess.check_output(
            command, stderr=devnull, universal_newlines=True)
|
||||
|
||||
|
||||
def get_revision_diff_stats(directory, rev_a, rev_b):
    """Summarises `git diff --shortstat` between two revisions.

    Args:
      directory: A path to the git directory to diff.
      rev_a: A git revision to diff.
      rev_b: A git revision to diff.

    Returns:
      A dict with the count of files modified ('file'), lines added
      ('insertion') and lines removed ('deletion'). Counts that do not
      appear in the git output stay at 0.
    """
    shortstat = git(['-C', directory, 'diff', '--shortstat', rev_a, rev_b])

    counts = dict.fromkeys(('file', 'insertion', 'deletion'), 0)
    # The summary is a comma separated list of fragments, each starting with
    # a number followed by a label that contains one of the keys.
    for fragment in shortstat.split(','):
        for label in counts:
            if label in fragment:
                counts[label] = int(fragment.split()[0])

    return counts
|
||||
|
||||
|
||||
def get_project_stats(upstream_dir, downstream_dir):
    """Computes the diff stats of a downstream project against its upstream.

    The upstream project is fetched into the downstream one and the result
    (FETCH_HEAD) is diffed against the downstream HEAD.
    Lines that exist only in the downstream directory are considered insertions.
    Lines that exist only in the upstream directory are considered deletions.

    Args:
      upstream_dir: A path to the upstream directory to compare.
      downstream_dir: A path to the downstream directory to compare.

    Returns:
      A dict with the count of files modified, lines added
      and lines removed. All counts are 0 when either directory is missing.
    """
    if not (upstream_dir and downstream_dir):
        return {
            'file': 0,
            'insertion': 0,
            'deletion': 0,
        }

    print('Diffing %s vs %s' % (downstream_dir, upstream_dir))
    git(['-C', downstream_dir, 'fetch', '--update-shallow', upstream_dir])
    return get_revision_diff_stats(downstream_dir, 'FETCH_HEAD', 'HEAD')
|
||||
|
||||
|
||||
def match_project_by_root_commits(
        downstream_project_name, downstream_project_path, upstream_root_commits):
    """Pairs a downstream project with the upstream project sharing a root commit.

    Only the first downstream root commit that is known upstream is
    considered. When that root commit belongs to several upstream projects a
    warning is printed and no match is returned.

    Args:
      downstream_project_name: A string with the downstream project name.
      downstream_project_path: A string with the downstream project path.
      upstream_root_commits: A dict of root commits and their upstream project.

    Returns:
      A string with the matched upstream project name, or None.
    """
    for root_commit in find_root_commits_in_path(downstream_project_path):
        if root_commit not in upstream_root_commits:
            continue

        # First shared root commit found: decide on it and stop looking.
        candidates = upstream_root_commits[root_commit]
        if len(candidates) > 1:
            print('Warning: ' + downstream_project_name +
                  ' matches multiple projects')
            print(candidates)
            return None
        return candidates[0]['name']

    return None
|
||||
|
||||
|
||||
def match_projects(upstream_projects, downstream_projects):
    """Match downstream projects to upstream projects.

    Projects are matched by name first and by shared root commit otherwise.

    Args:
      upstream_projects: A dict of upstream projects.
      downstream_projects: A dict of downstream projects.

    Returns:
      A list of dicts with the keys 'upstream' and 'downstream'. The value is
      None on the side that has no counterpart.
    """
    project_matches = []

    # keep a list of upstream projects that have not been matched
    unmatched_upstream_projects = set(upstream_projects)

    upstream_root_commits = find_root_commits_in_projects(upstream_projects)

    # Match all downstream projects to an upstream project.
    # items() is available on both Python 2 and 3 (iteritems() is not).
    for downstream_name, downstream_path in downstream_projects.items():
        # First try to match projects by name
        if downstream_name in upstream_projects:
            upstream_match = downstream_name
        # If there is no project name match then try matching by commit
        else:
            upstream_match = match_project_by_root_commits(
                downstream_name, downstream_path, upstream_root_commits)

        project_matches.append({
            'upstream': upstream_match,
            'downstream': downstream_name,
        })
        unmatched_upstream_projects.discard(upstream_match)

    # Add all upstream projects that have not been matched
    for project in unmatched_upstream_projects:
        project_matches.append({
            'upstream': project,
            'downstream': None,
        })

    return project_matches
|
||||
|
||||
|
||||
def filter_exclusion_list(projects, exclusion_file):
    """Removes all projects that match the exclusion patterns.

    Args:
      projects: A dict of project paths keyed by project names.
      exclusion_file: Path to a file with one regular expression per line, or
        an empty value to disable filtering.

    Returns:
      A dict with the projects whose name does not match any pattern (matched
      at the start of the name). The input dict itself is returned when there
      is no pattern to apply.
    """
    patterns = []
    if exclusion_file:
        with open(exclusion_file) as f:
            patterns = [line.strip() for line in f]
    # Drop blank lines: an empty alternative in the combined expression
    # matches every name and would exclude all projects.
    patterns = [pattern for pattern in patterns if pattern]

    if not patterns:
        return projects

    exclusion_regex = re.compile('|'.join(patterns))

    filtered = {}
    for name, path in projects.items():
        if exclusion_regex.match(name):
            print('Excluding ' + name)
        else:
            filtered[name] = path

    return filtered
|
||||
|
||||
|
||||
def get_all_projects_stats(upstream_source_tree, downstream_source_tree,
                           exclusion_file):
    """Matches the projects of two source trees and diffs every pair.

    Args:
      upstream_source_tree: A string with the path to the upstream gerrit
        source tree.
      downstream_source_tree: A string with the path to the downstream gerrit
        source tree.
      exclusion_file: A string with the path to the exclusion file.

    Returns:
      A list with one dict per project match. Each dict holds the diff counts
      ('file', 'insertion', 'deletion') plus 'status', 'upstream',
      'downstream' and 'downstream_path'.
    """
    upstream_projects = filter_exclusion_list(
        get_projects(upstream_source_tree), exclusion_file)
    downstream_projects = filter_exclusion_list(
        get_projects(downstream_source_tree), exclusion_file)

    results = []
    for pair in match_projects(upstream_projects, downstream_projects):
        upstream_name = pair['upstream']
        downstream_name = pair['downstream']
        downstream_path = downstream_projects.get(downstream_name)

        entry = get_project_stats(upstream_projects.get(upstream_name),
                                  downstream_path)

        # Classify the pair; the order of the checks matters.
        if not upstream_name:
            status = 'Downstream Only Projects'
        elif not downstream_name:
            status = 'Upstream Only Projects'
        elif entry['file'] == 0:
            status = 'Intact Projects'
        elif upstream_name == downstream_name:
            status = 'Modified Projects'
        else:
            status = 'Forked Projects'

        entry['status'] = status
        entry['upstream'] = upstream_name
        entry['downstream'] = downstream_name
        entry['downstream_path'] = downstream_path
        results.append(entry)

    return results
|
||||
|
||||
|
||||
def find_root_commits_in_path(path):
    """Lists the parentless commits reachable from HEAD of a git project.

    Args:
      path: path of the git project

    Returns:
      A list with one commit id per line of the `git rev-list` output.
    """
    print('Analyzing history of ' + path)
    # --max-parents=0 restricts the listing to commits without parents.
    root_revisions = git(['-C', path, 'rev-list', '--max-parents=0', 'HEAD'])
    return root_revisions.splitlines()
|
||||
|
||||
|
||||
def find_root_commits_in_projects(projects):
    """Returns a dict of root commits with all projects with that root commit.

    Args:
      projects: A dict of project paths keyed by project names.

    Returns:
      A dict keyed by root commit. Each value is a list of dicts with the
      'name' and 'path' of every project that has this root commit.
    """
    root_commits = {}
    # items() is available on both Python 2 and 3 (iteritems() is not).
    for name, path in projects.items():
        for root in find_root_commits_in_path(path):
            root_commits.setdefault(root, []).append({
                'name': name,
                'path': path,
            })
    return root_commits
|
||||
|
||||
|
||||
def get_commit_stats_in_project(project):
    """Describes the commits of one project that have not been upstreamed.

    Args:
      project: A dict with the project 'name' and its 'downstream_path'.

    Returns:
      A dict with the project 'name' and 'stats', a list of dicts holding the
      'commit', 'author' (author email) and 'subject' of every commit found.
    """
    project_name = project['name']
    project_path = project['downstream_path']

    print('Finding commits not upstreamed in ' + project_name)
    missing = git_commits_not_upstreamed.find('FETCH_HEAD', 'HEAD',
                                              project_path)
    print('Found commits not upstreamed in ' + project_name)

    def show(commit, fmt):
        # Output of `git show` for one commit, reduced to a single format field.
        return git(['-C', project_path, 'show', '--no-patch',
                    '--format=' + fmt, commit]).strip()

    details = []
    for commit in missing:
        author = show(commit, '%ae')
        subject = show(commit, '%s')
        details.append({
            'commit': commit,
            'author': author,
            'subject': subject,
        })

    return {
        'name': project_name,
        'stats': details,
    }
|
||||
|
||||
|
||||
def get_all_commits_stats(project_stats):
    """Extract commits that have not been upstreamed in all projects.

    Only projects whose status starts with 'Modified' are analyzed. Each of
    them gets a 'name' key added to its stats dict before it is handed to a
    worker process.

    Args:
      project_stats: A list of dicts of matching upstream and downstream
        projects including stats for projects that matches.

    Returns:
      A dict of lists of commits not upstreamed, keyed by downstream project
      name.
    """
    downstream_stats = {match['downstream']: match for match in project_stats}

    # Only analyze modified projects
    modified_projects = []
    # items() is available on both Python 2 and 3 (iteritems() is not).
    for name, stats in downstream_stats.items():
        if stats['status'].startswith('Modified'):
            stats['name'] = name
            modified_projects.append(stats)

    # Nothing to analyze: do not start worker processes at all.
    if not modified_projects:
        return {}

    pool = multiprocessing.Pool()
    try:
        commit_stats = pool.map(get_commit_stats_in_project, modified_projects)
    finally:
        # Release the worker processes whether or not the map succeeded.
        pool.close()
        pool.join()

    return {stats['name']: stats['stats'] for stats in commit_stats}
|
||||
|
||||
|
||||
def write_commit_csv(commit_stats, commit_output_file):
    """Write the commits not upstreamed to a CSV file.

    One row is written per commit, stamped with today's date.

    Args:
      commit_stats: The dict of the stats for all commits, keyed by downstream
        project name.
      commit_output_file: Path to the output file.
    """
    fieldnames = [
        'Date',
        'Commit',
        'Downstream Project',
        'Author',
        'Subject',
    ]
    with open(commit_output_file, 'w') as f:
        today = datetime.datetime.today().strftime('%Y/%m/%d')
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()
        # items() is available on both Python 2 and 3 (iteritems() is not).
        for project, stats in commit_stats.items():
            for stat in stats:
                writer.writerow({
                    'Date': today,
                    'Commit': stat['commit'],
                    'Downstream Project': project,
                    'Author': stat['author'],
                    'Subject': stat['subject'],
                })
    print('Wrote commit stats to ' + commit_output_file)
|
||||
|
||||
|
||||
def write_project_csv(project_stats, commit_stats, project_output_file):
    """Writes one CSV row per project match, stamped with today's date.

    Args:
      project_stats: The list of the stats for all projects.
      commit_stats: The dict of the stats for all commits, keyed by downstream
        project name.
      project_output_file: Path to the output file.
    """
    columns = [
        'Date',
        'Downstream Project',
        'Upstream Project',
        'Diff Status',
        'Files Changed',
        'Line Insertions',
        'Line Deletions',
        'Line Changes',
        'Commits Not Upstreamed',
    ]
    with open(project_output_file, 'w') as output:
        writer = csv.DictWriter(output, fieldnames=columns)
        writer.writeheader()
        today = datetime.datetime.today().strftime('%Y/%m/%d')
        for entry in project_stats:
            downstream_name = entry['downstream']
            # Projects without analyzed commits are reported with a count of 0.
            if downstream_name in commit_stats:
                pending_commits = len(commit_stats[downstream_name])
            else:
                pending_commits = 0
            row = {
                'Date': today,
                'Downstream Project': downstream_name,
                'Upstream Project': entry['upstream'],
                'Diff Status': entry['status'],
                'Files Changed': entry['file'],
                'Line Insertions': entry['insertion'],
                'Line Deletions': entry['deletion'],
                'Line Changes': entry['insertion'] + entry['deletion'],
                'Commits Not Upstreamed': pending_commits,
            }
            writer.writerow(row)
    print('Wrote project stats to ' + project_output_file)
|
||||
|
||||
|
||||
def diff(upstream_source_tree, downstream_source_tree, project_output_file,
         commit_output_file, exclusions_file):
    """Compares two repo source trees and writes both CSV reports.

    Args:
      upstream_source_tree: A string with the path to a gerrit source tree.
      downstream_source_tree: A string with the path to a gerrit source tree.
      project_output_file: Path to the project output file.
      commit_output_file: Path to the commit output file.
      exclusions_file: Path to exclusions file.
    """
    per_project = get_all_projects_stats(
        upstream_source_tree, downstream_source_tree, exclusions_file)
    per_commit = get_all_commits_stats(per_project)

    # The commit report is written before the project report.
    write_commit_csv(per_commit, commit_output_file)
    write_project_csv(per_project, per_commit, project_output_file)
|
||||
|
||||
|
||||
def main():
    """Parse the command line and diff the two source trees given on it.

    All paths are made absolute before they are handed to diff().
    """
    parser = argparse.ArgumentParser(
        description='Diff a repo source tree against an upstream source tree.')
    parser.add_argument('upstream_path', help='Path to an upstream source tree.')
    parser.add_argument(
        'downstream_path', help='Path to a downstream source tree.')
    parser.add_argument(
        '-p',
        '--project_output_file',
        help='Path to write the project output file',
        default='project.csv',)
    parser.add_argument(
        '-c',
        '--commit_output_file',
        help='Path to write the commit output file',
        default='commit.csv',)
    # Each literal ends with a space so that the adjacent strings do not run
    # together ("fromthe", "asdescribed") in the help output.
    parser.add_argument(
        '-e',
        '--exclusions_file',
        help='Path to file with a list of project names to be excluded from '
        'the diff. You may use a regular expression to match project names as '
        'described in https://docs.python.org/2/howto/regex.html',
        default='',
    )
    args = parser.parse_args()
    upstream_source_tree = os.path.abspath(args.upstream_path)
    downstream_source_tree = os.path.abspath(args.downstream_path)
    project_output_file = os.path.abspath(args.project_output_file)
    commit_output_file = os.path.abspath(args.commit_output_file)
    # An empty value means that no exclusions file is used.
    exclusions_file = ''
    if args.exclusions_file:
        exclusions_file = os.path.abspath(args.exclusions_file)

    diff(upstream_source_tree, downstream_source_tree, project_output_file,
         commit_output_file, exclusions_file)


if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user