Add 'whitelist' option to compare_images

* Use `compare_images -t a b -s system -w whitelist.txt` to specify a
  whitelist file; `-w` may be given more than once.
* diff.csv is split into two parts:
  - whitelisted_diff.csv: Diffs that are ignored by a whitelist
  - diff.csv: Diffs that are not ignored by any whitelist.
* Add `-c, --check` option to compare_images_and_print.sh
  When this option is specified, compare_images_and_print.sh exits with
  a non-zero status if diff.csv contains any unexpected diffs.

Bug: 144398662
Test: Tested manually by creating two directories that contain common
      files, whitelisted differing files, and non-whitelisted differing files.
Test: ./compare_images_and_print.sh --help
Test: ./compare_images_and_print.sh --check "-t a b -s SYSTEM -i"
Change-Id: Ie80741d08ef5bc245b409b46ee716ae783428a7d
This commit is contained in:
Yo Chiang
2019-11-11 17:07:45 +08:00
parent 2ce9bc6563
commit 9039ad4acd
3 changed files with 192 additions and 21 deletions

View File

@@ -21,6 +21,7 @@ from pathlib import Path
import hashlib
import argparse
import zipfile
import fnmatch
def silent_call(cmd):
    """Runs a command with its stdout and stderr discarded.

    Args:
      cmd: The command to run, passed unchanged to subprocess.call.

    Returns:
      True if the command exits with status 0, False otherwise.
    """
    exit_status = subprocess.call(
        cmd,
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    return exit_status == 0
@@ -56,10 +57,89 @@ def strip_and_sha1sum(filepath):
return sha1sum(filepath)
def main(all_targets, search_paths, ignore_signing_key=False):
def get_target_name(path):
return os.path.basename(os.path.normpath(path))
def make_filter_from_whitelists(whitelists, all_targets):
    """Creates a callable filter from a list of whitelist files.

    Whitelist can contain pathname patterns or ignored lines. Pathnames are
    case insensitive.

    For example, this ignores the file "system/build.prop":
      SYSTEM/build.prop

    This ignores txt files:
      *.txt

    This ignores files in directory "system/dontcare/"
      SYSTEM/dontcare/*

    This ignores lines prefixed with pat1 or pat2 in file "system/build.prop":
      SYSTEM/build.prop=pat1 pat2

    In the "filename=prefixes" form the filename is compared literally
    (after lowercasing), not as a glob pattern. Trailing backslashes on a
    whitelist line are dropped. Whitelist paths that are not regular files
    are skipped silently.

    Args:
      whitelists: A list of whitelist filenames.
      all_targets: A list of targets to compare.

    Returns:
      A callable object that accepts a file pathname and returns True if the
      file is ignored by the whitelists and False when it is not.
    """
    ignored_patterns = set()
    ignored_lines = defaultdict(list)
    for whitelist in whitelists:
        if not os.path.isfile(whitelist):
            continue
        with open(whitelist, 'rb') as f:
            for line in f:
                pat = line.strip().decode()
                if pat.endswith('\\'):
                    # Also drop the whitespace that preceded the backslash;
                    # otherwise a glob such as "*.txt \" keeps a trailing
                    # space and can never match a filename.
                    pat = pat.rstrip('\\').rstrip()
                if '=' in pat:
                    filename, prefixes = pat.split('=', 1)
                    prefixes = prefixes.split()
                    if prefixes:
                        ignored_lines[filename.lower()].extend(prefixes)
                elif pat:
                    ignored_patterns.add(pat.lower())

    def diff_with_ignored_lines(filename, prefixes):
        """Compares sha1 digest of file while ignoring lines.

        Args:
          filename: File to compare among each target.
          prefixes: A list of prefixes. Lines that start with prefix are
            ignored.

        Returns:
          True if file is identical among each target.
        """
        # Match prefixes on raw bytes so that files containing bytes that are
        # not valid UTF-8 can still be compared instead of raising
        # UnicodeDecodeError.
        byte_prefixes = tuple(prefix.encode() for prefix in prefixes)
        digests = set()
        for target in all_targets:
            pathname = os.path.join(target, filename)
            if not os.path.isfile(pathname):
                # A file missing from any target is never "identical".
                return False
            sha1 = hashlib.sha1()
            with open(pathname, 'rb') as f:
                for line in f:
                    if not line.startswith(byte_prefixes):
                        sha1.update(line)
            digests.add(sha1.hexdigest())
        # Exactly one distinct digest means every target agrees; an empty
        # target list yields no digest and therefore False.
        return len(digests) == 1

    def whitelist_filter(filename):
        """Returns True if filename is ignored by the loaded whitelists."""
        norm_filename = filename.lower()
        if any(fnmatch.fnmatch(norm_filename, pattern)
               for pattern in ignored_patterns):
            return True
        if norm_filename in ignored_lines:
            return diff_with_ignored_lines(filename,
                                           ignored_lines[norm_filename])
        return False

    return whitelist_filter
def main(all_targets, search_paths, whitelists, ignore_signing_key=False):
def run(path):
is_native_component = silent_call(["llvm-objdump", "-a", path])
is_apk = path.endswith('.apk')
@@ -70,7 +150,8 @@ def main(all_targets, search_paths, ignore_signing_key=False):
else:
return sha1sum(path)
artifact_target_map = defaultdict(list)
# artifact_sha1_target_map[filename][sha1] = list of targets
artifact_sha1_target_map = defaultdict(lambda: defaultdict(list))
for target in all_targets:
paths = []
for search_path in search_paths:
@@ -78,37 +159,60 @@ def main(all_targets, search_paths, ignore_signing_key=False):
if path.exists() and not path.is_dir():
paths.append((str(path), str(path.relative_to(target))))
results = [(run(path), filename) for path, filename in paths]
for sha1, filename in results:
artifact_target_map[(sha1, filename)].append(get_target_name(target))
target_basename = os.path.basename(os.path.normpath(target))
for path, filename in paths:
sha1 = run(path)
artifact_sha1_target_map[filename][sha1].append(target_basename)
def pretty_print(sha1, filename, targets):
return filename + ", " + sha1[:10] + ", " + ";".join(targets) + "\n"
return '{}, {}, {}\n'.format(filename, sha1[:10], ';'.join(targets))
def is_common(sha1_target_map):
for sha1, targets in sha1_target_map.items():
return len(sha1_target_map) == 1 and len(targets) == len(all_targets)
return False
whitelist_filter = make_filter_from_whitelists(whitelists, all_targets)
common = []
diff = []
whitelisted_diff = []
for filename, sha1_target_map in artifact_sha1_target_map.items():
if is_common(sha1_target_map):
for sha1, targets in sha1_target_map.items():
common.append(pretty_print(sha1, filename, targets))
else:
if whitelist_filter(filename):
for sha1, targets in sha1_target_map.items():
whitelisted_diff.append(pretty_print(sha1, filename, targets))
else:
for sha1, targets in sha1_target_map.items():
diff.append(pretty_print(sha1, filename, targets))
common = sorted(common)
diff = sorted(diff)
whitelisted_diff = sorted(whitelisted_diff)
header = "filename, sha1sum, targets\n"
def is_common(targets):
return len(targets) == len(all_targets)
common = sorted([pretty_print(sha1, filename, targets)
for (sha1, filename), targets in artifact_target_map.items() if is_common(targets)])
diff = sorted([pretty_print(sha1, filename, targets)
for (sha1, filename), targets in artifact_target_map.items() if not is_common(targets)])
with open("common.csv", 'w') as fout:
fout.write(header)
fout.writelines(common)
with open("diff.csv", 'w') as fout:
fout.write(header)
fout.writelines(diff)
with open("whitelisted_diff.csv", 'w') as fout:
fout.write(header)
fout.writelines(whitelisted_diff)
if __name__ == "__main__":
parser = argparse.ArgumentParser(prog="compare_images", usage="compare_images -t model1 model2 [model...] -s dir1 [dir...] [-i] [-u]")
parser = argparse.ArgumentParser(prog="compare_images", usage="compare_images -t model1 model2 [model...] -s dir1 [dir...] [-i] [-u] [-w whitelist1] [-w whitelist2]")
parser.add_argument("-t", "--target", nargs='+', required=True)
parser.add_argument("-s", "--search_path", nargs='+', required=True)
parser.add_argument("-i", "--ignore_signing_key", action='store_true')
parser.add_argument("-u", "--unzip", action='store_true')
parser.add_argument("-w", "--whitelist", action="append", default=[])
args = parser.parse_args()
if len(args.target) < 2:
parser.error("The number of targets has to be at least two.")
@@ -117,4 +221,4 @@ if __name__ == "__main__":
unzip_cmd = ["unzip", "-qd", t, os.path.join(t, "*.zip")]
unzip_cmd.extend([os.path.join(s, "*") for s in args.search_path])
subprocess.call(unzip_cmd)
main(args.target, args.search_path, args.ignore_signing_key)
main(args.target, args.search_path, args.whitelist, args.ignore_signing_key)