Refactor compare_images

1. Rename some variables to be more descriptive
2. Fix a bug that occurs when system.img contains product and system_ext
3. Remove the multithreaded code; it made performance worse :(
  before: 136.97s user 1070.22s system 290% cpu 6:55.84 total
  after:  56.29s user 76.95s system 105% cpu 2:06.84 total

Bug: 138329983
Test: compare some images, and check if result is same as before
Change-Id: I4cec27475283a34d063ad3251cfc5909c34071c1
This commit is contained in:
Jeongik Cha
2019-07-25 19:05:50 +09:00
parent 996f9b13a0
commit b68408275b

View File

@@ -17,14 +17,12 @@ import os
import subprocess import subprocess
import sys import sys
from collections import defaultdict from collections import defaultdict
from concurrent import futures
from glob import glob from glob import glob
from operator import itemgetter from operator import itemgetter
import hashlib import hashlib
import argparse import argparse
import zipfile import zipfile
tpe = futures.ThreadPoolExecutor()
def silent_call(cmd): def silent_call(cmd):
return subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) == 0 return subprocess.call(cmd, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) == 0
@@ -32,7 +30,7 @@ def sha1sum(f):
with open(f, 'rb') as fin: with open(f, 'rb') as fin:
return hashlib.sha1(fin.read()).hexdigest() return hashlib.sha1(fin.read()).hexdigest()
def unsigned_and_sha1sum(filepath): def sha1sum_without_signing_key(filepath):
apk = zipfile.ZipFile(filepath) apk = zipfile.ZipFile(filepath)
l = [] l = []
for f in sorted(apk.namelist()): for f in sorted(apk.namelist()):
@@ -59,11 +57,11 @@ def strip_and_sha1sum(filepath):
return sha1sum(filepath) return sha1sum(filepath)
def main(targets, search_paths, ignore_signing_key=False): def main(all_targets, search_paths, ignore_signing_key=False):
def get_target_name(path): def get_target_name(path):
return os.path.basename(os.path.normpath(path)) return os.path.basename(os.path.normpath(path))
artifact_target_map = defaultdict(list) artifact_target_map = defaultdict(list)
for target in targets: for target in all_targets:
def valid_path(p): def valid_path(p):
if os.path.isdir(p) or not os.path.exists(p): if os.path.isdir(p) or not os.path.exists(p):
return False return False
@@ -75,28 +73,30 @@ def main(targets, search_paths, ignore_signing_key=False):
target, "**", "*"), recursive=True) if valid_path(path)] target, "**", "*"), recursive=True) if valid_path(path)]
def run(path): def run(path):
objdump = silent_call(["objdump", "-a", path]) is_native_component = silent_call(["objdump", "-a", path])
is_apk = path.endswith('.apk') is_apk = path.endswith('.apk')
if objdump: if is_native_component:
return strip_and_sha1sum(path), path[len(target):] return strip_and_sha1sum(path), path[len(target):]
elif is_apk and ignore_signing_key: elif is_apk and ignore_signing_key:
return unsigned_and_sha1sum(path), path[len(target):] return sha1sum_without_signing_key(path), path[len(target):]
else: else:
return sha1sum(path), path[len(target):] return sha1sum(path), path[len(target):]
results = list(map(futures.Future.result, [tpe.submit(run, p) for p in paths])) results = [run(p) for p in paths]
for sha1, f in results: for sha1, filename in results:
basename = os.path.split(os.path.dirname(f))[1] + os.path.basename(f) artifact_target_map[(sha1, filename)].append(get_target_name(target))
artifact_target_map[(sha1, basename)].append((get_target_name(target), f))
def pretty_print(sha1, filename, targets):
return filename + ", " + sha1[:10] + ", " + ";".join(targets) + "\n"
def pretty_print(p, ts):
assert(len({t for t, _ in ts}) == len(ts))
return ";".join({f for _, f in ts}) + ", " + p[0][:10] + ", " + ";".join(map(itemgetter(0), ts)) + "\n"
header = "filename, sha1sum, targets\n" header = "filename, sha1sum, targets\n"
common = sorted([pretty_print(p, ts)
for p, ts in artifact_target_map.items() if len(ts) == len(targets)]) def is_common(targets):
diff = sorted([pretty_print(p, ts) return len(targets) == len(all_targets)
for p, ts in artifact_target_map.items() if len(ts) < len(targets)]) common = sorted([pretty_print(sha1, filename, targets)
for (sha1, filename), targets in artifact_target_map.items() if is_common(targets)])
diff = sorted([pretty_print(sha1, filename, targets)
for (sha1, filename), targets in artifact_target_map.items() if not is_common(targets)])
with open("common.csv", 'w') as fout: with open("common.csv", 'w') as fout:
fout.write(header) fout.write(header)