Refactor native heap viewer to add tests.

This only includes a few tests to start with.

Bug: 62492960

Test: Ran new unit tests.
Test: Ran with all different options and verified it produces the same
Test: output as the previous script.
Change-Id: Iad29a5f04f49986139c92030a3259cae512859af
This commit is contained in:
Christopher Ferris
2018-07-11 15:15:35 -07:00
parent 1ae3edd54b
commit dfb5368b57
2 changed files with 438 additions and 216 deletions

View File

@@ -22,7 +22,8 @@ import re
import subprocess
import sys
usage = """
class Args:
_usage = """
Usage:
1. Collect a native heap dump from the device. For example:
$ adb shell stop
@@ -50,61 +51,59 @@ Usage:
stack frame. 71b07bc0b0 is the address of the stack frame.
"""
verbose = False
html_output = False
reverse_frames = False
product_out = os.getenv("ANDROID_PRODUCT_OUT")
if product_out:
symboldir = product_out + "/symbols"
else:
symboldir = "./symbols"
args = sys.argv[1:]
while len(args) > 1:
if args[0] == "--symbols":
symboldir = args[1]
args = args[2:]
elif args[0] == "--verbose":
verbose = True
args = args[1:]
elif args[0] == "--html":
html_output = True
args = args[1:]
elif args[0] == "--reverse":
reverse_frames = True
args = args[1:]
def __init__(self):
self.verbose = False
self.html_output = False
self.reverse_frames = False
product_out = os.getenv("ANDROID_PRODUCT_OUT")
if product_out:
self.symboldir = product_out + "/symbols"
else:
print "Invalid option "+args[0]
break
self.symboldir = "./symbols"
if len(args) != 1:
print usage
exit(0)
i = 1
extra_args = []
while i < len(sys.argv):
if sys.argv[i] == "--symbols":
i += 1
self.symboldir = args[i]
elif sys.argv[i] == "--verbose":
self.verbose = True
elif sys.argv[i] == "--html":
self.html_output = True
elif sys.argv[i] == "--reverse":
self.reverse_frames = True
elif sys.argv[i][0] == '-':
print "Invalid option " + sys.argv[i]
else:
extra_args.append(sys.argv[i])
i += 1
native_heap = args[0]
if len(extra_args) != 1:
print self._usage
sys.exit(1)
re_map = re.compile("(?P<start>[0-9a-f]+)-(?P<end>[0-9a-f]+) .... (?P<offset>[0-9a-f]+) [0-9a-f]+:[0-9a-f]+ [0-9]+ +(?P<name>.*)")
self.native_heap = extra_args[0]
class Backtrace:
    """Record for one backtrace line of a native heap dump.

    Holds the zygote flag, the allocation size, the allocation count and
    the list of frame addresses exactly as passed in.
    """

    def __init__(self, is_zygote, size, num_allocs, frames):
        # Plain value holder; nothing is validated or copied.
        self.frames = frames
        self.num_allocs = num_allocs
        self.size = size
        self.is_zygote = is_zygote
class Mapping:
    """Record for one map line: address range, file offset and name."""

    def __init__(self, start, end, offset, name):
        # Plain value holder; nothing is validated or copied.
        self.name = name
        self.offset = offset
        self.end = end
        self.start = start
class FrameDescription:
    """Symbolized form of a frame: function, source location, library."""

    def __init__(self, function, location, library):
        # Plain value holder; nothing is validated or copied.
        self.library = library
        self.location = location
        self.function = function
def GetVersion(native_heap):
"""Get the version of the native heap dump."""
@@ -117,8 +116,8 @@ def GetVersion(native_heap):
return m.group('version')
return None
def NumFieldValid(native_heap):
"""Determine if the num field is valid.
def GetNumFieldValidByParsingLines(native_heap):
"""Determine if the num field is valid by parsing the backtrace lines.
Malloc debug for N incorrectly set the num field to the number of
backtraces instead of the number of allocations with the same size and
@@ -156,195 +155,210 @@ def NumFieldValid(native_heap):
return True
return matched == 0
version = GetVersion(native_heap)
if not version or version == "v1.0":
# Version v1.0 was produced by a buggy version of malloc debug where the
# num field was set incorrectly.
# Unfortunately, Android P produced a v1.0 version that does set the
# num field. Do one more check to see if this is the broken version.
num_field_valid = NumFieldValid(native_heap)
else:
num_field_valid = True
def GetNumFieldValid(native_heap):
    """Return whether the num field of the heap dump can be trusted.

    Any version other than "v1.0" is accepted outright. A missing version
    or "v1.0" falls back to GetNumFieldValidByParsingLines().
    """
    version = GetVersion(native_heap)
    if version and version != "v1.0":
        return True

    # Version v1.0 was produced by a buggy version of malloc debug where the
    # num field was set incorrectly.
    # Unfortunately, Android P produced a v1.0 version that does set the
    # num field. Do one more check to see if this is the broken version.
    return GetNumFieldValidByParsingLines(native_heap)
backtraces = []
mappings = []
def ParseNativeHeap(native_heap, reverse_frames, num_field_valid):
    """Parse the native heap dump file into backtraces and mappings.

    Args:
      native_heap: path of the native heap dump file.
      reverse_frames: when true, each backtrace's frame list is reversed.
      num_field_valid: when true, the allocation count is read from the
        num field of each backtrace line; otherwise it is taken as 1.

    Returns:
      A (backtraces, mappings) tuple: a list of Backtrace objects and a
      list of Mapping objects, both in file order.
      NOTE(review): FindMapping binary-searches the mappings, so the map
      lines are presumably already in ascending address order in the
      dump; nothing here sorts them.
    """
    backtraces = []
    mappings = []

    re_map = re.compile("(?P<start>[0-9a-f]+)-(?P<end>[0-9a-f]+) .... (?P<offset>[0-9a-f]+) [0-9a-f]+:[0-9a-f]+ [0-9]+ +(?P<name>.*)")

    # The with-block closes the dump file even if a line fails to parse.
    with open(native_heap, "r") as heap_file:
        for line in heap_file:
            # Format of line:
            # z 0 sz 50 num 1 bt 000000000000a100 000000000000b200
            parts = line.split()
            if len(parts) > 7 and parts[0] == "z" and parts[2] == "sz":
                # Flag is set for any z value other than "1".
                is_zygote = parts[1] != "1"
                size = int(parts[3])
                if num_field_valid:
                    num_allocs = int(parts[5])
                else:
                    num_allocs = 1
                # A real list (not a map object) so it can be reversed.
                frames = [int(frame, 16) for frame in parts[7:]]
                if reverse_frames:
                    frames.reverse()
                backtraces.append(Backtrace(is_zygote, size, num_allocs, frames))
            else:
                # Parse map line:
                # 720de01000-720ded7000 r-xp 00000000 fd:00 495 /system/lib64/libc.so
                m = re_map.match(line)
                if m:
                    start = int(m.group('start'), 16)
                    end = int(m.group('end'), 16)
                    offset = int(m.group('offset'), 16)
                    name = m.group('name')
                    mappings.append(Mapping(start, end, offset, name))

    return backtraces, mappings
# Resolve address libraries and offsets.
# addr_offsets maps addr to .so file offset
# addrs_by_lib maps library to list of addrs from that library
# Resolved addrs maps addr to FrameDescription
addr_offsets = {}
addrs_by_lib = {}
resolved_addrs = {}
EMPTY_FRAME_DESCRIPTION = FrameDescription("???", "???", "???")
for backtrace in backtraces:
def FindMapping(mappings, addr):
    """Find the mapping that contains addr.

    Binary-searches mappings, which must be ordered by ascending address.
    A mapping contains addr when start <= addr < end.

    Args:
      mappings: sequence of objects with start and end attributes.
      addr: the address to look up.

    Returns:
      The mapping that contains addr, or None if there is no such mapping.
    """
    # lo/hi rather than min/max so the builtins are not shadowed.
    lo = 0
    hi = len(mappings) - 1
    while lo <= hi:
        mid = (lo + hi) // 2
        mapping = mappings[mid]
        if mapping.end <= addr:
            lo = mid + 1
        elif mapping.start > addr:
            hi = mid - 1
        else:
            return mapping
    return None
def ResolveAddrs(html_output, symboldir, backtraces, mappings):
    """Resolve every frame address to a FrameDescription.

    addr_offsets maps addr to .so file offset
    addrs_by_lib maps library to list of addrs from that library
    resolved_addrs maps addr to FrameDescription

    Args:
      html_output: when true, the progress and "not found" messages are
        suppressed.
      symboldir: directory prefix prepended to each mapping name to locate
        the library file used for symbolization.
      backtraces: objects whose frames attribute lists integer addresses.
      mappings: mappings searched with FindMapping().

    Returns:
      The resolved_addrs dict. Addresses outside every mapping, addresses
      in a library that is not found under symboldir, and addresses for
      which addr2line produced no output all get "???" for function and
      location.

    Raises:
      subprocess.CalledProcessError: if objdump exits with a failure.
      OSError: if objdump or addr2line cannot be started.
    """
    addr_offsets = {}
    addrs_by_lib = {}
    resolved_addrs = {}
    empty_frame_description = FrameDescription("???", "???", "???")
    for backtrace in backtraces:
        for addr in backtrace.frames:
            if addr in addr_offsets:
                continue
            mapping = FindMapping(mappings, addr)
            if mapping:
                addr_offsets[addr] = addr - mapping.start + mapping.offset
                addrs_by_lib.setdefault(mapping.name, []).append(addr)
            else:
                resolved_addrs[addr] = empty_frame_description

    # Resolve functions and line numbers
    if not html_output:
        print("Resolving symbols using directory %s..." % symboldir)

    for lib, lib_addrs in addrs_by_lib.items():
        # Plain concatenation on purpose: os.path.join() would discard
        # symboldir whenever the mapping name is an absolute path.
        sofile = symboldir + lib
        if not os.path.isfile(sofile):
            if not html_output:
                print("%s not found for symbol resolution" % lib)
            fd = FrameDescription("???", "???", lib)
            for addr in lib_addrs:
                resolved_addrs[addr] = fd
            continue

        # Column 5 of the .text row in the objdump section table is
        # subtracted from every offset handed to addr2line below.
        file_offset = 0
        result = subprocess.check_output(
            ["objdump", "-w", "-j", ".text", "-h", sofile],
            universal_newlines=True)
        for line in result.split("\n"):
            splitted = line.split()
            if len(splitted) > 5 and splitted[1] == ".text":
                file_offset = int(splitted[5], 16)
                break

        input_addrs = "".join(
            "%s\n" % hex(addr_offsets[addr] - file_offset)
            for addr in lib_addrs)

        p = subprocess.Popen(
            ["addr2line", "-C", "-j", ".text", "-e", sofile, "-f"],
            stdout=subprocess.PIPE, stdin=subprocess.PIPE,
            universal_newlines=True)
        result = p.communicate(input_addrs)[0]
        splitted = result.split("\n")
        # With -f, addr2line is read as two lines per address: function
        # name, then location.
        for x, addr in enumerate(lib_addrs):
            if 2 * x + 1 < len(splitted):
                function = splitted[2 * x]
                location = splitted[2 * x + 1]
            else:
                # addr2line produced less output than expected.
                function = "???"
                location = "???"
            resolved_addrs[addr] = FrameDescription(function, location, lib)

    return resolved_addrs
def Addr2Line(resolved_addrs, addr):
    """Look up the FrameDescription for a node address string.

    The root labels "ZYGOTE" and "APP" yield an all-empty description.
    Any other addr is parsed as hexadecimal and looked up in
    resolved_addrs; a missing key raises KeyError.
    """
    if addr in ("ZYGOTE", "APP"):
        return FrameDescription("", "", "")
    key = int(addr, 16)
    return resolved_addrs[key]
class AddrInfo:
    """Node of the allocation tree, keyed by address string.

    size accumulates bytes, number accumulates allocation counts, and
    children maps a child's addr to its AddrInfo node.
    """

    def __init__(self, addr):
        self.addr = addr
        self.size = 0
        self.number = 0
        # Initialized here but never updated by addStack().
        self.num_allocs = 0
        self.children = {}

    def addStack(self, size, num_allocs, stack):
        """Add one backtrace's totals to this node and along its stack.

        stack is a list of AddrInfo nodes, nearest child first. Each level
        reuses the existing child with the same addr, or adopts the node
        supplied in stack when there is none yet.
        """
        total_bytes = size * num_allocs
        node = self
        node.size += total_bytes
        node.number += num_allocs
        for candidate in stack:
            # setdefault keeps an existing child and inserts candidate
            # only when that addr has not been seen under this node.
            node = node.children.setdefault(candidate.addr, candidate)
            node.size += total_bytes
            node.number += num_allocs
zygote = AddrInfo("ZYGOTE")
app = AddrInfo("APP")
def Display(resolved_addrs, indent, total, parent_total, node):
    """Print node as one text row, then its children recursively.

    Each row shows the node's size, its percentage of total and of
    parent_total (0 when the divisor is 0), its allocation count, and the
    indented address with library, function and location. Children are
    printed largest size first, with the node's own size as their
    parent_total.
    """
    frame = Addr2Line(resolved_addrs, node.addr)

    if total != 0:
        total_percent = 100 * node.size / float(total)
    else:
        total_percent = 0
    if parent_total != 0:
        parent_percent = 100 * node.size / float(parent_total)
    else:
        parent_percent = 0

    row = (node.size, total_percent, parent_percent, node.number, indent,
           node.addr, frame.library, frame.function, frame.location)
    print("%9d %6.2f%% %6.2f%% %8d %s%s %s %s %s" % row)

    child_indent = indent + " "
    by_size = sorted(node.children.values(), key=lambda x: x.size, reverse=True)
    for child in by_size:
        Display(resolved_addrs, child_indent, total, node.size, child)
def display(indent, total, parent_total, node):
fd = addr2line(node.addr)
total_percent = 0
if total != 0:
total_percent = 100 * node.size / float(total)
parent_percent = 0
if parent_total != 0:
parent_percent = 100 * node.size / float(parent_total)
print "%9d %6.2f%% %6.2f%% %8d %s%s %s %s %s" % (node.size, total_percent, parent_percent, node.number, indent, node.addr, fd.library, fd.function, fd.location)
children = sorted(node.children.values(), key=lambda x: x.size, reverse=True)
def DisplayHtml(verbose, resolved_addrs, total, node, extra, label_count):
    """Print node and its subtree as nested HTML list items.

    A node with children is printed as a label/checkbox pair followed by
    an ordered list of its children, largest size first; a node without
    children is printed as a plain label. With verbose set the full
    library path is shown, otherwise only its basename.

    label_count is the next unused element id. Returns the next unused id
    after this subtree has been printed.
    """
    frame = Addr2Line(resolved_addrs, node.addr)
    lib = frame.library if verbose else os.path.basename(frame.library)

    if total != 0:
        total_percent = 100 * node.size / float(total)
    else:
        total_percent = 0

    label = "%d %6.2f%% %6d %s%s %s %s" % (node.size, total_percent, node.number, extra, lib, frame.function, frame.location)
    # "&" must be escaped first so the entities added afterwards are not
    # escaped a second time.
    escapes = (
        ("&", "&amp;"),
        ("'", "&apos;"),
        ('"', "&quot;"),
        ("<", "&lt;"),
        (">", "&gt;"),
    )
    for raw, entity in escapes:
        label = label.replace(raw, entity)

    by_size = sorted(node.children.values(), key=lambda x: x.size, reverse=True)
    print('<li>')
    if not by_size:
        print(label)
    else:
        print('<label for="' + str(label_count) + '">' + label + '</label>')
        print('<input type="checkbox" id="' + str(label_count) + '"/>')
        print('<ol>')
        label_count += 1
        for child in by_size:
            label_count = DisplayHtml(verbose, resolved_addrs, total, child, "", label_count)
        print('</ol>')
    print('</li>')

    return label_count
html_header = """
def CreateHtml(verbose, app, zygote, resolved_addrs):
print """
<!DOCTYPE html>
<html><head><style>
li input {
@@ -367,19 +381,44 @@ label {
Click on an individual line to expand/collapse to see the details of the
allocation data<ol>
"""
html_footer = "</ol></body></html>"
if html_output:
print html_header
display_html(app.size, app, "app ")
if zygote.size>0:
display_html(zygote.size, zygote, "zygote ")
print html_footer
else:
label_count = 0
label_count = DisplayHtml(verbose, resolved_addrs, app.size, app, "app ", label_count)
if zygote.size > 0:
DisplayHtml(verbose, resolved_addrs, zygote.size, zygote, "zygote ", label_count)
print "</ol></body></html>"
def main():
    """Parse the options, load the heap dump and print the report.

    Builds one allocation tree for app backtraces and one for zygote
    backtraces, then prints either the HTML page or the text table
    depending on the --html option.
    """
    args = Args()

    num_field_valid = GetNumFieldValid(args.native_heap)
    backtraces, mappings = ParseNativeHeap(args.native_heap, args.reverse_frames, num_field_valid)

    # Resolve functions and line numbers
    resolved_addrs = ResolveAddrs(args.html_output, args.symboldir, backtraces, mappings)

    app = AddrInfo("APP")
    zygote = AddrInfo("ZYGOTE")

    for backtrace in backtraces:
        # The tree is built from the last frame in the list down to the
        # first, so the frame order is flipped before insertion.
        stack = [AddrInfo("%x" % addr) for addr in backtrace.frames]
        stack.reverse()
        root = zygote if backtrace.is_zygote else app
        root.addStack(backtrace.size, backtrace.num_allocs, stack)

    if args.html_output:
        CreateHtml(args.verbose, app, zygote, resolved_addrs)
        return

    combined_size = app.size + zygote.size
    print("")
    print("%9s %6s %6s %8s %s %s %s %s" % ("BYTES", "%TOTAL", "%PARENT", "COUNT", "ADDR", "LIBRARY", "FUNCTION", "LOCATION"))
    Display(resolved_addrs, "", app.size, combined_size, app)
    print("")
    Display(resolved_addrs, "", zygote.size, combined_size, zygote)
    print("")
print ""
if __name__ == '__main__':
main()