Provide a way to disassemble code in a tombstone.

Test: ran disassemble_test.py Change-Id: Id6beb23ff40d72b89b4d8400d645f7f868fd87d2
2016-03-09 21:55:40 -08:00
parent 5d691e597a
commit a6e5520bce
4 changed files with 19799 additions and 0 deletions
--- a/scripts/architecture.py
+++ b/scripts/architecture.py
@@ -0,0 +1,58 @@
 """Abstraction layer for different ABIs."""
 import re
 import symbol
 def UnpackLittleEndian(word):
  """Split a hexadecimal string into bytes, least significant byte first.

  Args:
    word: A string of hexadecimal digits with the most significant digit
      first. An odd number of digits is treated as if it had a leading zero.

  Returns:
    A list of two-character strings, one per byte, starting with the last
    (least significant) byte of word. An empty word gives an empty list.
  """
  # Without padding, an odd-length word would lose its leading digit and
  # produce an empty string as the final "byte".
  if len(word) % 2:
    word = '0' + word
  return [word[x:x+2] for x in range(len(word) - 2, -2, -2)]
 # Tool names. Each one is handed to symbol.ToolPath() and also serves as the
 # key under which an ABI's extra command-line flags are stored in OPTIONS.
 ASSEMBLE, DISASSEMBLE, LINK = 'as', 'objdump', 'ld'
 # Key under which an ABI may store its own word-unpacking function.
 UNPACK = 'unpack'
 # Per-ABI overrides; an ABI that is not listed here gets no extra flags.
 OPTIONS = {
     'x86': {ASSEMBLE: ['--32'], LINK: ['-melf_i386']},
 }
 class Architecture(object):
  """Builds toolchain command lines for one ABI.

  Args:
    name: The abi name, as represented in a tombstone.
  """

  def __init__(self, name):
    # symbol.ARCH is assigned before the toolchain lookup; presumably
    # FindToolchain() reads it -- confirm against symbol.py before reordering.
    symbol.ARCH = name
    self.toolchain = symbol.FindToolchain()
    # ABIs without an OPTIONS entry simply get no extra flags.
    self.options = OPTIONS.get(name, {})

  def _ToolCommand(self, tool, args):
    """Returns [tool path] + this ABI's flags for the tool + args."""
    executable = [symbol.ToolPath(tool)]
    abi_flags = self.options.get(tool, [])
    return executable + abi_flags + args

  def Assemble(self, args):
    """Returns an assembler command line ending with the given args."""
    return self._ToolCommand(ASSEMBLE, args)

  def Link(self, args):
    """Returns a linker command line ending with the given args."""
    return self._ToolCommand(LINK, args)

  def Disassemble(self, args):
    """Returns a disassembler command line ending with the given args."""
    return self._ToolCommand(DISASSEMBLE, args)

  def WordToBytes(self, word):
    """Splits a hexadecimal string into bytes in the ABI's byte order.

    Uses the function stored under UNPACK in this ABI's options, falling back
    to UnpackLittleEndian when there is none.

    Args:
      word: A string representing a hexadecimal value.

    Returns:
      An array of hexadecimal byte values.
    """
    unpack = self.options.get(UNPACK, UnpackLittleEndian)
    return unpack(word)
--- a/scripts/disassemble_test.py
+++ b/scripts/disassemble_test.py
@@ -0,0 +1,9 @@
 #!/usr/bin/python
 import disassemble_tombstone
 import disassemble_test_input
 for test in disassemble_test_input.tests:
  print test
  for line in disassemble_tombstone.Disassemble(iter(disassemble_test_input.tests[test].splitlines(True))):
    print line,
--- a/scripts/disassemble_test_input.py
+++ b/scripts/disassemble_test_input.py
--- a/scripts/disassemble_tombstone.py
+++ b/scripts/disassemble_tombstone.py
@@ -0,0 +1,171 @@
 #!/usr/bin/python
 """Disassemble the code stored in a tombstone.
 The classes in this module use an interface, ProcessLine, so that they can be
 chained together to do arbitrary processing. The current classes support
 disassembling the bytes embedded in tombstones and printing output to stdout.
 """
 import re
 import subprocess
 import sys
 import tempfile
 import architecture
 # Assembler text written at the top of the scratch file by ProcessCodeBlock,
 # ahead of the .byte directives: it declares the global function symbol
 # _start and places its label.
 STANDARD_PROLOGUE = """
       .type   _start, %function
       .globl  _start
 _start:
 """
 # Used instead of STANDARD_PROLOGUE when the abi is 'arm' and bit 0x20 of the
 # cpsr register is set: the standard prologue followed by directives that
 # switch to 16-bit (Thumb) code and add a thumb_start function label.
 THUMB_PROLOGUE = STANDARD_PROLOGUE + """
       .code   16
       .thumb_func
       .type   thumb_start, %function
 thumb_start:
 """
 def Disassemble(line_generator):
  """Yields the lines of a tombstone with its code blocks disassembled.

  Args:
    line_generator: Any iterable of tombstone lines (an open file, a list,
      an iterator, ...).

  Yields:
    Every line read here or by ProcessRegisterList, unchanged, plus whatever
    ProcessCodeBlock produces for each 'code around ...' block. If no ABI
    header is found, the input is passed through unchanged.
  """
  # Both loops below and the helpers they call pull from the same stream and
  # rely on each one continuing where the previous one stopped. Taking the
  # iterator once guarantees that even for re-iterable inputs such as lists,
  # which would otherwise be replayed from the start by every 'for'.
  line_generator = iter(line_generator)
  abi_line = re.compile(r"(ABI: '(.*)')")
  abi = None
  tools = None
  # Process global headers
  for line in line_generator:
    yield line
    abi_header = abi_line.search(line)
    if abi_header:
      abi = abi_header.group(2)
      # Look up the tools here so we don't do a lookup for each code block.
      tools = architecture.Architecture(abi)
      break
  # The rest of the file consists of:
  #   o Lines that should pass through unchanged
  #   o Blocks of register values, which follow a 'pid: ...' line and end with
  #     'backtrace:' line
  #   o Blocks of code represented as words, which start with 'code around ...'
  #     and end with a line that doesn't look like a list of words.
  #
  # The only constraint on the ordering of these blocks is that the register
  # values must come before the first code block.
  #
  # It's easiest to nest register processing in the codeblock search loop.
  register_list_re = re.compile('^pid: ')
  codeblock_re = re.compile('^code around ([a-z0-9]+)')
  register_text = {}
  for line in line_generator:
    yield line
    if register_list_re.search(line):
      # A new 'pid:' section starts a fresh set of register values.
      register_text = {}
      for output in ProcessRegisterList(line_generator, register_text):
        yield output
    code_match = codeblock_re.search(line)
    if code_match:
      # group(1) is the register name the code block is centered on.
      for output in ProcessCodeBlock(
          abi, tools, code_match.group(1), register_text, line_generator):
        yield output
 def ProcessRegisterList(line_generator, rval):
  """Passes a register dump through while recording the register values.

  Args:
    line_generator: Iterator over the tombstone lines after a 'pid: ' line.
    rval: Dict that receives a register name -> value text entry for every
      pair found on an indented line.

  Yields:
    Each line read, unchanged, up to and including the first line that starts
    with 'backtrace:'.
  """
  for text in line_generator:
    yield text
    if text.startswith('backtrace:'):
      break
    # Only indented lines hold registers; everything else just passes through.
    if not text.startswith(' '):
      continue
    tokens = text.split()
    # Names and values alternate, so a well-formed line has an even count.
    assert len(tokens) % 2 == 0
    for reg_name, reg_value in zip(tokens[0::2], tokens[1::2]):
      rval[reg_name] = reg_value
 def _ReadCodeWords(tools, line_generator, block_line_len, block_num_words):
  """Reads the memory dump lines that make up one code block.

  Lines are consumed until one whose length is not in block_line_len is seen
  or the generator runs out. The line that ends the block has been consumed
  as well, so it is returned to the caller.

  Args:
    tools: Object whose WordToBytes(word) splits a hex word into byte strings.
    line_generator: Iterator over the remaining tombstone lines.
    block_line_len: Values of len(line) accepted as memory dump lines.
    block_num_words: Number of memory words that follow the address on a line.

  Returns:
    A (start_address, code_bytes, terminator) tuple: the address text of the
    first dump line (None if there was none), the hexadecimal text of every
    byte in address order, and the line that ended the block (None if the
    generator was exhausted).
  """
  # Retains the hexadecimal text for the start of the block
  start_address = None
  # Maintains a numeric counter for the address of the current byte
  current_address = None
  code_bytes = []
  terminator = None
  for line in line_generator:
    # Be conservative and stop interpreting if the line length is wrong
    # We can't count words because spaces can appear in the text representation
    # of the memory.
    if len(line) not in block_line_len:
      terminator = line
      break
    words = line.split()
    # Double check the address at the start of each line
    if current_address is None:
      start_address = words[0]
      current_address = int(start_address, 16)
    else:
      assert current_address == int(words[0], 16)
    for word in words[1:block_num_words+1]:
      # Handle byte swapping
      for byte in tools.WordToBytes(word):
        code_bytes.append(byte)
        current_address += 1
  return start_address, code_bytes, terminator


 def ProcessCodeBlock(abi, tools, register_name, register_text, line_generator):
  """Replaces a dump of memory words with a disassembly listing.

  The words that follow a 'code around <register>' line are written out as
  .byte directives, assembled, linked at the address of the first dump line
  and run through the disassembler. The line decoded at the register's value
  is marked with '--->'.

  Args:
    abi: The ABI name from the tombstone; 'arm' enables the Thumb check.
    tools: Object providing Assemble, Link, Disassemble and WordToBytes, such
      as the architecture.Architecture built by Disassemble.
    register_name: Name of the register the block is centered on; must be a
      key of register_text.
    register_text: Dict mapping register names to hexadecimal value text.
    line_generator: Iterator positioned just after the 'code around' line.

  Yields:
    The disassembler's output lines, each prefixed with '--->' or four
    spaces, followed by a blank line. The dump lines themselves and the line
    that ended the dump are not echoed. If no dump line is recognized at all,
    only the line that was read while looking for one is yielded, unchanged.

  Raises:
    KeyError: register_name (or 'cpsr' for arm) is not in register_text.
    AssertionError: the register value is neither 8 nor 16 digits long, or a
      dump line does not start at the expected address.
    subprocess.CalledProcessError: the assembler or the linker failed.
  """
  program_counter = register_text[register_name]
  program_counter_val = int(program_counter, 16)

  # Handle the 3 different file formats that we've observed.
  if len(program_counter) == 8:
    block_line_len = [67]
    block_num_words = 4
  else:
    assert len(program_counter) == 16
    block_line_len = [57, 73]
    block_num_words = 2

  start_address, code_bytes, terminator = _ReadCodeWords(
      tools, line_generator, block_line_len, block_num_words)
  if start_address is None:
    # Nothing looked like a dump line, so there is nothing to assemble and no
    # address to link at. Hand back the line we consumed instead of failing.
    if terminator is not None:
      yield terminator
    return

  # ARM code comes in two flavors: arm and thumb. Figure out the one
  # to use by peeking in the cpsr.
  if abi == 'arm' and int(register_text['cpsr'], 16) & 0x20:
    prologue = THUMB_PROLOGUE
  else:
    prologue = STANDARD_PROLOGUE

  linked_file = tempfile.NamedTemporaryFile(suffix='.o')
  try:
    # The with blocks remove the intermediate files even when a tool fails.
    with tempfile.NamedTemporaryFile(suffix='.s') as scratch_file:
      scratch_file.write(prologue)
      # Now generate assembly from the bytes in the code block.
      address = int(start_address, 16)
      for byte in code_bytes:
        # Emit a label at the desired program counter.
        # This will cause the disassembler to resynchronize at this point,
        # allowing us to position the arrow and also ensuring that we decode
        # the instruction properly.
        if address == program_counter_val:
          scratch_file.write('program_counter_was_here:\n')
        scratch_file.write('  .byte 0x%s\n' % byte)
        address += 1
      # The assembler reads the file by name, so the text must be on disk.
      scratch_file.flush()

      # Assemble the scratch file and relocate it to the block address with
      # the linker.
      with tempfile.NamedTemporaryFile(suffix='.o') as object_file:
        subprocess.check_call(tools.Assemble([
            '-o', object_file.name, scratch_file.name]))
        subprocess.check_call(tools.Link([
            '-Ttext', '0x' + start_address, '-o', linked_file.name,
            object_file.name]))

    disassembler = subprocess.Popen(tools.Disassemble([
        '-S', linked_file.name]), stdout=subprocess.PIPE)
    try:
      # Skip some of the annoying assembler headers.
      emit = False
      start_pattern = start_address + ' '
      # objdump padding varies between 32 bit and 64 bit architectures
      arrow_pattern = re.compile('^[ 0]*%8x:\t' % program_counter_val)
      for line in disassembler.stdout:
        emit = emit or line.startswith(start_pattern)
        # Drop empty lines and the synthetic label that marks the register.
        if emit and len(line) > 1 and 'program_counter_was_here' not in line:
          if arrow_pattern.search(line):
            yield '--->' + line
          else:
            yield '    ' + line
    finally:
      # Release the pipe and reap the child, also when the consumer stops
      # iterating early.
      disassembler.stdout.close()
      disassembler.wait()
  finally:
    linked_file.close()
  yield '\n'
 def main(argv):
  """Disassembles each tombstone named on the command line to stdout.

  Args:
    argv: The full argument list; argv[0] is skipped and every remaining
      entry is opened as a tombstone file.
  """
  for fn in argv[1:]:
    # Close each tombstone as soon as it has been processed instead of
    # leaving the handle to the garbage collector.
    with open(fn, 'r') as tombstone:
      for line in Disassemble(tombstone):
        # Lines already carry their own terminator, so write them verbatim;
        # the print statement could insert a separating space after a line
        # that lacks one.
        sys.stdout.write(line)
 if __name__ == '__main__':
  main(sys.argv)