From 3a3349eeebaf73be7fcde62699518ea68bc001c2 Mon Sep 17 00:00:00 2001 From: David Srbecky Date: Thu, 28 Oct 2021 13:08:10 +0100 Subject: [PATCH] Try to locate symbols using build-id. If the parsed backtrace has ELF build-ids, use them to brute-force search for files in the symbols directory. The search only considers files with the same basename, so the number of candidates to check is very small. This fixes issues when compile time directory does not match the runtime directory on device (e.g. for apex). Bug: 171980804 Test: ART crashes can be symbolized again Change-Id: I0311a10c1e2eab92ee44f1b32c523d4763a54ee1 --- scripts/stack_core.py | 66 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 56 insertions(+), 10 deletions(-) diff --git a/scripts/stack_core.py b/scripts/stack_core.py index 22997bfb6..e703cc9fc 100755 --- a/scripts/stack_core.py +++ b/scripts/stack_core.py @@ -16,7 +16,10 @@ """stack symbolizes native crash dumps.""" +import collections +import functools import os +import pathlib import re import subprocess import symbol @@ -67,6 +70,11 @@ class TraceConverter: "x86_64": "rax|rbx|rcx|rdx|rsi|rdi|r8|r9|r10|r11|r12|r13|r14|r15|cs|ss|rip|rbp|rsp|eflags", } + # We use the "file" command line tool to extract BuildId from ELF files. + ElfInfo = collections.namedtuple("ElfInfo", ["bitness", "build_id"]) + file_tool_output = re.compile(r"ELF (?P<bitness>32|64)-bit .*" + r"BuildID(\[.*\])?=(?P<build_id>[0-9a-f]+)") + def UpdateAbiRegexes(self): if symbol.ARCH == "arm64" or symbol.ARCH == "mips64" or symbol.ARCH == "x86_64": self.width = "{16}" @@ -96,7 +104,9 @@ class TraceConverter: # 0x prefix). r"(?P<dso>\[[^\]]+\]|[^\r\n \t]*)" # Library name. r"( \(offset (?P<so_offset>0x[0-9a-fA-F]+)\))?" # Offset into the file to find the start of the shared so. - r"(?P<symbolpresent> \((?P<symbol>.*)\))?") # Is the symbol there? + r"(?P<symbolpresent> \((?P<symbol>.*?)\))?" # Is the symbol there? (non-greedy) + r"( \(BuildId: (?P<build_id>.*)\))?" # Optional build-id of the ELF file. + r"[ \t]*$") # End of line (to expand non-greedy match). 
# pylint: disable-msg=C6310 # Sanitizer output. This is different from debuggerd output, and it is easier to handle this as # its own regex. Example: @@ -183,22 +193,24 @@ class TraceConverter: self.DeleteApkTmpFiles() def MatchTraceLine(self, line): - if self.trace_line.match(line): - match = self.trace_line.match(line) + match = self.trace_line.match(line) + if match: return {"frame": match.group("frame"), "offset": match.group("offset"), "so_offset": match.group("so_offset"), "dso": match.group("dso"), "symbol_present": bool(match.group("symbolpresent")), - "symbol_name": match.group("symbol")} - if self.sanitizer_trace_line.match(line): - match = self.sanitizer_trace_line.match(line) + "symbol_name": match.group("symbol"), + "build_id": match.group("build_id")} + match = self.sanitizer_trace_line.match(line) + if match: return {"frame": match.group("frame"), "offset": match.group("offset"), "so_offset": None, "dso": match.group("dso"), "symbol_present": False, - "symbol_name": None} + "symbol_name": None, + "build_id": None} return None def ExtractLibFromApk(self, apk, shared_lib_name): @@ -300,6 +312,32 @@ class TraceConverter: return file_name, tmp_shared_lib return None, None + # Find all files in the symbols directory and group them by basename (without directory). + @functools.cache + def GlobSymbolsDir(self, symbols_dir): + files_by_basename = {} + for path in sorted(pathlib.Path(symbols_dir).glob("**/*")): + files_by_basename.setdefault(path.name, []).append(path) + return files_by_basename + + # Use the "file" command line tool to find the bitness and build_id of given ELF file. + @functools.cache + def GetLibraryInfo(self, lib): + stdout = subprocess.check_output(["file", lib], text=True) + match = self.file_tool_output.search(stdout) + if match: + return self.ElfInfo(bitness=match.group("bitness"), build_id=match.group("build_id")) + return None + + # Search for a library with the given basename and build_id anywhere in the symbols directory. 
+ @functools.cache + def GetLibraryByBuildId(self, symbols_dir, basename, build_id): + for candidate in self.GlobSymbolsDir(symbols_dir).get(basename, []): + info = self.GetLibraryInfo(candidate) + if info and info.build_id == build_id: + return "/" + str(candidate.relative_to(symbols_dir)) + return None + def GetLibPath(self, lib): symbol_dir = symbol.SYMBOLS_DIR if os.path.isfile(symbol_dir + lib): @@ -401,6 +439,7 @@ class TraceConverter: so_offset = trace_line_dict["so_offset"] symbol_present = trace_line_dict["symbol_present"] symbol_name = trace_line_dict["symbol_name"] + build_id = trace_line_dict["build_id"] if frame <= self.last_frame and (self.trace_lines or self.value_lines): self.PrintOutput(self.trace_lines, self.value_lines) @@ -443,9 +482,16 @@ class TraceConverter: lib = area lib_name = None - # When using atest, test paths are different between the out/ directory - # and device. Apply fixups. - lib = self.GetLibPath(lib) + if build_id: + # If we have the build_id, do a brute-force search of the symbols directory. + lib_basename = os.path.basename(lib) + lib = self.GetLibraryByBuildId(symbol.SYMBOLS_DIR, lib_basename, build_id) + if not lib: + print("WARNING: Cannot find {} with build id {} in symbols directory.".format(lib_basename, build_id)) + else: + # When using atest, test paths are different between the out/ directory + # and device. Apply fixups. + lib = self.GetLibPath(lib) # If a calls b which further calls c and c is inlined to b, we want to # display "a -> b -> c" in the stack trace instead of just "a -> c"