From f62a3be63d39c013abacb6d033c9d39eebe2b64b Mon Sep 17 00:00:00 2001 From: Christopher Ferris Date: Thu, 9 Mar 2023 16:13:57 -0800 Subject: [PATCH] Remove all ARCH references. Replaced with ARCH_IS_32BIT since that's the only thing truly necessary to work. This also makes the register regex much more lenient, but it appears to be strict enough that it doesn't seem to capture arbitrary lines when run through logcat. Removed the StripPC function and verified that an arm crash that ends in 1 still works. Removed the architecture.py script, it is old and I don't see anyone using it. Modify the reading of the lines to ignore any errors in the input. Test: All unit tests pass. Test: Symbolized arm and arm64 tombstones Test: Symbolized x86 and x86_64 tombstones Test: Ran through a logcat and verified it caught the bug but Test: didn't print any extra information. Change-Id: I6a65ecaad68da1d56864db32ff434512e4df0d89 --- scripts/architecture.py | 58 ---------- scripts/stack | 8 +- scripts/stack_core.py | 54 ++++----- scripts/symbol.py | 247 ++++++---------------------------------- 4 files changed, 67 insertions(+), 300 deletions(-) delete mode 100644 scripts/architecture.py diff --git a/scripts/architecture.py b/scripts/architecture.py deleted file mode 100644 index f239250c7..000000000 --- a/scripts/architecture.py +++ /dev/null @@ -1,58 +0,0 @@ -"""Abstraction layer for different ABIs.""" - -import re -import symbol - -def UnpackLittleEndian(word): - """Split a hexadecimal string in little endian order.""" - return [word[x:x+2] for x in range(len(word) - 2, -2, -2)] - - -ASSEMBLE = 'as' -DISASSEMBLE = 'objdump' -LINK = 'ld' -UNPACK = 'unpack' - -OPTIONS = { - 'x86': { - ASSEMBLE: ['--32'], - LINK: ['-melf_i386'] - } -} - - -class Architecture(object): - """Creates an architecture abstraction for a given ABI. - - Args: - name: The abi name, as represented in a tombstone. - """ - - def __init__(self, name): - symbol.ARCH = name - self.toolchain = symbol.FindToolchain() - self.options = OPTIONS.get(name, {}) - - def Assemble(self, args): - """Generates an assembler command, appending the given args.""" - return [symbol.ToolPath(ASSEMBLE)] + self.options.get(ASSEMBLE, []) + args - - def Link(self, args): - """Generates a link command, appending the given args.""" - return [symbol.ToolPath(LINK)] + self.options.get(LINK, []) + args - - def Disassemble(self, args): - """Generates a disassemble command, appending the given args.""" - return ([symbol.ToolPath(DISASSEMBLE)] + self.options.get(DISASSEMBLE, []) + - args) - - def WordToBytes(self, word): - """Unpacks a hexadecimal string in the architecture's byte order. - - Args: - word: A string representing a hexadecimal value. - - Returns: - An array of hexadecimal byte values. - """ - return self.options.get(UNPACK, UnpackLittleEndian)(word) diff --git a/scripts/stack b/scripts/stack index 70bc3fbbe..2e83ff2af 100755 --- a/scripts/stack +++ b/scripts/stack @@ -44,7 +44,10 @@ def main(): args = parser.parse_args() if args.arch: - symbol.ARCH = args.arch + if args.arch == "arm" or args.arch == "x86": + symbol.ARCH_IS_32BIT = True + else: + symbol.ARCH_IS_32BIT = False if args.symbols_dir: symbol.SYMBOLS_DIR = args.symbols_dir if args.symbols_zip: @@ -56,10 +59,11 @@ def main(): symbol.VERBOSE = args.verbose if args.file == '-': print("Reading native crash info from stdin") + sys.stdin.reconfigure(errors='ignore') f = sys.stdin else: print("Searching for native crashes in %s" % args.file) - f = open(args.file, "r") + f = open(args.file, "r", errors='ignore') lines = f.readlines() f.close() diff --git a/scripts/stack_core.py b/scripts/stack_core.py index f88550f65..021617a60 100755 --- a/scripts/stack_core.py +++ b/scripts/stack_core.py @@ -76,15 +76,14 @@ class TraceConverter: r"Build ID:\s*(?P[0-9a-f]+)", flags=re.DOTALL) - def UpdateAbiRegexes(self): - if symbol.ARCH == "arm64" or symbol.ARCH == "x86_64" or symbol.ARCH == "riscv64": - self.width = "{16}" - self.spacing = " " - else: + def UpdateBitnessRegexes(self): + if symbol.ARCH_IS_32BIT: self.width = "{8}" self.spacing = "" - - self.register_line = re.compile("(([ ]*\\b(" + self.register_names[symbol.ARCH] + ")\\b +[0-9a-f]" + self.width + "){1,5}$)") + else: + self.width = "{16}" + self.spacing = " " + self.register_line = re.compile(" (([ ]*\\b(\S*)\\b +[0-9a-f]" + self.width + "){1,5}$)") # Note that both trace and value line matching allow for variable amounts of # whitespace (e.g. \t). This is because the we want to allow for the stack @@ -183,9 +182,9 @@ class TraceConverter: def ConvertTrace(self, lines): lines = [self.CleanLine(line) for line in lines] try: - if not symbol.ARCH: - symbol.SetAbi(lines) - self.UpdateAbiRegexes() + if symbol.ARCH_IS_32BIT is None: + symbol.SetBitness(lines) + self.UpdateBitnessRegexes() for line in lines: self.ProcessLine(line) self.PrintOutput(self.trace_lines, self.value_lines) @@ -371,11 +370,11 @@ class TraceConverter: test_name = lib.rsplit("/", 1)[-1] test_dir = "/data/nativetest" test_dir_bitness = "" - if symbol.ARCH.endswith("64"): + if symbol.ARCH_IS_32BIT: + bitness = "32" + else: bitness = "64" test_dir_bitness = "64" - else: - bitness = "32" # Unfortunately, the location of the real symbol file is not # standardized, so we need to go hunting for it. @@ -540,7 +539,7 @@ class TraceConverter: if nest_count > 0: nest_count = nest_count - 1 arrow = "v------>" - if symbol.ARCH == "arm64" or symbol.ARCH == "x86_64" or symbol.ARCH == "riscv64": + if not symbol.ARCH_IS_32BIT: arrow = "v-------------->" self.trace_lines.append((arrow, source_symbol, source_location)) else: @@ -583,8 +582,12 @@ class RegisterPatternTests(unittest.TestCase): def assert_register_matches(self, abi, example_crash, stupid_pattern): tc = TraceConverter() lines = example_crash.split('\n') - symbol.SetAbi(lines) - tc.UpdateAbiRegexes() + symbol.SetBitness(lines) + tc.UpdateBitnessRegexes() + if symbol.ARCH_IS_32BIT: + print("32 Bit Arch") + else: + print("64 Bit Arch") for line in lines: tc.ProcessLine(line) is_register = (re.search(stupid_pattern, line) is not None) @@ -593,10 +596,10 @@ class RegisterPatternTests(unittest.TestCase): tc.PrintOutput(tc.trace_lines, tc.value_lines) def test_arm_registers(self): - self.assert_register_matches("arm", example_crashes.arm, '\\b(r0|r4|r8|ip)\\b') + self.assert_register_matches("arm", example_crashes.arm, '\\b(r0|r4|r8|ip|scr)\\b') def test_arm64_registers(self): - self.assert_register_matches("arm64", example_crashes.arm64, '\\b(x0|x4|x8|x12|x16|x20|x24|x28|sp)\\b') + self.assert_register_matches("arm64", example_crashes.arm64, '\\b(x0|x4|x8|x12|x16|x20|x24|x28|sp|v[1-3]?[0-9])\\b') def test_x86_registers(self): self.assert_register_matches("x86", example_crashes.x86, '\\b(eax|esi|xcs|eip)\\b') @@ -612,10 +615,9 @@ class LibmemunreachablePatternTests(unittest.TestCase): tc = TraceConverter() lines = example_crashes.libmemunreachable.split('\n') - symbol.SetAbi(lines) - self.assertEqual(symbol.ARCH, "arm") - - tc.UpdateAbiRegexes() + symbol.SetBitness(lines) + self.assertTrue(symbol.ARCH_IS_32BIT) + tc.UpdateBitnessRegexes() header_lines = 0 trace_lines = 0 for line in lines: @@ -635,8 +637,8 @@ class LongASANStackTests(unittest.TestCase): def test_long_asan_crash(self): tc = TraceConverter() lines = example_crashes.long_asan_crash.splitlines() - symbol.SetAbi(lines) - tc.UpdateAbiRegexes() + symbol.SetBitness(lines) + tc.UpdateBitnessRegexes() # Test by making sure trace_line_count is monotonically non-decreasing. If the stack trace # is split, a separator is printed and trace_lines is flushed. trace_line_count = 0 @@ -652,8 +654,8 @@ class LongASANStackTests(unittest.TestCase): class ValueLinesTest(unittest.TestCase): def test_value_line_skipped(self): tc = TraceConverter() - symbol.SetAbi(["ABI: 'arm'"]) - tc.UpdateAbiRegexes() + symbol.ARCH_IS_32BIT = True + tc.UpdateBitnessRegexes() tc.ProcessLine(" 12345678 00001000 .") self.assertEqual([], tc.value_lines) diff --git a/scripts/symbol.py b/scripts/symbol.py index 714c21222..bfdc29eb6 100755 --- a/scripts/symbol.py +++ b/scripts/symbol.py @@ -58,13 +58,12 @@ def FindSymbolsDir(): SYMBOLS_DIR = FindSymbolsDir() -ARCH = None +ARCH_IS_32BIT = None VERBOSE = False # These are private. Do not access them from other modules. _CACHED_TOOLCHAIN = None -_CACHED_TOOLCHAIN_ARCH = None _CACHED_CXX_FILT = None # Caches for symbolized information. @@ -147,18 +146,18 @@ for sig in (signal.SIGABRT, signal.SIGINT, signal.SIGTERM): def ToolPath(tool, toolchain=None): """Return a fully-qualified path to the specified tool, or just the tool if it's on PATH """ - if shutil.which(tool) is not None: - return tool + if shutil.which(tool): + return tool if not toolchain: toolchain = FindToolchain() return os.path.join(toolchain, tool) def FindToolchain(): - """Returns the toolchain matching ARCH.""" + """Returns the toolchain.""" - global _CACHED_TOOLCHAIN, _CACHED_TOOLCHAIN_ARCH - if _CACHED_TOOLCHAIN is not None and _CACHED_TOOLCHAIN_ARCH == ARCH: + global _CACHED_TOOLCHAIN + if _CACHED_TOOLCHAIN: return _CACHED_TOOLCHAIN llvm_binutils_dir = ANDROID_BUILD_TOP + "/prebuilts/clang/host/linux-x86/llvm-binutils-stable/"; @@ -166,8 +165,7 @@ def FindToolchain(): raise Exception("Could not find llvm tool chain directory %s" % (llvm_binutils_dir)) _CACHED_TOOLCHAIN = llvm_binutils_dir - _CACHED_TOOLCHAIN_ARCH = ARCH - print("Using", _CACHED_TOOLCHAIN_ARCH, "toolchain from:", _CACHED_TOOLCHAIN) + print("Using toolchain from:", _CACHED_TOOLCHAIN) return _CACHED_TOOLCHAIN @@ -324,21 +322,6 @@ def CallLlvmSymbolizerForSet(lib, unique_addrs): return result -def StripPC(addr): - """Strips the Thumb bit a program counter address when appropriate. - - Args: - addr: the program counter address - - Returns: - The stripped program counter address. - """ - global ARCH - if ARCH == "arm": - return addr & ~1 - return addr - - def CallObjdumpForSet(lib, unique_addrs): """Use objdump to find out the names of the containing functions. @@ -381,8 +364,8 @@ def CallObjdumpForSet(lib, unique_addrs): if not os.path.exists(symbols): return None - start_addr_dec = str(StripPC(int(addrs[0], 16))) - stop_addr_dec = str(StripPC(int(addrs[-1], 16)) + 8) + start_addr_dec = str(int(addrs[0], 16)) + stop_addr_dec = str(int(addrs[-1], 16) + 8) cmd = [ToolPath("llvm-objdump"), "--section=.text", "--demangle", @@ -431,7 +414,7 @@ def CallObjdumpForSet(lib, unique_addrs): addr = components.group(1) target_addr = addrs[addr_index] i_addr = int(addr, 16) - i_target = StripPC(int(target_addr, 16)) + i_target = int(target_addr, 16) if i_addr == i_target: result[target_addr] = (current_symbol, i_target - current_symbol_addr) addr_cache[target_addr] = result[target_addr] @@ -517,228 +500,64 @@ def FormatSymbolWithoutParameters(symbol): return result.strip() -def GetAbiFromToolchain(toolchain_var, bits): - toolchain = os.environ.get(toolchain_var) - if not toolchain: - return None +def SetBitness(lines): + global ARCH_IS_32BIT - toolchain_match = re.search("\/(aarch64|arm|x86)\/", toolchain) - if toolchain_match: - abi = toolchain_match.group(1) - if abi == "aarch64": - return "arm64" - elif bits == 64: - if abi == "x86": - return "x86_64" - return abi - return None - -def Get32BitArch(): - # Check for ANDROID_TOOLCHAIN_2ND_ARCH first, if set, use that. - # If not try ANDROID_TOOLCHAIN to find the arch. - # If this is not set, then default to arm. - arch = GetAbiFromToolchain("ANDROID_TOOLCHAIN_2ND_ARCH", 32) - if not arch: - arch = GetAbiFromToolchain("ANDROID_TOOLCHAIN", 32) - if not arch: - return "arm" - return arch - -def Get64BitArch(): - # Check for ANDROID_TOOLCHAIN, if it is set, we can figure out the - # arch this way. If this is not set, then default to arm64. - arch = GetAbiFromToolchain("ANDROID_TOOLCHAIN", 64) - if not arch: - return "arm64" - return arch - -def SetAbi(lines): - global ARCH - - abi_line = re.compile("ABI: \'(.*)\'") trace_line = re.compile("\#[0-9]+[ \t]+..[ \t]+([0-9a-f]{8}|[0-9a-f]{16})([ \t]+|$)") asan_trace_line = re.compile("\#[0-9]+[ \t]+0x([0-9a-f]+)[ \t]+") - ARCH = None + ARCH_IS_32BIT = False for line in lines: - abi_match = abi_line.search(line) - if abi_match: - ARCH = abi_match.group(1) - break trace_match = trace_line.search(line) if trace_match: # Try to guess the arch, we know the bitness. if len(trace_match.group(1)) == 16: - ARCH = Get64BitArch() + ARCH_IS_32BIT = False else: - ARCH = Get32BitArch() + ARCH_IS_32BIT = True break asan_trace_match = asan_trace_line.search(line) if asan_trace_match: # We might be able to guess the bitness by the length of the address. if len(asan_trace_match.group(1)) > 8: - ARCH = Get64BitArch() + ARCH_IS_32BIT = False # We know for a fact this is 64 bit, so we are done. break else: - ARCH = Get32BitArch() # This might be 32 bit, or just a small address. Keep going in this # case, but if we couldn't figure anything else out, go with 32 bit. - if not ARCH: - raise Exception("Could not determine arch from input, use --arch=XXX to specify it") - - -class FindToolchainTests(unittest.TestCase): - def assert_toolchain_found(self, abi): - global ARCH - ARCH = abi - FindToolchain() # Will throw on failure. - - @unittest.skipIf(ANDROID_BUILD_TOP == '.', 'Test only supported in an Android tree.') - def test_toolchains_found(self): - self.assert_toolchain_found("arm") - self.assert_toolchain_found("arm64") - self.assert_toolchain_found("x86") - self.assert_toolchain_found("x86_64") + ARCH_IS_32BIT = True class FindClangDirTests(unittest.TestCase): @unittest.skipIf(ANDROID_BUILD_TOP == '.', 'Test only supported in an Android tree.') def test_clang_dir_found(self): self.assertIsNotNone(FindClangDir()) -class SetArchTests(unittest.TestCase): - def test_abi_check(self): - global ARCH +class SetBitnessTests(unittest.TestCase): + def test_32bit_check(self): + global ARCH_IS_32BIT - SetAbi(["ABI: 'arm'"]) - self.assertEqual(ARCH, "arm") - SetAbi(["ABI: 'arm64'"]) - self.assertEqual(ARCH, "arm64") + SetBitness(["#00 pc 000374e0"]) + self.assertTrue(ARCH_IS_32BIT) - SetAbi(["ABI: 'x86'"]) - self.assertEqual(ARCH, "x86") - SetAbi(["ABI: 'x86_64'"]) - self.assertEqual(ARCH, "x86_64") + def test_64bit_check(self): + global ARCH_IS_32BIT - def test_32bit_trace_line_toolchain(self): - global ARCH - - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/arm/arm-linux-androideabi-4.9/bin" - SetAbi(["#00 pc 000374e0"]) - self.assertEqual(ARCH, "arm") - - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/x86/arm-linux-androideabi-4.9/bin" - SetAbi(["#00 pc 000374e0"]) - self.assertEqual(ARCH, "x86") - - def test_32bit_trace_line_toolchain_2nd(self): - global ARCH - - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN_2ND_ARCH"] = "linux-x86/arm/arm-linux-androideabi-4.9/bin" - os.environ["ANDROID_TOOLCHAIN_ARCH"] = "linux-x86/aarch64/aarch64-linux-android-4.9/bin" - SetAbi(["#00 pc 000374e0"]) - self.assertEqual(ARCH, "arm") - - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN_2ND_ARCH"] = "linux-x86/x86/x86-linux-androideabi-4.9/bin" - os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/unknown/unknown-linux-androideabi-4.9/bin" - SetAbi(["#00 pc 000374e0"]) - self.assertEqual(ARCH, "x86") - - def test_64bit_trace_line_toolchain(self): - global ARCH - - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/aarch/aarch-linux-androideabi-4.9/bin" - SetAbi(["#00 pc 00000000000374e0"]) - self.assertEqual(ARCH, "arm64") - - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/x86/arm-linux-androideabi-4.9/bin" - SetAbi(["#00 pc 00000000000374e0"]) - self.assertEqual(ARCH, "x86_64") - - def test_trace_default_abis(self): - global ARCH - - os.environ.clear() - SetAbi(["#00 pc 000374e0"]) - self.assertEqual(ARCH, "arm") - SetAbi(["#00 pc 00000000000374e0"]) - self.assertEqual(ARCH, "arm64") + SetBitness(["#00 pc 00000000000374e0"]) + self.assertFalse(ARCH_IS_32BIT) def test_32bit_asan_trace_line_toolchain(self): - global ARCH + global ARCH_IS_32BIT - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/arm/arm-linux-androideabi-4.9/bin" - SetAbi(["#10 0xb5eeba5d (/system/vendor/lib/egl/libGLESv1_CM_adreno.so+0xfa5d)"]) - self.assertEqual(ARCH, "arm") - - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/x86/arm-linux-androideabi-4.9/bin" - SetAbi(["#10 0xb5eeba5d (/system/vendor/lib/egl/libGLESv1_CM_adreno.so+0xfa5d)"]) - self.assertEqual(ARCH, "x86") - - def test_32bit_asan_trace_line_toolchain_2nd(self): - global ARCH - - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN_2ND_ARCH"] = "linux-x86/arm/arm-linux-androideabi-4.9/bin" - os.environ["ANDROID_TOOLCHAIN_ARCH"] = "linux-x86/aarch64/aarch64-linux-android-4.9/bin" - SetAbi(["#3 0xae1725b5 (/system/vendor/lib/libllvm-glnext.so+0x6435b5)"]) - self.assertEqual(ARCH, "arm") - - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN_2ND_ARCH"] = "linux-x86/x86/x86-linux-androideabi-4.9/bin" - os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/unknown/unknown-linux-androideabi-4.9/bin" - SetAbi(["#3 0xae1725b5 (/system/vendor/lib/libllvm-glnext.so+0x6435b5)"]) - self.assertEqual(ARCH, "x86") + SetBitness(["#10 0xb5eeba5d (/system/vendor/lib/egl/libGLESv1_CM_adreno.so+0xfa5d)"]) + self.assertTrue(ARCH_IS_32BIT) def test_64bit_asan_trace_line_toolchain(self): - global ARCH + global ARCH_IS_32BIT - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/aarch/aarch-linux-androideabi-4.9/bin" - SetAbi(["#0 0x11b35d33bf (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"]) - self.assertEqual(ARCH, "arm64") - - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/x86/arm-linux-androideabi-4.9/bin" - SetAbi(["#12 0x11b35d33bf (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"]) - self.assertEqual(ARCH, "x86_64") - - # Verify that if an address that might be 32 bit comes first, that - # encountering a 64 bit address returns a 64 bit abi. - ARCH = None - os.environ.clear() - os.environ["ANDROID_TOOLCHAIN"] = "linux-x86/x86/arm-linux-androideabi-4.9/bin" - SetAbi(["#12 0x5d33bf (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)", - "#12 0x11b35d33bf (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"]) - self.assertEqual(ARCH, "x86_64") - - def test_asan_trace_default_abis(self): - global ARCH - - os.environ.clear() - SetAbi(["#4 0x1234349ab (/system/vendor/lib/libllvm-glnext.so+0x64fc4f)"]) - self.assertEqual(ARCH, "arm64") - SetAbi(["#1 0xae17ec4f (/system/vendor/lib/libllvm-glnext.so+0x64fc4f)"]) - self.assertEqual(ARCH, "arm") - - def test_no_abi(self): - global ARCH - - # Python2 vs Python3 compatibility: Python3 warns on Regexp deprecation, but Regex - # does not provide that name. - if not hasattr(unittest.TestCase, 'assertRaisesRegex'): - unittest.TestCase.assertRaisesRegex = getattr(unittest.TestCase, 'assertRaisesRegexp') - self.assertRaisesRegex(Exception, - "Could not determine arch from input, use --arch=XXX to specify it", - SetAbi, []) + SetBitness(["#12 0x5d33bf (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)", + "#12 0x11b35d33bf (/system/lib/libclang_rt.asan-arm-android.so+0x823bf)"]) + self.assertFalse(ARCH_IS_32BIT) class FormatSymbolWithoutParametersTests(unittest.TestCase): def test_c(self):