Migrate to llvm-objdump and llvm-symbolizer
The Android NDK will no longer support tools based on the GNU toolchain.
Hence, migrate ASan symbolization for Android to LLVM-based tools
like llvm-symbolizer and llvm-objdump.
Bug: 1273402
Cq-Include-Trybots: luci.chromium.try:linux_chromium_asan_rel_ng,win-asan,mac_chromium_asan_rel_ng,android-asan
Change-Id: Icc0c5f8e64e1bb05d49f347c3316850a0ff83e55
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/3303875
Reviewed-by: Andrew Grieve <[email protected]>
Reviewed-by: Hans Wennborg <[email protected]>
Reviewed-by: Nico Weber <[email protected]>
Commit-Queue: Jonathan Wright <[email protected]>
Cr-Commit-Position: refs/heads/main@{#961488}
NOKEYCHECK=True
GitOrigin-RevId: ffa736ce5f5d7b018b768c29c0cc1ff35152393a
diff --git a/llvm_objdump.py b/llvm_objdump.py
new file mode 100644
index 0000000..3b2acd6
--- /dev/null
+++ b/llvm_objdump.py
@@ -0,0 +1,137 @@
+# Copyright 2022 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+import logging
+import os
+import re
+import subprocess
+
+_CHROME_SRC = os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
+_LLVM_OBJDUMP_PATH = os.path.join(_CHROME_SRC, 'third_party', 'llvm-build',
+                                  'Release+Asserts', 'bin', 'llvm-objdump')
+
+# Function lines look like:
+#   000177b0 <android::IBinder::~IBinder()+0x2c>:
+# We pull out the address and function first. Then we check for an optional
+# offset. This is tricky due to functions that look like "operator+(..)+0x2c"
+_FUNC = re.compile(r"(^[a-f0-9]*) <(.*)>:$")
+_OFFSET = re.compile(r"(.*)\+0x([a-f0-9]*)")
+
+# A disassembly line looks like (group 1 is the hex address before the ':'):
+#   177b2: b510 push {r4, lr}
+_ASM = re.compile(r"(^[ a-f0-9]*):[ a-f0-9]*.*$")
+
+
+def _StripPC(addr, cpu_arch):
+  """Clears the Thumb bit (bit 0) of a program counter for "arm" targets.
+
+  Args:
+    addr: the program counter address, as an integer.
+    cpu_arch: Target CPU architecture; only "arm" changes the result.
+
+  Returns:
+    addr with its lowest bit cleared when cpu_arch is "arm"; addr itself,
+    untouched, for every other architecture.
+  """
+  is_arm = cpu_arch == "arm"
+  return (addr & ~1) if is_arm else addr
+
+
+class ObjdumpInformation(object):
+  """Plain record holding the result of one llvm-objdump lookup."""
+  def __init__(self, address, library, symbol, offset):
+    # Values are stored exactly as passed in; nothing is validated here.
+    self.address, self.library = address, library
+    self.symbol, self.offset = symbol, offset
+
+
+class LLVMObjdumper(object):
+  """Resolves a code address to (symbol, offset) by running llvm-objdump."""
+  def __init__(self):
+    """Sets up the fixed llvm-objdump command-line flags."""
+    self._llvm_objdump_parameters = [
+        '--disassemble', '--demangle', '--section=.text']
+
+  def __enter__(self):
+    return self
+
+  def __exit__(self, exc_type, exc_val, exc_tb):
+    pass
+
+  @staticmethod
+  def GetSymbolDataFromObjdumpOutput(objdump_out, address, cpu_arch):
+    """Finds the symbol that contains |address| in llvm-objdump output.
+
+    Args:
+      objdump_out: disassembly text printed by llvm-objdump.
+      address: integer address to look up.
+      cpu_arch: target CPU architecture, forwarded to _StripPC().
+
+    Returns:
+      (symbol, offset from the symbol start), or (None, None) if not found.
+    """
+    target = _StripPC(address, cpu_arch)
+    current_symbol = current_symbol_addr = None
+    # splitlines() instead of split(os.linesep): GetSymbolInformation() reads
+    # the text with universal_newlines=True, so lines end in '\n' everywhere.
+    for line in objdump_out.splitlines():
+      func = _FUNC.match(line)
+      if func:
+        # A header like "000177b0 <foo(..)+0x2c>:" starts a new function.
+        current_symbol_addr = int(func.group(1), 16)
+        current_symbol = func.group(2)
+        # An optional "+0x2c" suffix is subtracted to get the symbol start.
+        suffix = _OFFSET.match(current_symbol)
+        if suffix:
+          current_symbol = suffix.group(1)
+          if suffix.group(2):
+            current_symbol_addr -= int(suffix.group(2), 16)
+      # Is it a disassembly line like: "177b2: b510 push {r4, lr}"?
+      asm = _ASM.match(line)
+      if asm and asm.group(1).strip() and int(asm.group(1), 16) == target:
+        if current_symbol is None:
+          break  # Instruction seen before any function header.
+        return (current_symbol, target - current_symbol_addr)
+
+    return (None, None)
+
+  def GetSymbolInformation(self, lib, address, cpu_arch):
+    """Returns the symbol containing |address| in |lib| and the offset into it.
+
+    Args:
+      lib: library to search for info.
+      address: address to look for info.
+      cpu_arch: architecture where the dump was taken
+
+    Returns:
+      An ObjdumpInformation object, or None if llvm-objdump cannot be found
+      or exits with a non-zero return code.
+    """
+    if not os.path.isfile(_LLVM_OBJDUMP_PATH):
+      logging.error('Cannot find llvm-objdump. path=%s', _LLVM_OBJDUMP_PATH)
+      return None
+
+    stripped_address = _StripPC(address, cpu_arch)
+    full_arguments = [_LLVM_OBJDUMP_PATH] + self._llvm_objdump_parameters
+    full_arguments.append('--start-address=' + str(stripped_address))
+    full_arguments.append('--stop-address=' + str(stripped_address + 8))
+    full_arguments.append(lib)
+
+    # stderr must be piped as well: without it communicate() returns None for
+    # stderr and the failure log below could never show the tool's message.
+    objdump_process = subprocess.Popen(
+        full_arguments, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
+        stderr=subprocess.PIPE, universal_newlines=True)
+    stdout, stderr = objdump_process.communicate()
+    if objdump_process.returncode != 0:
+      logging.error(
+          'Invocation of llvm-objdump failed!'
+          ' tool-command-line=\'%s\', return-code=%s, std-error=\'%s\'',
+          ' '.join(full_arguments), objdump_process.returncode, stderr)
+      return None
+
+    symbol, offset = LLVMObjdumper.GetSymbolDataFromObjdumpOutput(
+        stdout, address, cpu_arch)
+    return ObjdumpInformation(
+        address=address, library=lib, symbol=symbol, offset=offset)
diff --git a/llvm_symbolizer.py b/llvm_symbolizer.py
index ac21078..86bc0a8 100644
--- a/llvm_symbolizer.py
+++ b/llvm_symbolizer.py
@@ -4,7 +4,6 @@
import logging
import os
-import re
import subprocess
import threading
@@ -13,25 +12,26 @@
_CHROME_SRC, 'third_party', 'llvm-build', 'Release+Asserts', 'bin',
'llvm-symbolizer')
-_BINARY = re.compile(r'0b[0,1]+')
-_HEX = re.compile(r'0x[0-9,a-e]+')
-_OCTAL = re.compile(r'0[0-7]+')
-
_UNKNOWN = '<UNKNOWN>'
+_ELF_MAGIC_HEADER_BYTES = b'\x7f\x45\x4c\x46'
-def _CheckValidAddr(addr):
- """
- Check whether the addr is valid input to llvm symbolizer.
- Valid addr has to be octal, binary, or hex number.
+
+def IsValidLLVMSymbolizerTarget(file_path):
+ """ Verify the passed file is a valid target for llvm-symbolization
Args:
- addr: addr to be entered to llvm symbolizer.
+ file_path: Path to a file to be checked
- Returns:
- whether the addr is valid input to llvm symbolizer.
+ Return:
+ True if the file exists and has the correct ELF header, False otherwise
"""
- return _HEX.match(addr) or _OCTAL.match(addr) or _BINARY.match(addr)
+ try:
+ with open(file_path, 'rb') as f:
+ header_bytes = f.read(4)
+ return header_bytes == _ELF_MAGIC_HEADER_BYTES
+ except IOError:
+ return False
class LLVMSymbolizer(object):
@@ -42,6 +42,12 @@
numbers of an address from the symbols library.
"""
self._llvm_symbolizer_subprocess = None
+ self._llvm_symbolizer_parameters = [
+ '--functions',
+ '--demangle',
+ '--inlines',
+ ]
+
# Allow only one thread to call GetSymbolInformation at a time.
self._lock = threading.Lock()
@@ -53,7 +59,7 @@
"""
if os.path.isfile(_LLVM_SYMBOLIZER_PATH):
self._llvm_symbolizer_subprocess = subprocess.Popen(
- [_LLVM_SYMBOLIZER_PATH],
+ [_LLVM_SYMBOLIZER_PATH] + self._llvm_symbolizer_parameters,
stdout=subprocess.PIPE,
stdin=subprocess.PIPE,
universal_newlines=True)
@@ -89,15 +95,26 @@
addr: address to look for info.
Returns:
- A list of (function name, line numbers) tuple.
+ A triplet of address, module-name and list of symbols
"""
- if (self._llvm_symbolizer_subprocess is None or not lib
- or not _CheckValidAddr(addr) or not os.path.isfile(lib)):
+ if (self._llvm_symbolizer_subprocess is None):
+ logging.error('Can\'t run llvm-symbolizer! ' +
+ 'Subprocess for llvm-symbolizer has not been started!')
+ return [(_UNKNOWN, lib)]
+
+ if not lib:
+ logging.error('Can\'t run llvm-symbolizer! No target is given!')
+ return [(_UNKNOWN, lib)]
+
+ if not IsValidLLVMSymbolizerTarget(lib):
+ logging.error(
+ 'Can\'t run llvm-symbolizer! ' +
+ 'Given binary is not a valid target. path=%s', lib)
return [(_UNKNOWN, lib)]
proc = self._llvm_symbolizer_subprocess
with self._lock:
- proc.stdin.write('%s %s\n' % (lib, addr))
+ proc.stdin.write('%s %s\n' % (lib, hex(addr)))
proc.stdin.flush()
result = []
# Read until an empty line is observed, which indicates the end of the