Start libc++ python cleanup and consolidation.

Libc++ frequently creates and uses utilities written in python.
Currently there are python modules under both libcxx/test and
libcxx/util. My goal with these changes is to consolidate them
into a single package under libcxx/utils/libcxx.

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@294644 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Eric Fiselier
2017-02-09 22:53:14 +00:00
parent d8b62dceb2
commit 1147753b1c
16 changed files with 27 additions and 10 deletions

View File

@@ -0,0 +1,17 @@
#===----------------------------------------------------------------------===##
#
# The LLVM Compiler Infrastructure
#
# This file is dual licensed under the MIT and the University of Illinois Open
# Source Licenses. See LICENSE.TXT for details.
#
#===----------------------------------------------------------------------===##
"""libcxx abi symbol checker"""
__author__ = 'Eric Fiselier'
__email__ = 'eric@efcs.ca'
__versioninfo__ = (0, 1, 0)
__version__ = ' '.join(str(v) for v in __versioninfo__) + 'dev'
__all__ = ['diff', 'extract', 'util']

View File

@@ -0,0 +1,103 @@
# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
#===----------------------------------------------------------------------===##
#
# The LLVM Compiler Infrastructure
#
# This file is dual licensed under the MIT and the University of Illinois Open
# Source Licenses. See LICENSE.TXT for details.
#
#===----------------------------------------------------------------------===##
"""
diff - A set of functions for diff-ing two symbol lists.
"""
from libcxx.sym_check import util
def _symbol_difference(lhs, rhs):
lhs_names = set((n['name'] for n in lhs))
rhs_names = set((n['name'] for n in rhs))
diff_names = lhs_names - rhs_names
return [n for n in lhs if n['name'] in diff_names]
def _find_by_key(sym_list, k):
for sym in sym_list:
if sym['name'] == k:
return sym
return None
def added_symbols(old, new):
return _symbol_difference(new, old)
def removed_symbols(old, new):
return _symbol_difference(old, new)
def changed_symbols(old, new):
changed = []
for old_sym in old:
if old_sym in new:
continue
new_sym = _find_by_key(new, old_sym['name'])
if (new_sym is not None and not new_sym in old
and cmp(old_sym, new_sym) != 0):
changed += [(old_sym, new_sym)]
return changed
def diff(old, new):
added = added_symbols(old, new)
removed = removed_symbols(old, new)
changed = changed_symbols(old, new)
return added, removed, changed
def report_diff(added_syms, removed_syms, changed_syms, names_only=False,
demangle=True):
def maybe_demangle(name):
return util.demangle_symbol(name) if demangle else name
report = ''
for sym in added_syms:
report += 'Symbol added: %s\n' % maybe_demangle(sym['name'])
if not names_only:
report += ' %s\n\n' % sym
if added_syms and names_only:
report += '\n'
for sym in removed_syms:
report += 'SYMBOL REMOVED: %s\n' % maybe_demangle(sym['name'])
if not names_only:
report += ' %s\n\n' % sym
if removed_syms and names_only:
report += '\n'
if not names_only:
for sym_pair in changed_syms:
old_sym, new_sym = sym_pair
old_str = '\n OLD SYMBOL: %s' % old_sym
new_str = '\n NEW SYMBOL: %s' % new_sym
report += ('SYMBOL CHANGED: %s%s%s\n\n' %
(maybe_demangle(old_sym['name']),
old_str, new_str))
added = bool(len(added_syms) != 0)
abi_break = bool(len(removed_syms))
if not names_only:
abi_break = abi_break or len(changed_syms)
if added or abi_break:
report += 'Summary\n'
report += ' Added: %d\n' % len(added_syms)
report += ' Removed: %d\n' % len(removed_syms)
if not names_only:
report += ' Changed: %d\n' % len(changed_syms)
if not abi_break:
report += 'Symbols added.'
else:
report += 'ABI BREAKAGE: SYMBOLS ADDED OR REMOVED!'
else:
report += 'Symbols match.'
is_different = abi_break or bool(len(added_syms)) \
or bool(len(changed_syms))
return report, abi_break, is_different

View File

@@ -0,0 +1,193 @@
# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
#===----------------------------------------------------------------------===##
#
# The LLVM Compiler Infrastructure
#
# This file is dual licensed under the MIT and the University of Illinois Open
# Source Licenses. See LICENSE.TXT for details.
#
#===----------------------------------------------------------------------===##
"""
extract - A set of function that extract symbol lists from shared libraries.
"""
import distutils.spawn
import sys
import re
from libcxx.sym_check import util
extract_ignore_names = ['_init', '_fini']
class NMExtractor(object):
"""
NMExtractor - Extract symbol lists from libraries using nm.
"""
@staticmethod
def find_tool():
"""
Search for the nm executable and return the path.
"""
return distutils.spawn.find_executable('nm')
def __init__(self):
"""
Initialize the nm executable and flags that will be used to extract
symbols from shared libraries.
"""
self.nm_exe = self.find_tool()
if self.nm_exe is None:
# ERROR no NM found
print("ERROR: Could not find nm")
sys.exit(1)
self.flags = ['-P', '-g']
def extract(self, lib):
"""
Extract symbols from a library and return the results as a dict of
parsed symbols.
"""
cmd = [self.nm_exe] + self.flags + [lib]
out, _, exit_code = util.execute_command_verbose(cmd)
if exit_code != 0:
raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
fmt_syms = (self._extract_sym(l)
for l in out.splitlines() if l.strip())
# Cast symbol to string.
final_syms = (repr(s) for s in fmt_syms if self._want_sym(s))
# Make unique and sort strings.
tmp_list = list(sorted(set(final_syms)))
# Cast string back to symbol.
return util.read_syms_from_list(tmp_list)
def _extract_sym(self, sym_str):
bits = sym_str.split()
# Everything we want has at least two columns.
if len(bits) < 2:
return None
new_sym = {
'name': bits[0],
'type': bits[1],
'is_defined': (bits[1].lower() != 'u')
}
new_sym['name'] = new_sym['name'].replace('@@', '@')
new_sym = self._transform_sym_type(new_sym)
# NM types which we want to save the size for.
if new_sym['type'] == 'OBJECT' and len(bits) > 3:
new_sym['size'] = int(bits[3], 16)
return new_sym
@staticmethod
def _want_sym(sym):
"""
Check that s is a valid symbol that we want to keep.
"""
if sym is None or len(sym) < 2:
return False
if sym['name'] in extract_ignore_names:
return False
bad_types = ['t', 'b', 'r', 'd', 'w']
return (sym['type'] not in bad_types
and sym['name'] not in ['__bss_start', '_end', '_edata'])
@staticmethod
def _transform_sym_type(sym):
"""
Map the nm single letter output for type to either FUNC or OBJECT.
If the type is not recognized it is left unchanged.
"""
func_types = ['T', 'W']
obj_types = ['B', 'D', 'R', 'V', 'S']
if sym['type'] in func_types:
sym['type'] = 'FUNC'
elif sym['type'] in obj_types:
sym['type'] = 'OBJECT'
return sym
class ReadElfExtractor(object):
"""
ReadElfExtractor - Extract symbol lists from libraries using readelf.
"""
@staticmethod
def find_tool():
"""
Search for the readelf executable and return the path.
"""
return distutils.spawn.find_executable('readelf')
def __init__(self):
"""
Initialize the readelf executable and flags that will be used to
extract symbols from shared libraries.
"""
self.tool = self.find_tool()
if self.tool is None:
# ERROR no NM found
print("ERROR: Could not find readelf")
sys.exit(1)
self.flags = ['--wide', '--symbols']
def extract(self, lib):
"""
Extract symbols from a library and return the results as a dict of
parsed symbols.
"""
cmd = [self.tool] + self.flags + [lib]
out, _, exit_code = util.execute_command_verbose(cmd)
if exit_code != 0:
raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
dyn_syms = self.get_dynsym_table(out)
return self.process_syms(dyn_syms)
def process_syms(self, sym_list):
new_syms = []
for s in sym_list:
parts = s.split()
if not parts:
continue
assert len(parts) == 7 or len(parts) == 8 or len(parts) == 9
if len(parts) == 7:
continue
new_sym = {
'name': parts[7],
'size': int(parts[2]),
'type': parts[3],
'is_defined': (parts[6] != 'UND')
}
assert new_sym['type'] in ['OBJECT', 'FUNC', 'NOTYPE']
if new_sym['name'] in extract_ignore_names:
continue
if new_sym['type'] == 'NOTYPE':
continue
if new_sym['type'] == 'FUNC':
del new_sym['size']
new_syms += [new_sym]
return new_syms
def get_dynsym_table(self, out):
lines = out.splitlines()
start = -1
end = -1
for i in range(len(lines)):
if lines[i].startswith("Symbol table '.dynsym'"):
start = i + 2
if start != -1 and end == -1 and not lines[i].strip():
end = i + 1
assert start != -1
if end == -1:
end = len(lines)
return lines[start:end]
def extract_symbols(lib_file):
"""
Extract and return a list of symbols extracted from a dynamic library.
The symbols are extracted using NM. They are then filtered and formated.
Finally they symbols are made unique.
"""
if ReadElfExtractor.find_tool():
extractor = ReadElfExtractor()
else:
extractor = NMExtractor()
return extractor.extract(lib_file)

View File

@@ -0,0 +1,40 @@
# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
#===----------------------------------------------------------------------===##
#
# The LLVM Compiler Infrastructure
#
# This file is dual licensed under the MIT and the University of Illinois Open
# Source Licenses. See LICENSE.TXT for details.
#
#===----------------------------------------------------------------------===##
"""
match - A set of functions for matching symbols in a list to a list of regexs
"""
import re
def find_and_report_matching(symbol_list, regex_list):
report = ''
found_count = 0
for regex_str in regex_list:
report += 'Matching regex "%s":\n' % regex_str
matching_list = find_matching_symbols(symbol_list, regex_str)
if not matching_list:
report += ' No matches found\n\n'
continue
# else
found_count += len(matching_list)
for m in matching_list:
report += ' MATCHES: %s\n' % m['name']
report += '\n'
return found_count, report
def find_matching_symbols(symbol_list, regex_str):
regex = re.compile(regex_str)
matching_list = []
for s in symbol_list:
if regex.match(s['name']):
matching_list += [s]
return matching_list

View File

@@ -0,0 +1,323 @@
#===----------------------------------------------------------------------===##
#
# The LLVM Compiler Infrastructure
#
# This file is dual licensed under the MIT and the University of Illinois Open
# Source Licenses. See LICENSE.TXT for details.
#
#===----------------------------------------------------------------------===##
import ast
import distutils.spawn
import signal
import subprocess
import sys
import re
def to_bytes(str):
# Encode to UTF-8 to get binary data.
if isinstance(str, bytes):
return str
return str.encode('utf-8')
def to_string(bytes):
if isinstance(bytes, str):
return bytes
return to_bytes(bytes)
def convert_string(bytes):
try:
return to_string(bytes.decode('utf-8'))
except AttributeError: # 'str' object has no attribute 'decode'.
return str(bytes)
except UnicodeError:
return str(bytes)
def execute_command(cmd, input_str=None):
"""
Execute a command, capture and return its output.
"""
kwargs = {
'stdin': subprocess.PIPE,
'stdout': subprocess.PIPE,
'stderr': subprocess.PIPE,
}
p = subprocess.Popen(cmd, **kwargs)
out, err = p.communicate(input=input_str)
exitCode = p.wait()
if exitCode == -signal.SIGINT:
raise KeyboardInterrupt
out = convert_string(out)
err = convert_string(err)
return out, err, exitCode
def execute_command_verbose(cmd, input_str=None):
"""
Execute a command and print its output on failure.
"""
out, err, exitCode = execute_command(cmd, input_str=input_str)
if exitCode != 0:
report = "Command: %s\n" % ' '.join(["'%s'" % a for a in cmd])
report += "Exit Code: %d\n" % exitCode
if out:
report += "Standard Output:\n--\n%s--" % out
if err:
report += "Standard Error:\n--\n%s--" % err
report += "\n\nFailed!"
sys.stderr.write('%s\n' % report)
return out, err, exitCode
def read_syms_from_list(slist):
"""
Read a list of symbols from a list of strings.
Each string is one symbol.
"""
return [ast.literal_eval(l) for l in slist]
def read_syms_from_file(filename):
"""
Read a list of symbols in from a file.
"""
with open(filename, 'r') as f:
data = f.read()
return read_syms_from_list(data.splitlines())
def read_blacklist(filename):
with open(filename, 'r') as f:
data = f.read()
lines = [l.strip() for l in data.splitlines() if l.strip()]
lines = [l for l in lines if not l.startswith('#')]
return lines
def write_syms(sym_list, out=None, names_only=False):
"""
Write a list of symbols to the file named by out.
"""
out_str = ''
out_list = sym_list
out_list.sort(key=lambda x: x['name'])
if names_only:
out_list = [sym['name'] for sym in sym_list]
for sym in out_list:
out_str += '%s\n' % sym
if out is None:
sys.stdout.write(out_str)
else:
with open(out, 'w') as f:
f.write(out_str)
_cppfilt_exe = distutils.spawn.find_executable('c++filt')
def demangle_symbol(symbol):
if _cppfilt_exe is None:
return symbol
out, _, exit_code = execute_command_verbose(
[_cppfilt_exe], input_str=symbol)
if exit_code != 0:
return symbol
return out
def is_elf(filename):
with open(filename, 'rb') as f:
magic_bytes = f.read(4)
return magic_bytes == b'\x7fELF'
def is_mach_o(filename):
with open(filename, 'rb') as f:
magic_bytes = f.read(4)
return magic_bytes in [
'\xfe\xed\xfa\xce', # MH_MAGIC
'\xce\xfa\xed\xfe', # MH_CIGAM
'\xfe\xed\xfa\xcf', # MH_MAGIC_64
'\xcf\xfa\xed\xfe', # MH_CIGAM_64
'\xca\xfe\xba\xbe', # FAT_MAGIC
'\xbe\xba\xfe\xca' # FAT_CIGAM
]
def is_library_file(filename):
if sys.platform == 'darwin':
return is_mach_o(filename)
else:
return is_elf(filename)
def extract_or_load(filename):
import libcxx.sym_check.extract
if is_library_file(filename):
return libcxx.sym_check.extract.extract_symbols(filename)
return read_syms_from_file(filename)
def adjust_mangled_name(name):
if not name.startswith('__Z'):
return name
return name[1:]
new_delete_std_symbols = [
'_Znam',
'_Znwm',
'_ZdaPv',
'_ZdaPvm',
'_ZdlPv',
'_ZdlPvm'
]
cxxabi_symbols = [
'___dynamic_cast',
'___gxx_personality_v0',
'_ZTIDi',
'_ZTIDn',
'_ZTIDs',
'_ZTIPDi',
'_ZTIPDn',
'_ZTIPDs',
'_ZTIPKDi',
'_ZTIPKDn',
'_ZTIPKDs',
'_ZTIPKa',
'_ZTIPKb',
'_ZTIPKc',
'_ZTIPKd',
'_ZTIPKe',
'_ZTIPKf',
'_ZTIPKh',
'_ZTIPKi',
'_ZTIPKj',
'_ZTIPKl',
'_ZTIPKm',
'_ZTIPKs',
'_ZTIPKt',
'_ZTIPKv',
'_ZTIPKw',
'_ZTIPKx',
'_ZTIPKy',
'_ZTIPa',
'_ZTIPb',
'_ZTIPc',
'_ZTIPd',
'_ZTIPe',
'_ZTIPf',
'_ZTIPh',
'_ZTIPi',
'_ZTIPj',
'_ZTIPl',
'_ZTIPm',
'_ZTIPs',
'_ZTIPt',
'_ZTIPv',
'_ZTIPw',
'_ZTIPx',
'_ZTIPy',
'_ZTIa',
'_ZTIb',
'_ZTIc',
'_ZTId',
'_ZTIe',
'_ZTIf',
'_ZTIh',
'_ZTIi',
'_ZTIj',
'_ZTIl',
'_ZTIm',
'_ZTIs',
'_ZTIt',
'_ZTIv',
'_ZTIw',
'_ZTIx',
'_ZTIy',
'_ZTSDi',
'_ZTSDn',
'_ZTSDs',
'_ZTSPDi',
'_ZTSPDn',
'_ZTSPDs',
'_ZTSPKDi',
'_ZTSPKDn',
'_ZTSPKDs',
'_ZTSPKa',
'_ZTSPKb',
'_ZTSPKc',
'_ZTSPKd',
'_ZTSPKe',
'_ZTSPKf',
'_ZTSPKh',
'_ZTSPKi',
'_ZTSPKj',
'_ZTSPKl',
'_ZTSPKm',
'_ZTSPKs',
'_ZTSPKt',
'_ZTSPKv',
'_ZTSPKw',
'_ZTSPKx',
'_ZTSPKy',
'_ZTSPa',
'_ZTSPb',
'_ZTSPc',
'_ZTSPd',
'_ZTSPe',
'_ZTSPf',
'_ZTSPh',
'_ZTSPi',
'_ZTSPj',
'_ZTSPl',
'_ZTSPm',
'_ZTSPs',
'_ZTSPt',
'_ZTSPv',
'_ZTSPw',
'_ZTSPx',
'_ZTSPy',
'_ZTSa',
'_ZTSb',
'_ZTSc',
'_ZTSd',
'_ZTSe',
'_ZTSf',
'_ZTSh',
'_ZTSi',
'_ZTSj',
'_ZTSl',
'_ZTSm',
'_ZTSs',
'_ZTSt',
'_ZTSv',
'_ZTSw',
'_ZTSx',
'_ZTSy'
]
def is_stdlib_symbol_name(name):
name = adjust_mangled_name(name)
if re.search("@GLIBC|@GCC", name):
return False
if re.search('(St[0-9])|(__cxa)|(__cxxabi)', name):
return True
if name in new_delete_std_symbols:
return True
if name in cxxabi_symbols:
return True
if name.startswith('_Z'):
return True
return False
def filter_stdlib_symbols(syms):
stdlib_symbols = []
other_symbols = []
for s in syms:
canon_name = adjust_mangled_name(s['name'])
if not is_stdlib_symbol_name(canon_name):
assert not s['is_defined'] and "found defined non-std symbol"
other_symbols += [s]
else:
stdlib_symbols += [s]
return stdlib_symbols, other_symbols