Start libc++ python cleanup and consolidation.

Libc++ frequently creates and uses utilities written in Python. Currently there are Python modules under both libcxx/test and libcxx/utils. My goal with these changes is to consolidate them into a single package under libcxx/utils/libcxx.

git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@294644 91177308-0d34-0410-b5e6-96231b3b80d8
2017-02-09 22:53:14 +00:00
parent d8b62dceb2
commit 1147753b1c
16 changed files with 27 additions and 10 deletions
--- a/utils/libcxx/sym_check/init.py
+++ b/utils/libcxx/sym_check/init.py
@@ -0,0 +1,17 @@
+#===----------------------------------------------------------------------===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===##
+
+"""libcxx abi symbol checker"""
+
+# Package metadata.
+__author__ = 'Eric Fiselier'
+__email__ = 'eric@efcs.ca'
+__versioninfo__ = (0, 1, 0)
+# Dotted version string built from __versioninfo__, i.e. '0.1.0dev'.
+__version__ = '.'.join(str(v) for v in __versioninfo__) + 'dev'
+
+# Submodules exported by `from libcxx.sym_check import *`.
+__all__ = ['diff', 'extract', 'match', 'util']
--- a/utils/libcxx/sym_check/diff.py
+++ b/utils/libcxx/sym_check/diff.py
@@ -0,0 +1,103 @@
+# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
+#===----------------------------------------------------------------------===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===##
+"""
+diff - A set of functions for diff-ing two symbol lists.
+"""
+
+from libcxx.sym_check import util
+
+
+def _symbol_difference(lhs, rhs):
+    """
+    Return the symbols of lhs whose 'name' does not occur in rhs.
+
+    The relative order of the entries of lhs is preserved.
+    """
+    names_in_rhs = set(sym['name'] for sym in rhs)
+    return [sym for sym in lhs if sym['name'] not in names_in_rhs]
+
+
+def _find_by_key(sym_list, k):
+    """
+    Return the first symbol in sym_list whose 'name' equals k, or None when
+    no symbol has that name.
+    """
+    candidates = (entry for entry in sym_list if entry['name'] == k)
+    return next(candidates, None)
+
+
+def added_symbols(old, new):
+    """
+    Return the symbols of new whose names do not appear in old.
+    """
+    return _symbol_difference(lhs=new, rhs=old)
+
+
+def removed_symbols(old, new):
+    """
+    Return the symbols of old whose names do not appear in new.
+    """
+    return _symbol_difference(lhs=old, rhs=new)
+
+
+def changed_symbols(old, new):
+    """
+    Return the symbols that exist in both lists under the same name but with
+    different contents.
+
+    The result is a list of (old_symbol, new_symbol) tuples, in the order the
+    symbols appear in old. When several entries of new share a name, the
+    first one is the one compared against.
+    """
+    # Index new by name once, keeping the first entry for each name, instead
+    # of scanning the whole list for every old symbol.
+    new_by_name = {}
+    for sym in new:
+        new_by_name.setdefault(sym['name'], sym)
+
+    changed = []
+    for old_sym in old:
+        # An identical entry in new means the symbol is unchanged.
+        if old_sym in new:
+            continue
+        new_sym = new_by_name.get(old_sym['name'])
+        # Plain inequality replaces cmp(), which only exists in Python 2.
+        if (new_sym is not None and new_sym not in old
+                and old_sym != new_sym):
+            changed.append((old_sym, new_sym))
+    return changed
+
+
+def diff(old, new):
+    """
+    Compare two symbol lists and return an (added, removed, changed) tuple
+    as computed by added_symbols, removed_symbols and changed_symbols.
+    """
+    return (added_symbols(old, new),
+            removed_symbols(old, new),
+            changed_symbols(old, new))
+
+
+def report_diff(added_syms, removed_syms, changed_syms, names_only=False,
+                demangle=True):
+    """
+    Build a human readable report from the three lists returned by diff().
+
+    added_syms and removed_syms are lists of symbol dicts; changed_syms is a
+    list of (old_symbol, new_symbol) pairs. When names_only is true only the
+    symbol names are printed and changed symbols are neither listed nor
+    counted as an ABI break. When demangle is true every printed name is
+    passed through util.demangle_symbol first.
+
+    Returns a (report, abi_break, is_different) tuple: the report text, a
+    bool that is True when symbols were removed (or, unless names_only,
+    changed), and a bool that is True when any of the three lists is
+    non-empty.
+    """
+    def maybe_demangle(name):
+        return util.demangle_symbol(name) if demangle else name
+
+    # Collect the pieces and join once rather than growing a string.
+    parts = []
+    for sym in added_syms:
+        parts.append('Symbol added: %s\n' % maybe_demangle(sym['name']))
+        if not names_only:
+            parts.append('    %s\n\n' % sym)
+    if added_syms and names_only:
+        parts.append('\n')
+    for sym in removed_syms:
+        parts.append('SYMBOL REMOVED: %s\n' % maybe_demangle(sym['name']))
+        if not names_only:
+            parts.append('    %s\n\n' % sym)
+    if removed_syms and names_only:
+        parts.append('\n')
+    if not names_only:
+        for old_sym, new_sym in changed_syms:
+            old_str = '\n    OLD SYMBOL: %s' % old_sym
+            new_str = '\n    NEW SYMBOL: %s' % new_sym
+            parts.append('SYMBOL CHANGED: %s%s%s\n\n' %
+                         (maybe_demangle(old_sym['name']),
+                          old_str, new_str))
+
+    added = len(added_syms) != 0
+    changed = len(changed_syms) != 0
+    abi_break = len(removed_syms) != 0
+    if not names_only:
+        # Keep this a real bool; `abi_break or len(...)` would leak an int.
+        abi_break = abi_break or changed
+    if added or abi_break:
+        parts.append('Summary\n')
+        parts.append('    Added:   %d\n' % len(added_syms))
+        parts.append('    Removed: %d\n' % len(removed_syms))
+        if not names_only:
+            parts.append('    Changed: %d\n' % len(changed_syms))
+        if not abi_break:
+            parts.append('Symbols added.')
+        else:
+            parts.append('ABI BREAKAGE: SYMBOLS ADDED OR REMOVED!')
+    else:
+        parts.append('Symbols match.')
+    is_different = abi_break or added or changed
+    return ''.join(parts), abi_break, is_different
--- a/utils/libcxx/sym_check/extract.py
+++ b/utils/libcxx/sym_check/extract.py
@@ -0,0 +1,193 @@
+# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
+#===----------------------------------------------------------------------===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===##
+"""
+extract - A set of functions that extract symbol lists from shared libraries.
+"""
+import distutils.spawn
+import sys
+import re
+
+from libcxx.sym_check import util
+
+# Symbol names that the extractors in this module drop from their results.
+extract_ignore_names = ['_init', '_fini']
+
+class NMExtractor(object):
+    """
+    NMExtractor - Build symbol lists for libraries from the output of nm.
+    """
+
+    @staticmethod
+    def find_tool():
+        """
+        Look up the nm executable and return whatever
+        distutils.spawn.find_executable reports for it.
+        """
+        return distutils.spawn.find_executable('nm')
+
+    def __init__(self):
+        """
+        Locate nm and record the flags it is run with. When nm cannot be
+        found an error is printed and the process exits with status 1.
+        """
+        self.nm_exe = self.find_tool()
+        if self.nm_exe is None:
+            print("ERROR: Could not find nm")
+            sys.exit(1)
+        self.flags = ['-P', '-g']
+
+    def extract(self, lib):
+        """
+        Run nm on lib and return the list of wanted symbols, with duplicates
+        removed and ordered by their repr() text.
+
+        Raises RuntimeError when nm exits with a non-zero status.
+        """
+        cmd = [self.nm_exe] + self.flags + [lib]
+        out, _, exit_code = util.execute_command_verbose(cmd)
+        if exit_code != 0:
+            raise RuntimeError('Failed to run %s on %s' % (self.nm_exe, lib))
+        # Symbols are dicts and therefore unhashable, so they are made
+        # unique through their repr() text.
+        unique_reprs = set()
+        for line in out.splitlines():
+            if not line.strip():
+                continue
+            sym = self._extract_sym(line)
+            if self._want_sym(sym):
+                unique_reprs.add(repr(sym))
+        # Turn the sorted strings back into symbol dicts.
+        return util.read_syms_from_list(sorted(unique_reprs))
+
+    def _extract_sym(self, sym_str):
+        """
+        Parse one line of nm output into a symbol dict. Returns None for
+        lines that have fewer than two columns.
+        """
+        bits = sym_str.split()
+        if len(bits) < 2:
+            return None
+        raw_type = bits[1]
+        new_sym = self._transform_sym_type({
+            'name': bits[0].replace('@@', '@'),
+            'type': raw_type,
+            'is_defined': (raw_type.lower() != 'u')
+        })
+        # Only OBJECT symbols keep a size; it is the fourth column, in hex.
+        if new_sym['type'] == 'OBJECT' and len(bits) > 3:
+            new_sym['size'] = int(bits[3], 16)
+        return new_sym
+
+    @staticmethod
+    def _want_sym(sym):
+        """
+        Return True when sym is a parsed symbol that should be kept.
+        """
+        if sym is None or len(sym) < 2:
+            return False
+        name = sym['name']
+        if name in extract_ignore_names:
+            return False
+        if sym['type'] in ('t', 'b', 'r', 'd', 'w'):
+            return False
+        return name not in ('__bss_start', '_end', '_edata')
+
+    @staticmethod
+    def _transform_sym_type(sym):
+        """
+        Rewrite the single letter nm type of sym to FUNC or OBJECT, in place,
+        and return sym. Unrecognized types are left untouched.
+        """
+        kind = sym['type']
+        if kind in ('T', 'W'):
+            sym['type'] = 'FUNC'
+        elif kind in ('B', 'D', 'R', 'V', 'S'):
+            sym['type'] = 'OBJECT'
+        return sym
+
+class ReadElfExtractor(object):
+    """
+    ReadElfExtractor - Extract symbol lists from libraries using readelf.
+    """
+
+    @staticmethod
+    def find_tool():
+        """
+        Search for the readelf executable and return the path.
+        """
+        return distutils.spawn.find_executable('readelf')
+
+    def __init__(self):
+        """
+        Initialize the readelf executable and flags that will be used to
+        extract symbols from shared libraries. When readelf cannot be found
+        an error is printed and the process exits with status 1.
+        """
+        self.tool = self.find_tool()
+        if self.tool is None:
+            # ERROR no readelf found
+            print("ERROR: Could not find readelf")
+            sys.exit(1)
+        self.flags = ['--wide', '--symbols']
+
+    def extract(self, lib):
+        """
+        Extract symbols from a library and return the results as a list of
+        parsed symbol dicts.
+
+        Raises RuntimeError when readelf exits with a non-zero status.
+        """
+        cmd = [self.tool] + self.flags + [lib]
+        out, _, exit_code = util.execute_command_verbose(cmd)
+        if exit_code != 0:
+            # This class stores the executable in self.tool; it has no
+            # nm_exe attribute.
+            raise RuntimeError('Failed to run %s on %s' % (self.tool, lib))
+        dyn_syms = self.get_dynsym_table(out)
+        return self.process_syms(dyn_syms)
+
+    def process_syms(self, sym_list):
+        """
+        Convert the rows of a readelf symbol table into symbol dicts.
+
+        Blank rows, rows without a name column, ignored names and NOTYPE
+        symbols are skipped. FUNC symbols do not carry a 'size' entry.
+        """
+        new_syms = []
+        for s in sym_list:
+            parts = s.split()
+            if not parts:
+                continue
+            assert len(parts) == 7 or len(parts) == 8 or len(parts) == 9
+            # Seven columns means the row has no symbol name.
+            if len(parts) == 7:
+                continue
+            new_sym = {
+                'name': parts[7],
+                'size': int(parts[2]),
+                'type': parts[3],
+                'is_defined': (parts[6] != 'UND')
+            }
+            assert new_sym['type'] in ['OBJECT', 'FUNC', 'NOTYPE']
+            if new_sym['name'] in extract_ignore_names:
+                continue
+            if new_sym['type'] == 'NOTYPE':
+                continue
+            if new_sym['type'] == 'FUNC':
+                del new_sym['size']
+            new_syms.append(new_sym)
+        return new_syms
+
+    def get_dynsym_table(self, out):
+        """
+        Return the rows of the '.dynsym' table found in readelf output.
+
+        The rows start two lines after the "Symbol table '.dynsym'" heading
+        and run up to and including the first blank line that follows it,
+        or to the end of the output when there is none.
+        """
+        lines = out.splitlines()
+        start = -1
+        end = -1
+        for i, line in enumerate(lines):
+            if line.startswith("Symbol table '.dynsym'"):
+                # Skip the heading and the column title row below it.
+                start = i + 2
+            if start != -1 and end == -1 and not line.strip():
+                end = i + 1
+        assert start != -1
+        if end == -1:
+            end = len(lines)
+        return lines[start:end]
+
+
+def extract_symbols(lib_file):
+    """
+    Extract and return the list of symbols found in a dynamic library.
+
+    readelf is used when ReadElfExtractor can find it; otherwise the symbols
+    are extracted with nm.
+    """
+    have_readelf = ReadElfExtractor.find_tool()
+    extractor_cls = ReadElfExtractor if have_readelf else NMExtractor
+    return extractor_cls().extract(lib_file)
--- a/utils/libcxx/sym_check/match.py
+++ b/utils/libcxx/sym_check/match.py
@@ -0,0 +1,40 @@
+# -*- Python -*- vim: set syntax=python tabstop=4 expandtab cc=80:
+#===----------------------------------------------------------------------===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===##
+"""
+match - A set of functions for matching symbols in a list to a list of regexs
+"""
+
+import re
+
+
+def find_and_report_matching(symbol_list, regex_list):
+    """
+    Match every regex in regex_list against symbol_list and describe the
+    result.
+
+    Returns a (found_count, report) tuple: the total number of matches over
+    all regexes and a text report with one section per regex.
+    """
+    total = 0
+    chunks = []
+    for pattern in regex_list:
+        chunks.append('Matching regex "%s":\n' % pattern)
+        hits = find_matching_symbols(symbol_list, pattern)
+        if hits:
+            total += len(hits)
+            chunks.extend('    MATCHES: %s\n' % hit['name'] for hit in hits)
+            chunks.append('\n')
+        else:
+            chunks.append('    No matches found\n\n')
+    return total, ''.join(chunks)
+
+
+def find_matching_symbols(symbol_list, regex_str):
+    """
+    Return the symbols of symbol_list whose 'name' matches regex_str.
+
+    re.match is used, so the pattern is anchored at the start of the name.
+    """
+    matches = re.compile(regex_str).match
+    return [sym for sym in symbol_list if matches(sym['name'])]
--- a/utils/libcxx/sym_check/util.py
+++ b/utils/libcxx/sym_check/util.py
@@ -0,0 +1,323 @@
+#===----------------------------------------------------------------------===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is dual licensed under the MIT and the University of Illinois Open
+# Source Licenses. See LICENSE.TXT for details.
+#
+#===----------------------------------------------------------------------===##
+
+import ast
+import distutils.spawn
+import signal
+import subprocess
+import sys
+import re
+
+def to_bytes(str):
+    """
+    Return str as binary data: bytes objects are returned unchanged,
+    anything else is encoded as UTF-8.
+    """
+    return str if isinstance(str, bytes) else str.encode('utf-8')
+
+def to_string(bytes):
+    """
+    Return bytes as a native str.
+
+    str objects are returned unchanged. Under Python 3 a bytes object is
+    decoded as UTF-8 (UnicodeDecodeError is raised for invalid data). Any
+    other value, such as a Python 2 unicode object, is handed to to_bytes.
+    """
+    if isinstance(bytes, str):
+        return bytes
+    # The parameter shadows the builtin, so the binary type is spelled
+    # type(b''). Under Python 2 that is str, which was handled above.
+    if isinstance(bytes, type(b'')):
+        return bytes.decode('utf-8')
+    return to_bytes(bytes)
+
+def convert_string(bytes):
+    """
+    Return bytes as a string, decoding it as UTF-8 when possible and
+    falling back to str(bytes) otherwise.
+    """
+    try:
+        decoded = bytes.decode('utf-8')
+        return to_string(decoded)
+    except (AttributeError, UnicodeError):
+        # AttributeError: e.g. 'str' object has no attribute 'decode'.
+        # UnicodeError: the data could not be converted.
+        return str(bytes)
+
+def execute_command(cmd, input_str=None):
+    """
+    Execute a command, capture and return its output.
+
+    cmd is the argument list passed to subprocess.Popen. input_str, when
+    given, is written to the command's standard input; text is encoded as
+    UTF-8 first because the pipes carry binary data.
+
+    Returns an (out, err, exit_code) tuple where out and err have been
+    passed through convert_string. Raises KeyboardInterrupt when the
+    command was terminated by SIGINT.
+    """
+    if input_str is not None:
+        # communicate() rejects text on a binary pipe under Python 3.
+        input_str = to_bytes(input_str)
+    p = subprocess.Popen(cmd,
+                         stdin=subprocess.PIPE,
+                         stdout=subprocess.PIPE,
+                         stderr=subprocess.PIPE)
+    out, err = p.communicate(input=input_str)
+    exitCode = p.wait()
+    # A negative exit code is the number of the signal that ended the
+    # command.
+    if exitCode == -signal.SIGINT:
+        raise KeyboardInterrupt
+    out = convert_string(out)
+    err = convert_string(err)
+    return out, err, exitCode
+
+
+def execute_command_verbose(cmd, input_str=None):
+    """
+    Run a command through execute_command and, when it exits with a
+    non-zero status, write a report of the failure to standard error.
+
+    Returns the (out, err, exit_code) tuple from execute_command.
+    """
+    out, err, exitCode = execute_command(cmd, input_str=input_str)
+    if exitCode == 0:
+        return out, err, exitCode
+    quoted_cmd = ' '.join("'%s'" % arg for arg in cmd)
+    pieces = ["Command: %s\n" % quoted_cmd, "Exit Code: %d\n" % exitCode]
+    if out:
+        pieces.append("Standard Output:\n--\n%s--" % out)
+    if err:
+        pieces.append("Standard Error:\n--\n%s--" % err)
+    pieces.append("\n\nFailed!")
+    sys.stderr.write('%s\n' % ''.join(pieces))
+    return out, err, exitCode
+
+
+def read_syms_from_list(slist):
+    """
+    Turn a list of strings into a list of symbols.
+    Each string holds the Python literal for one symbol and is parsed with
+    ast.literal_eval.
+    """
+    return list(map(ast.literal_eval, slist))
+
+
+def read_syms_from_file(filename):
+    """
+    Load a list of symbols from a file that holds one symbol per line.
+    """
+    with open(filename, 'r') as f:
+        sym_lines = f.read().splitlines()
+    return read_syms_from_list(sym_lines)
+
+
+def read_blacklist(filename):
+    """
+    Return the entries of a blacklist file: its lines with surrounding
+    whitespace removed, without blank lines and without lines that start
+    with '#'.
+    """
+    with open(filename, 'r') as f:
+        contents = f.read()
+    entries = []
+    for raw_line in contents.splitlines():
+        entry = raw_line.strip()
+        if entry and not entry.startswith('#'):
+            entries.append(entry)
+    return entries
+
+
+def write_syms(sym_list, out=None, names_only=False):
+    """
+    Write a list of symbols, one per line and sorted by name, to the file
+    named by out, or to standard output when out is None.
+
+    When names_only is true only the symbol names are written; otherwise
+    each line holds the string form of the whole symbol. sym_list itself
+    is left untouched.
+    """
+    # sorted() builds a new list, so the caller's list is not reordered.
+    out_list = sorted(sym_list, key=lambda x: x['name'])
+    if names_only:
+        out_list = [sym['name'] for sym in out_list]
+    out_str = ''.join('%s\n' % sym for sym in out_list)
+    if out is None:
+        sys.stdout.write(out_str)
+    else:
+        with open(out, 'w') as f:
+            f.write(out_str)
+
+# Result of looking up the c++filt executable, computed once when this module
+# is imported. demangle_symbol returns names unchanged when this is None.
+_cppfilt_exe = distutils.spawn.find_executable('c++filt')
+
+
+def demangle_symbol(symbol):
+    """
+    Return the output of c++filt for symbol. The symbol is returned
+    unchanged when c++filt is unavailable or exits with a non-zero status.
+    """
+    if _cppfilt_exe is not None:
+        out, _, exit_code = execute_command_verbose(
+            [_cppfilt_exe], input_str=symbol)
+        if exit_code == 0:
+            return out
+    return symbol
+
+
+def is_elf(filename):
+    """
+    Return True when the first four bytes of the file are the ELF magic
+    number.
+    """
+    with open(filename, 'rb') as f:
+        return f.read(4) == b'\x7fELF'
+
+
+def is_mach_o(filename):
+    """
+    Return True when the first four bytes of the file are one of the
+    Mach-O or fat binary magic numbers listed below.
+    """
+    with open(filename, 'rb') as f:
+        magic_bytes = f.read(4)
+    # The file is read in binary mode, so the magic numbers must be bytes
+    # literals; under Python 3 a bytes value never equals a str.
+    return magic_bytes in [
+        b'\xfe\xed\xfa\xce',  # MH_MAGIC
+        b'\xce\xfa\xed\xfe',  # MH_CIGAM
+        b'\xfe\xed\xfa\xcf',  # MH_MAGIC_64
+        b'\xcf\xfa\xed\xfe',  # MH_CIGAM_64
+        b'\xca\xfe\xba\xbe',  # FAT_MAGIC
+        b'\xbe\xba\xfe\xca'   # FAT_CIGAM
+    ]
+
+
+def is_library_file(filename):
+    """
+    Return True when filename is a binary in the object format checked on
+    this platform: Mach-O when sys.platform is 'darwin', ELF otherwise.
+    """
+    check = is_mach_o if sys.platform == 'darwin' else is_elf
+    return check(filename)
+
+
+def extract_or_load(filename):
+    """
+    Return the symbol list for filename: extracted from the binary when it
+    is a library file, otherwise read from it as a symbol list file.
+    """
+    # Imported here rather than at module level; extract imports this
+    # module.
+    import libcxx.sym_check.extract
+    if not is_library_file(filename):
+        return read_syms_from_file(filename)
+    return libcxx.sym_check.extract.extract_symbols(filename)
+
+def adjust_mangled_name(name):
+    """
+    Drop the first character of names that start with '__Z'; every other
+    name is returned unchanged.
+    """
+    return name[1:] if name.startswith('__Z') else name
+
+# Names that is_stdlib_symbol_name accepts as standard library symbols.
+# NOTE(review): presumably the mangled names of the global operator
+# new/delete overloads -- confirm.
+new_delete_std_symbols = [
+    '_Znam',
+    '_Znwm',
+    '_ZdaPv',
+    '_ZdaPvm',
+    '_ZdlPv',
+    '_ZdlPvm'
+]
+
+# Names that is_stdlib_symbol_name accepts as standard library symbols.
+# NOTE(review): going by the variable name these are symbols provided by the
+# C++ ABI library -- confirm.
+cxxabi_symbols = [
+    '___dynamic_cast',
+    '___gxx_personality_v0',
+    '_ZTIDi',
+    '_ZTIDn',
+    '_ZTIDs',
+    '_ZTIPDi',
+    '_ZTIPDn',
+    '_ZTIPDs',
+    '_ZTIPKDi',
+    '_ZTIPKDn',
+    '_ZTIPKDs',
+    '_ZTIPKa',
+    '_ZTIPKb',
+    '_ZTIPKc',
+    '_ZTIPKd',
+    '_ZTIPKe',
+    '_ZTIPKf',
+    '_ZTIPKh',
+    '_ZTIPKi',
+    '_ZTIPKj',
+    '_ZTIPKl',
+    '_ZTIPKm',
+    '_ZTIPKs',
+    '_ZTIPKt',
+    '_ZTIPKv',
+    '_ZTIPKw',
+    '_ZTIPKx',
+    '_ZTIPKy',
+    '_ZTIPa',
+    '_ZTIPb',
+    '_ZTIPc',
+    '_ZTIPd',
+    '_ZTIPe',
+    '_ZTIPf',
+    '_ZTIPh',
+    '_ZTIPi',
+    '_ZTIPj',
+    '_ZTIPl',
+    '_ZTIPm',
+    '_ZTIPs',
+    '_ZTIPt',
+    '_ZTIPv',
+    '_ZTIPw',
+    '_ZTIPx',
+    '_ZTIPy',
+    '_ZTIa',
+    '_ZTIb',
+    '_ZTIc',
+    '_ZTId',
+    '_ZTIe',
+    '_ZTIf',
+    '_ZTIh',
+    '_ZTIi',
+    '_ZTIj',
+    '_ZTIl',
+    '_ZTIm',
+    '_ZTIs',
+    '_ZTIt',
+    '_ZTIv',
+    '_ZTIw',
+    '_ZTIx',
+    '_ZTIy',
+    '_ZTSDi',
+    '_ZTSDn',
+    '_ZTSDs',
+    '_ZTSPDi',
+    '_ZTSPDn',
+    '_ZTSPDs',
+    '_ZTSPKDi',
+    '_ZTSPKDn',
+    '_ZTSPKDs',
+    '_ZTSPKa',
+    '_ZTSPKb',
+    '_ZTSPKc',
+    '_ZTSPKd',
+    '_ZTSPKe',
+    '_ZTSPKf',
+    '_ZTSPKh',
+    '_ZTSPKi',
+    '_ZTSPKj',
+    '_ZTSPKl',
+    '_ZTSPKm',
+    '_ZTSPKs',
+    '_ZTSPKt',
+    '_ZTSPKv',
+    '_ZTSPKw',
+    '_ZTSPKx',
+    '_ZTSPKy',
+    '_ZTSPa',
+    '_ZTSPb',
+    '_ZTSPc',
+    '_ZTSPd',
+    '_ZTSPe',
+    '_ZTSPf',
+    '_ZTSPh',
+    '_ZTSPi',
+    '_ZTSPj',
+    '_ZTSPl',
+    '_ZTSPm',
+    '_ZTSPs',
+    '_ZTSPt',
+    '_ZTSPv',
+    '_ZTSPw',
+    '_ZTSPx',
+    '_ZTSPy',
+    '_ZTSa',
+    '_ZTSb',
+    '_ZTSc',
+    '_ZTSd',
+    '_ZTSe',
+    '_ZTSf',
+    '_ZTSh',
+    '_ZTSi',
+    '_ZTSj',
+    '_ZTSl',
+    '_ZTSm',
+    '_ZTSs',
+    '_ZTSt',
+    '_ZTSv',
+    '_ZTSw',
+    '_ZTSx',
+    '_ZTSy'
+]
+
+def is_stdlib_symbol_name(name):
+    """
+    Return True when name, after adjust_mangled_name, is treated as a
+    standard library symbol.
+
+    Names containing '@GLIBC' or '@GCC' are always rejected. Otherwise a
+    name is accepted when it matches the St<digit>/__cxa/__cxxabi pattern,
+    is listed in new_delete_std_symbols or cxxabi_symbols, or starts with
+    '_Z'.
+    """
+    name = adjust_mangled_name(name)
+    if re.search("@GLIBC|@GCC", name):
+        return False
+    return bool(
+        re.search('(St[0-9])|(__cxa)|(__cxxabi)', name)
+        or name in new_delete_std_symbols
+        or name in cxxabi_symbols
+        or name.startswith('_Z'))
+
+def filter_stdlib_symbols(syms):
+    """
+    Split syms into standard library symbols and all other symbols.
+
+    Returns a (stdlib_symbols, other_symbols) tuple of lists, each keeping
+    the order of syms. Asserts that every non-standard-library symbol is
+    undefined.
+    """
+    stdlib_symbols = []
+    other_symbols = []
+    for s in syms:
+        canon_name = adjust_mangled_name(s['name'])
+        if not is_stdlib_symbol_name(canon_name):
+            # The text after the comma is the assertion message; joined
+            # with `and` it was only part of the tested condition.
+            assert not s['is_defined'], "found defined non-std symbol"
+            other_symbols.append(s)
+        else:
+            stdlib_symbols.append(s)
+    return stdlib_symbols, other_symbols