Autotest against llvm-dwarfdump (#428)
author Seva Alekseyev <sevaa@yarxi.ru>
Wed, 6 Jul 2022 17:06:37 +0000 (13:06 -0400)
committer GitHub <noreply@github.com>
Wed, 6 Jul 2022 17:06:37 +0000 (10:06 -0700)
* Descriptive output dump file names on autotest

* Dwarfdump.py

* Test and test files for dwarfdump

* Loclist dump fix

* Permissions

* Fixes

17 files changed:
elftools/dwarf/datatype_cpp.py [new file with mode: 0644]
elftools/dwarf/dwarf_util.py [new file with mode: 0644]
elftools/dwarf/enums.py
elftools/dwarf/locationlists.py
elftools/dwarf/ranges.py
elftools/dwarf/structs.py
scripts/dwarfdump.py [new file with mode: 0644]
test/all_tests.py
test/external_tools/README.txt
test/external_tools/llvm-dwarfdump [new file with mode: 0755]
test/run_dwarfdump_tests.py [new file with mode: 0644]
test/run_examples_test.py
test/run_readelf_tests.py
test/testfiles_for_dwarfdump/dwarf_gnuops4.so.elf [new file with mode: 0644]
test/testfiles_for_dwarfdump/dwarf_lineprogramv5.elf [new file with mode: 0644]
test/testfiles_for_dwarfdump/dwarf_v5ops.so.elf [new file with mode: 0644]
test/utils.py

diff --git a/elftools/dwarf/datatype_cpp.py b/elftools/dwarf/datatype_cpp.py
new file mode 100644 (file)
index 0000000..fea7e9c
--- /dev/null
@@ -0,0 +1,232 @@
+#-------------------------------------------------------------------------------
+# elftools: dwarf/datatype_cpp.py
+#
+# First draft at restoring the source level name a C/C++ datatype
+# from DWARF data. Aiming at compatibility with llvm-dwarfdump v15.
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+from ..common.py3compat import bytes2str
+
+# Source-level spelling of each modifier name, as looked up by TypeDesc.__str__
+cpp_symbols = {
+    "pointer": "*",
+    "reference": "&",
+    "const": "const",
+}
+
+def describe_cpp_datatype(var_die):
+    """Return the C++ datatype of the given DIE, formatted as a string.
+
+    A thin wrapper: stringifies the TypeDesc built by parse_cpp_datatype.
+    """
+    type_desc = parse_cpp_datatype(var_die)
+    return str(type_desc)
+
+def parse_cpp_datatype(var_die):
+    """Given a DIE that describes a variable, a parameter, or a member
+    with DW_AT_type in it, tries to work out the C++ datatype.
+
+    Returns a TypeDesc. If the DIE has no DW_AT_type, the returned
+    TypeDesc has a blank tag and no name.
+
+    Does not follow typedefs, doesn't resolve array element types
+    or struct members. Not good for a debugger.
+    """
+    t = TypeDesc()
+
+    if 'DW_AT_type' not in var_die.attributes:
+        t.tag = ''
+        return t
+
+    type_die = var_die.get_DIE_from_attribute('DW_AT_type')
+
+    mods = []
+    # Unlike readelf, dwarfdump doesn't chase typedefs
+    while type_die.tag in ('DW_TAG_const_type', 'DW_TAG_pointer_type', 'DW_TAG_reference_type'):
+        modifier = _strip_type_tag(type_die) # const/reference/pointer
+        mods.insert(0, modifier)
+        if 'DW_AT_type' not in type_die.attributes: # void* is encoded as a pointer to nothing
+            t.name = t.tag = "void"
+            t.modifiers = tuple(mods)
+            return t
+        type_die = type_die.get_DIE_from_attribute('DW_AT_type')
+
+    # type_die is now the first DIE past the const/pointer/reference chain.
+    # It is only replaced once more, in the ptr_to_member case below.
+    t.tag = _strip_type_tag(type_die)
+    t.modifiers = tuple(mods)
+
+    if t.tag in ('ptr_to_member', 'subroutine'):
+        if t.tag == 'ptr_to_member':
+            ptr_prefix = DIE_name(type_die.get_DIE_from_attribute('DW_AT_containing_type')) + "::"
+            type_die = type_die.get_DIE_from_attribute('DW_AT_type')
+        elif "DW_AT_object_pointer" in type_die.attributes: # Older compiler... Subroutine, but with an object pointer
+            ptr_prefix = DIE_name(DIE_type(DIE_type(type_die.get_DIE_from_attribute('DW_AT_object_pointer')))) + "::"
+        else: # Not a pointer to member
+            ptr_prefix = ''
+
+        if t.tag == 'subroutine':
+            # Artificial parameters (such as "this") are not listed
+            params = ", ".join(
+                format_function_param(p, p)
+                for p in type_die.iter_children()
+                if p.tag in ("DW_TAG_formal_parameter", "DW_TAG_unspecified_parameters")
+                and 'DW_AT_artificial' not in p.attributes)
+            if 'DW_AT_type' in type_die.attributes:
+                retval_type = parse_cpp_datatype(type_die)
+                is_pointer = retval_type.modifiers and retval_type.modifiers[-1] == 'pointer'
+                retval_type = str(retval_type)
+                if not is_pointer:
+                    retval_type += " "
+            else:
+                retval_type = "void "
+
+            if mods and mods[-1] == 'pointer':
+                # Function pointer: the trailing pointer becomes part of the name
+                mods.pop()
+                t.modifiers = tuple(mods)
+                t.name = "%s(%s*)(%s)" % (retval_type, ptr_prefix, params)
+            else:
+                t.name = "%s(%s)" % (retval_type, params)
+            return t
+    elif DIE_is_ptr_to_member_struct(type_die):
+        dt =  parse_cpp_datatype(next(type_die.iter_children())) # The first element is pfn, a function pointer with a this
+        dt.modifiers = tuple(dt.modifiers[:-1]) # Pop the extra pointer
+        dt.tag = "ptr_to_member_type" # Not a function pointer per se
+        return dt
+    elif t.tag == 'array':
+        # Materialized as a tuple: a generator is always truthy and can be
+        # consumed only once, so a second str() of the TypeDesc would
+        # silently lose the dimensions.
+        t.dimensions = tuple(
+            sub.attributes['DW_AT_upper_bound'].value + 1 if 'DW_AT_upper_bound' in sub.attributes else -1
+            for sub
+            in type_die.iter_children()
+            if sub.tag == 'DW_TAG_subrange_type')
+        t.name = describe_cpp_datatype(type_die)
+        return t
+
+    # Now the nonfunction types
+    # Blank name is sometimes legal (unnamed unions, etc)
+
+    t.name = safe_DIE_name(type_die, t.tag + " ")
+
+    # Check the nesting - important for parameters
+    parent = type_die.get_parent()
+    scopes = list()
+    while parent.tag in ('DW_TAG_class_type', 'DW_TAG_structure_type', 'DW_TAG_union_type', 'DW_TAG_namespace'):
+        scopes.insert(0, safe_DIE_name(parent, _strip_type_tag(parent) + " "))
+        # If unnamed scope, fall back to scope type - like "structure "
+        parent = parent.get_parent()
+    t.scopes = tuple(scopes)
+
+    return t
+
+#--------------------------------------------------
+
+class TypeDesc(object):
+    """ Holds the description of a datatype, as parsed from DWARF DIEs.
+        Not sufficient for displaying a variable in a debugger, but sufficient
+        for producing a type description string similar to those of llvm-dwarfdump.
+
+        name - name for primitive datatypes, element name for arrays, the
+            whole name for functions and function pointers
+
+        modifiers - a collection of "const"/"pointer"/"reference", from the
+            chain of DIEs preceding the real type DIE
+
+        scopes - a collection of struct/class/namespace names, parents of the
+            real type DIE
+
+        tag - the tag of the real type DIE, stripped of initial DW_TAG_ and
+            final _type
+
+        dimensions - the collection of array dimensions, if the type is an
+            array. -1 means an array of unknown dimension.
+
+    """
+    def __init__(self):
+        self.tag = None
+        self.name = None
+        self.dimensions = None
+        self.modifiers = () # Reads left to right
+        self.scopes = () # Reads left to right
+
+    def __str__(self):
+        # Some reference points from dwarfdump:
+        # const->pointer->const->char = const char *const
+        # const->reference->const->int = const const int &
+        # const->reference->int = const int &
+        qualified = str(self.name)
+        remaining = self.modifiers
+        pieces = []
+
+        # A leading const applies to the variable itself, other consts apply to the pointee
+        if remaining and remaining[0] == 'const':
+            pieces.append("const")
+            remaining = remaining[1:]
+
+        # A trailing reference->const pair: the const goes in front
+        if remaining[-2:] == ("reference", "const"):
+            pieces.append("const")
+            remaining = remaining[:-1]
+
+        if self.scopes:
+            qualified = '::'.join(self.scopes) + '::' + qualified
+        pieces.append(qualified)
+
+        if remaining:
+            pieces.append("".join(cpp_symbols[m] for m in remaining))
+
+        suffix = ''
+        if self.dimensions:
+            # Unknown dimensions (non-positive) are printed as empty brackets
+            suffix = "".join('[%s]' % (str(d) if d > 0 else '') for d in self.dimensions)
+
+        return " ".join(pieces) + suffix
+
+def DIE_name(die):
+    """Return the DW_AT_name of the DIE, passed through bytes2str.
+
+    The attribute must be present; use safe_DIE_name when it may be missing.
+    """
+    name_attr = die.attributes['DW_AT_name']
+    return bytes2str(name_attr.value)
+
+def safe_DIE_name(die, default = ''):
+    """Return the DW_AT_name of the DIE, passed through bytes2str,
+    or the default if the DIE has no DW_AT_name.
+    """
+    if 'DW_AT_name' not in die.attributes:
+        return default
+    return bytes2str(die.attributes['DW_AT_name'].value)
+
+def DIE_type(die):
+    """Return the DIE that the DW_AT_type attribute of the given DIE refers to."""
+    type_die = die.get_DIE_from_attribute("DW_AT_type")
+    return type_die
+
+class ClassDesc(object):
+    """ Describes the scope that a function belongs to,
+        as filled in by get_class_spec_if_member.
+
+        scopes - names of the enclosing scopes, reads left to right
+
+        const_member - set when the "this" argument was found to be
+            a pointer to const
+    """
+    def __init__(self):
+        self.const_member = False
+        self.scopes = ()
+
+def get_class_spec_if_member(func_spec, the_func):
+    """Return a ClassDesc for the scope the function lives in,
+    or None if no enclosing class/struct/namespace is found.
+
+    If the_func has DW_AT_object_pointer, the scope comes from the type
+    of that parameter. Otherwise the parent chain of func_spec is walked.
+    """
+    if 'DW_AT_object_pointer' not in the_func.attributes:
+        # No "this" - check the parent element chain, could be a class
+        enclosing = []
+        scope_die = func_spec.get_parent()
+        while scope_die.tag in ("DW_TAG_class_type", "DW_TAG_structure_type", "DW_TAG_namespace"):
+            enclosing.append(DIE_name(scope_die))
+            scope_die = scope_die.get_parent()
+        if not enclosing:
+            return None
+        desc = ClassDesc()
+        desc.scopes = tuple(reversed(enclosing)) # Collected innermost first
+        return desc
+
+    this_type = parse_cpp_datatype(the_func.get_DIE_from_attribute('DW_AT_object_pointer'))
+    desc = ClassDesc()
+    desc.scopes = this_type.scopes + (this_type.name,)
+    # const -> pointer -> const for this arg of const
+    mods = this_type.modifiers
+    desc.const_member = any(pair == ("const", "pointer") for pair in zip(mods, mods[1:]))
+    return desc
+
+def format_function_param(param_spec, param):
+    """Format one entry of a function parameter list.
+
+    param_spec - the parameter DIE. A DW_TAG_formal_parameter is described
+        by its datatype; anything else is taken for
+        DW_TAG_unspecified_parameters and shown as "...".
+
+    param - not used. Kept for interface compatibility: the parameter name
+        that used to be looked up here never made it into the output.
+    """
+    if param_spec.tag == 'DW_TAG_formal_parameter':
+        return str(parse_cpp_datatype(param_spec))
+    # unspecified_parameters AKA variadic
+    return "..."
+
+def DIE_is_ptr_to_member_struct(type_die):
+    """True if the DIE is a structure with exactly two DW_TAG_member
+    children, named __pfn and __delta, in that order.
+    """
+    if type_die.tag != 'DW_TAG_structure_type':
+        return False
+    members = [child for child in type_die.iter_children() if child.tag == "DW_TAG_member"]
+    if len(members) != 2:
+        return False
+    pfn, delta = members
+    return safe_DIE_name(pfn) == "__pfn" and safe_DIE_name(delta) == "__delta"
+
+def _strip_type_tag(die):
+    """Given a DIE with DW_TAG_foo_type, returns foo.
+
+    Tags without the _type suffix (e.g. DW_TAG_namespace, which
+    parse_cpp_datatype passes in for unnamed scopes) only lose the
+    DW_TAG_ prefix, instead of being blindly cut by five characters.
+    """
+    tag = die.tag
+    if tag.startswith('DW_TAG_'):
+        tag = tag[7:]
+    if tag.endswith('_type'):
+        tag = tag[:-5]
+    return tag
diff --git a/elftools/dwarf/dwarf_util.py b/elftools/dwarf/dwarf_util.py
new file mode 100644 (file)
index 0000000..5d942e1
--- /dev/null
@@ -0,0 +1,61 @@
+#-------------------------------------------------------------------------------
+# elftools: dwarf/dwarf_util.py
+#
+# Minor, shared DWARF helpers
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+
+import os
+from ..construct.macros import UBInt32, UBInt64, ULInt32, ULInt64, Array
+from ..common.exceptions import DWARFError
+from ..common.utils import struct_parse
+
+def _get_base_offset(cu, base_attribute_name):
+    """Retrieves a required, base offset-type attribute
+    from the top DIE in the CU. Applies to several indirectly
+    encoded objects - range lists, location lists, strings, addresses.
+
+    Raises DWARFError if the top DIE lacks the attribute.
+    """
+    top_die_attrs = cu.get_top_DIE().attributes
+    if base_attribute_name in top_die_attrs:
+        return top_die_attrs[base_attribute_name].value
+    raise DWARFError("The CU at offset 0x%x needs %s" % (cu.cu_offset, base_attribute_name))
+
+def _resolve_via_offset_table(stream, cu, index, base_attribute_name):
+    """Given an index in the offset table and directions where to find it,
+    retrieves an offset. Works for loclists, rnglists.
+
+    The DWARF offset bitness of the CU block in the section matches that
+    of the CU record in dwarf_info. See DWARFv5 standard, section 7.4.
+
+    This is used for translating DW_FORM_loclistx, DW_FORM_rnglistx
+    via the offset table in the respective section.
+
+    stream - the stream of the section that holds the offset table
+    cu - the CU that the index belongs to
+    index - zero based index into the offset table
+    base_attribute_name - the attribute of the top DIE that holds
+        the offset of the table
+
+    The position of the stream is left as it was found.
+    """
+    base_offset = _get_base_offset(cu, base_attribute_name)
+    # That's offset (within the rnglists/loclists/str_offsets section) of
+    # the offset table for this CU's block in that section, which in turn is indexed by the index.
+
+    offset_size = 4 if cu.structs.dwarf_format == 32 else 8
+    # NOTE(review): every other struct_parse call in this change passes
+    # (struct, stream[, stream_pos]); the extra positional True that used
+    # to be here presumably meant "keep the stream position", so that is
+    # done explicitly - confirm against struct_parse.
+    saved_pos = stream.tell()
+    try:
+        return base_offset + struct_parse(cu.structs.Dwarf_offset(''), stream, base_offset + index*offset_size)
+    finally:
+        stream.seek(saved_pos)
+
+def _iter_CUs_in_section(stream, structs, parser):
+    """Iterates through the list of CU sections in loclists or rangelists. Almost identical structures there.
+
+    stream - the stream of the whole section
+    structs - provides the Dwarf_uint32/Dwarf_uint64 parsers for the offset table
+    parser - the construct that parses the header of one CU block
+        (e.g. structs.Dwarf_rnglists_CU_header)
+
+    Yields the parsed headers, each with an extra 'offsets' entry: the
+    parsed offset table, or False if the header's offset_count is zero.
+    """
+    # Find out where the section ends
+    stream.seek(0, os.SEEK_END)
+    endpos = stream.tell()
+    stream.seek(0, os.SEEK_SET)
+
+    offset = 0
+    while offset < endpos:
+        header = struct_parse(parser, stream, offset)
+        if header.offset_count > 0:
+            offset_parser = structs.Dwarf_uint64 if header.is64 else structs.Dwarf_uint32
+            # No explicit position: parsing continues from where the header parse left the stream
+            header['offsets'] = struct_parse(Array(header.offset_count, offset_parser('')), stream)
+        else:
+            header['offsets'] = False
+        yield header
+        # unit_length counts from the end of the length field, so this is the start of the next block
+        offset = header.offset_after_length + header.unit_length   
+
index 97630f79fe325fc2bea51abc1cf5135ed829bbd9..9eaaf2666c386b13619d51eacf22354a0ab6ecac 100644 (file)
@@ -440,3 +440,81 @@ ENUM_DW_RLE = dict(
     DW_RLE_start_end     = 0x06,
     DW_RLE_start_length  = 0x07
 )
+
+# Symbolic names of the DW_LANG_* constants, mapped to their numeric values
+ENUM_DW_LANG = dict(
+    DW_LANG_C89            = 0x0001,
+    DW_LANG_C              = 0x0002,
+    DW_LANG_Ada83          = 0x0003,
+    DW_LANG_C_plus_plus    = 0x0004,
+    DW_LANG_Cobol74        = 0x0005,
+    DW_LANG_Cobol85        = 0x0006,
+    DW_LANG_Fortran77      = 0x0007,
+    DW_LANG_Fortran90      = 0x0008,
+    DW_LANG_Pascal83       = 0x0009,
+    DW_LANG_Modula2        = 0x000a,
+    DW_LANG_Java           = 0x000b,
+    DW_LANG_C99            = 0x000c,
+    DW_LANG_Ada95          = 0x000d,
+    DW_LANG_Fortran95      = 0x000e,
+    DW_LANG_PLI            = 0x000f,
+    DW_LANG_ObjC           = 0x0010,
+    DW_LANG_ObjC_plus_plus = 0x0011,
+    DW_LANG_UPC            = 0x0012,
+    DW_LANG_D              = 0x0013,
+    DW_LANG_Python         = 0x0014,
+    DW_LANG_OpenCL         = 0x0015,
+    DW_LANG_Go             = 0x0016,
+    DW_LANG_Modula3        = 0x0017,
+    DW_LANG_Haskell        = 0x0018,
+    DW_LANG_C_plus_plus_03 = 0x0019,
+    DW_LANG_C_plus_plus_11 = 0x001a,
+    DW_LANG_OCaml          = 0x001b,
+    DW_LANG_Rust           = 0x001c,
+    DW_LANG_C11            = 0x001d,
+    DW_LANG_Swift          = 0x001e,
+    DW_LANG_Julia          = 0x001f,
+    DW_LANG_Dylan          = 0x0020,
+    DW_LANG_C_plus_plus_14 = 0x0021,
+    DW_LANG_Fortran03      = 0x0022,
+    DW_LANG_Fortran08      = 0x0023,
+    DW_LANG_RenderScript   = 0x0024,
+    DW_LANG_BLISS          = 0x0025,
+    DW_LANG_lo_user        = 0x8000,
+    DW_LANG_hi_user        = 0xffff
+)
+
+# Symbolic names of the DW_ATE_* constants, mapped to their numeric values
+ENUM_DW_ATE = dict(
+    DW_ATE_address         = 0x01,
+    DW_ATE_boolean         = 0x02,
+    DW_ATE_complex_float   = 0x03,
+    DW_ATE_float           = 0x04,
+    DW_ATE_signed          = 0x05,
+    DW_ATE_signed_char     = 0x06,
+    DW_ATE_unsigned        = 0x07,
+    DW_ATE_unsigned_char   = 0x08,
+    DW_ATE_imaginary_float = 0x09,
+    DW_ATE_packed_decimal  = 0x0a,
+    DW_ATE_numeric_string  = 0x0b,
+    DW_ATE_edited          = 0x0c,
+    DW_ATE_signed_fixed    = 0x0d,
+    DW_ATE_unsigned_fixed  = 0x0e,
+    DW_ATE_decimal_float   = 0x0f,
+    DW_ATE_UTF             = 0x10,
+    DW_ATE_UCS             = 0x11,
+    DW_ATE_ASCII           = 0x12,
+    DW_ATE_lo_user         = 0x80,
+    DW_ATE_hi_user         = 0xff
+)
+
+# Symbolic names of the DW_ACCESS_* constants, mapped to their numeric values
+ENUM_DW_ACCESS = dict(
+    DW_ACCESS_public    = 0x01,
+    DW_ACCESS_protected = 0x02,
+    DW_ACCESS_private   = 0x03
+)
+
+# Symbolic names of the DW_INL_* constants, mapped to their numeric values
+ENUM_DW_INL = dict(
+    DW_INL_not_inlined          = 0x00,
+    DW_INL_inlined              = 0x01,
+    DW_INL_declared_not_inlined = 0x02,
+    DW_INL_declared_inlined     = 0x03
+)
index c23cee9d84512b063af7975c21efe68822bad374..eae55c5b28a390a6393ff6a0824abe22701064c7 100644 (file)
@@ -110,8 +110,6 @@ class LocationLists(object):
             offset_index = 0
             while stream.tell() < endpos:
                 # We are at the start of the CU block in the loclists now
-                unit_length = struct_parse(self.structs.Dwarf_initial_length(''), stream)
-                offset_past_len = stream.tell()
                 cu_header = struct_parse(self.structs.Dwarf_loclists_CU_header, stream)
                 assert(cu_header.version == 5)
 
@@ -119,7 +117,7 @@ class LocationLists(object):
                 # We don't have a binary for the former yet. On an off chance that we one day might,
                 # let's parse the header anyway.
 
-                cu_end_offset = offset_past_len + unit_length
+                cu_end_offset = cu_header.offset_after_length + cu_header.unit_length
                 # Unit_length includes the header but doesn't include the length
 
                 while stream.tell() < cu_end_offset:
@@ -263,7 +261,7 @@ class LocationParser(object):
     @staticmethod
     def _attribute_has_loc_list(attr, dwarf_version):
         return ((dwarf_version < 4 and
-                 attr.form in ('DW_FORM_data4', 'DW_FORM_data8') and
+                 attr.form in ('DW_FORM_data1', 'DW_FORM_data2', 'DW_FORM_data4', 'DW_FORM_data8') and
                  not attr.name == 'DW_AT_const_value') or
                 attr.form == 'DW_FORM_sec_offset')
 
@@ -275,6 +273,7 @@ class LocationParser(object):
                                'DW_AT_frame_base', 'DW_AT_segment',
                                'DW_AT_static_link', 'DW_AT_use_location',
                                'DW_AT_vtable_elem_location',
+                               'DW_AT_call_value',
                                'DW_AT_GNU_call_site_value',
                                'DW_AT_GNU_call_site_target',
                                'DW_AT_GNU_call_site_data_value'))
index e5476de1007d3f5d1ca044db459d77b7070a7586..e5c9fde9d5a1525eb6bad2d072f95ca147a6ebea 100644 (file)
@@ -10,6 +10,8 @@ import os
 from collections import namedtuple
 
 from ..common.utils import struct_parse
+from ..common.exceptions import DWARFError
+from .dwarf_util import _iter_CUs_in_section
 
 
 RangeEntry = namedtuple('RangeEntry', 'entry_offset entry_length begin_offset end_offset is_absolute')
@@ -54,6 +56,12 @@ class RangeLists(object):
         self.stream.seek(offset, os.SEEK_SET)
         return self._parse_range_list_from_stream()
 
+    def get_range_list_at_offset_ex(self, offset):
+        """Parse the DWARF v5 range list found at the given offset in the
+        section. Addresses and offsets in the result are left unresolved.
+        """
+        entries_parser = self.structs.Dwarf_rnglists_entries
+        return struct_parse(entries_parser, self.stream, offset)
+
     def iter_range_lists(self):
         """ Yield all range lists found in the section.
         """
@@ -68,6 +76,24 @@ class RangeLists(object):
         for offset in all_offsets:
             yield self.get_range_list_at_offset(offset)
 
+    def iter_CUs(self):
+        """For DWARF5, returns an iterator over the CU blocks of the section.
+        Each yielded header comes with the offset table of that block.
+
+        Raises DWARFError if the version is below 5.
+        """
+        if self.version < 5:
+            raise DWARFError("CU iteration in rnglists is not supported with DWARF<5")
+
+        first_cu = next(self._dwarfinfo.iter_CUs()) # Just pick one
+        structs = first_cu.structs
+        return _iter_CUs_in_section(self.stream, structs, structs.Dwarf_rnglists_CU_header)
+
+    def iter_CU_range_lists_ex(self, cu):
+        """For DWARF5, yields untranslated rangelists in the CU, where CU comes from iter_CUs above
+        """
+        stream = self.stream
+        # Skip the offset table. Its entries are parsed as uint64/uint32,
+        # so each one takes 8 or 4 bytes - the size is in bytes, not bits.
+        stream.seek(cu.offset_table_offset + (8 if cu.is64 else 4) * cu.offset_count)
+        # unit_length counts from the end of the length field
+        while stream.tell() < cu.offset_after_length + cu.unit_length:
+            yield struct_parse(self.structs.Dwarf_rnglists_entries, stream)
+
+
     #------ PRIVATE ------#
 
     def _parse_range_list_from_stream(self):
index 5aa4a121820f65ac4e0d5ca456732fee79525d3b..b479399d10dfc75e060100d6cc9f31e7dbb09421 100644 (file)
@@ -403,13 +403,15 @@ class DWARFStructs(object):
         """ Create a struct for debug_loclists CU header, DWARFv5, 7,29
         """
         self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
-            # Unit_length parsed separately
+            StreamOffset('cu_offset'),
+            self.Dwarf_initial_length('unit_length'),
+            Value('is64', lambda ctx: ctx.is64),
+            StreamOffset('offset_after_length'),
             self.Dwarf_uint16('version'),
             self.Dwarf_uint8('address_size'),
             self.Dwarf_uint8('segment_selector_size'),
-            PrefixedArray(
-                self.Dwarf_offset('offsets'),
-                self.Dwarf_uint32('')))
+            self.Dwarf_uint32('offset_count'),
+            StreamOffset('offset_table_offset'))
 
         cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128(''))
 
@@ -436,6 +438,17 @@ class DWARFStructs(object):
             StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end'))
 
     def _create_rnglists_parsers(self):
+        self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header',
+            StreamOffset('cu_offset'),
+            self.Dwarf_initial_length('unit_length'),
+            Value('is64', lambda ctx: ctx.is64),
+            StreamOffset('offset_after_length'),
+            self.Dwarf_uint16('version'),
+            self.Dwarf_uint8('address_size'),
+            self.Dwarf_uint8('segment_selector_size'),
+            self.Dwarf_uint32('offset_count'),
+            StreamOffset('offset_table_offset'))
+
         self.Dwarf_rnglists_entries = RepeatUntilExcluding(
             lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list',
             Struct('entry',
@@ -462,9 +475,11 @@ class _InitialLengthAdapter(Adapter):
     """
     def _decode(self, obj, context):
         if obj.first < 0xFFFFFF00:
+            context['is64'] = False
             return obj.first
         else:
             if obj.first == 0xFFFFFFFF:
+                context['is64'] = True
                 return obj.second
             else:
                 raise ConstructError("Failed decoding initial length for %X" % (
diff --git a/scripts/dwarfdump.py b/scripts/dwarfdump.py
new file mode 100644 (file)
index 0000000..ac86da8
--- /dev/null
@@ -0,0 +1,539 @@
+#!/usr/bin/env python
+#-------------------------------------------------------------------------------
+# scripts/dwarfdump.py
+#
+# A clone of 'llvm-dwarfdump-11' in Python, based on the pyelftools library
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+import argparse
+import os, sys, posixpath
+import traceback
+
+# For running from development directory. It should take precedence over the
+# installed pyelftools.
+sys.path.insert(0, '.')
+
+from elftools import __version__
+from elftools.common.exceptions import DWARFError, ELFError
+from elftools.common.py3compat import bytes2str
+from elftools.elf.elffile import ELFFile
+from elftools.dwarf.locationlists import LocationParser, LocationEntry, LocationExpr, LocationViewPair, BaseAddressEntry as LocBaseAddressEntry
+from elftools.dwarf.ranges import RangeEntry # ranges.BaseAddressEntry collides with the one above
+import elftools.dwarf.ranges
+from elftools.dwarf.enums import *
+from elftools.dwarf.dwarf_expr import DWARFExprParser, DWARFExprOp
+from elftools.dwarf.datatype_cpp import describe_cpp_datatype
+from elftools.dwarf.descriptions import describe_reg_name
+
+# ------------------------------
+# ------------------------------
+
+def _get_cu_base(cu):
+    """Return the base address of the CU: DW_AT_low_pc of its top DIE,
+    or DW_AT_entry_pc if there is no low_pc.
+
+    Raises ValueError if the top DIE has neither.
+    """
+    attrs = cu.get_top_DIE().attributes
+    for attr_name in ('DW_AT_low_pc', 'DW_AT_entry_pc'):
+        if attr_name in attrs:
+            return attrs[attr_name].value
+    raise ValueError("Can't find the base IP (low_pc) for a CU")
+
+def _addr_str_length(die):
+    """Number of hex digits in an address of the DIE's CU: two per byte of address_size"""
+    address_bytes = die.cu.header.address_size
+    return address_bytes*2
+
+def _DIE_name(die):
+    """Return the name of the DIE: DW_AT_name if present, DW_AT_linkage_name otherwise.
+
+    Raises DWARFError if the DIE has neither.
+    """
+    attrs = die.attributes
+    for key in ('DW_AT_name', 'DW_AT_linkage_name'):
+        if key in attrs:
+            return bytes2str(attrs[key].value)
+    raise DWARFError()
+
+def _DIE_linkage_name(die):
+    """Return the linkage name of the DIE: DW_AT_linkage_name if present, DW_AT_name otherwise.
+
+    Raises DWARFError if the DIE has neither.
+    """
+    attrs = die.attributes
+    for key in ('DW_AT_linkage_name', 'DW_AT_name'):
+        if key in attrs:
+            return bytes2str(attrs[key].value)
+    raise DWARFError()
+
+def _safe_DIE_name(die, default=None):
+    """Return the name of the DIE: DW_AT_name if present, DW_AT_linkage_name
+    otherwise, and the default if the DIE has neither.
+    """
+    attrs = die.attributes
+    for key in ('DW_AT_name', 'DW_AT_linkage_name'):
+        if key in attrs:
+            return bytes2str(attrs[key].value)
+    return default
+
+def _safe_DIE_linkage_name(die, default=None):
+    """Return the linkage name of the DIE: DW_AT_linkage_name if present,
+    DW_AT_name otherwise, and the default if the DIE has neither.
+    """
+    attrs = die.attributes
+    for key in ('DW_AT_linkage_name', 'DW_AT_name'):
+        if key in attrs:
+            return bytes2str(attrs[key].value)
+    return default
+
+def _desc_ref(attr, die, extra=''):
+    """Describe a CU-relative reference: the raw offset within the CU,
+    the offset with the CU offset added, and the optional extra text in quotes.
+    """
+    suffix = " \"%s\"" % extra if extra else extra
+    offset_in_cu = attr.raw_value
+    return "cu + 0x%04x => {0x%08x}%s" % (
+        offset_in_cu,
+        die.cu.cu_offset + offset_in_cu,
+        suffix)
+
+def _desc_data(attr, die):
+    """ Hex, zero padded to the size given by the form:
+        two digits per byte of DW_FORM_dataN
+    """
+    byte_count = int(attr.form[12:]) # The N in DW_FORM_dataN
+    return "0x%0*x" % (byte_count * 2, attr.value)
+
+# Formatters for attribute values, keyed by attribute form.
+# Each one takes (attr, die) and returns the text for the value.
+FORM_DESCRIPTIONS = dict(
+    DW_FORM_string=lambda attr, die: "\"%s\"" % (bytes2str(attr.value),),
+    # Backslashes in strp/line_strp strings are doubled
+    DW_FORM_strp=lambda attr, die: " .debug_str[0x%08x] = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")),
+    DW_FORM_line_strp=lambda attr, die: ".debug_line_str[0x%08x] = \"%s\"" % (attr.raw_value, bytes2str(attr.value).replace("\\", "\\\\")),
+    DW_FORM_flag_present=lambda attr, die: "true",
+    DW_FORM_flag=lambda attr, die: "0x%02x" % int(attr.value),
+    DW_FORM_addr=lambda attr, die: "0x%0*x" % (_addr_str_length(die), attr.value),
+    DW_FORM_data1=_desc_data,
+    DW_FORM_data2=_desc_data,
+    DW_FORM_data4=_desc_data,
+    DW_FORM_data8=_desc_data,
+    # Blocks: the length, then the bytes in hex
+    DW_FORM_block1=lambda attr, die: "<0x%02x> %s " % (len(attr.value), " ".join("%02x" %b for b in attr.value)),
+    DW_FORM_block2=lambda attr, die: "<0x%04x> %s " % (len(attr.value), " ".join("%02x" %b for b in attr.value)),
+    DW_FORM_block4=lambda attr, die: "<0x%08x> %s " % (len(attr.value), " ".join("%02x" %b for b in attr.value)),
+    DW_FORM_ref=_desc_ref,
+    DW_FORM_ref1=_desc_ref, DW_FORM_ref2=_desc_ref,
+    DW_FORM_ref4=_desc_ref, DW_FORM_ref8=_desc_ref,
+    DW_FORM_sec_offset=lambda attr,die:  "0x%08x" % (attr.value,),
+    # Wrapped in a lambda, since _desc_expression is defined further down
+    DW_FORM_exprloc=lambda attr, die: _desc_expression(attr.value, die)
+)
+
+def _desc_enum(attr, enum):
+    """For attributes like DW_AT_language, physically
+    int, logically an enum.
+
+    Returns the first name in the enum dictionary whose value matches
+    that of the attribute, or the value as a string if none does.
+    """
+    for name, code in enum.items():
+        if code == attr.value:
+            return name
+    return str(attr.value)
+
+def _cu_comp_dir(cu):
+    """Return DW_AT_comp_dir of the top DIE of the CU, passed through bytes2str"""
+    comp_dir_attr = cu.get_top_DIE().attributes['DW_AT_comp_dir']
+    return bytes2str(comp_dir_attr.value)
+
+def _desc_decl_file(attr, die):
+    """Describe a file index attribute as the quoted path of the file,
+    looked up in the line program of the CU. Returns '(N/A)' if there is
+    no line program, or if the index is outside of its file table.
+    """
+    cu = die.cu
+    # The line program is cached on the CU object
+    if not hasattr(cu, "_lineprogram"):
+        cu._lineprogram = die.dwarfinfo.line_program_for_CU(cu)
+    lineprog = cu._lineprogram
+    file_index = attr.value
+    if not (lineprog and 0 < file_index <= len(lineprog.header.file_entry)):
+        return '(N/A)'
+
+    header = lineprog.header
+    entry = header.file_entry[file_index-1] # The index is one based
+    if entry.dir_index > 0:
+        directory = bytes2str(header.include_directory[entry.dir_index - 1])
+        if directory.startswith('.'):
+            directory = posixpath.join(_cu_comp_dir(cu), directory)
+    else:
+        directory = _cu_comp_dir(cu)
+    return "\"%s\"" % (posixpath.join(directory, bytes2str(entry.name)),)
+
+def _desc_ranges(attr, die):
+    """Describe a range list attribute: the offset of the list in hex,
+    followed by one line per range with absolute start and end addresses.
+
+    Raises NotImplementedError on an unknown kind of entry in the list.
+    """
+    di = die.cu.dwarfinfo
+    # The range lists object is cached on the DWARFInfo. The name checked
+    # has to be the name assigned, or the cache never hits.
+    if not hasattr(di, '_rangelists'):
+        di._rangelists = di.range_lists()
+    rangelist = di._rangelists.get_range_list_at_offset(attr.value)
+    base_ip = _get_cu_base(die.cu)
+    lines = []
+    addr_str_len = die.cu.header.address_size*2
+    for entry in rangelist:
+        if isinstance(entry, RangeEntry):
+            lines.append("                 [0x%0*x, 0x%0*x)" % (
+                addr_str_len,
+                (0 if entry.is_absolute else base_ip) + entry.begin_offset,
+                addr_str_len,
+                (0 if entry.is_absolute else base_ip) + entry.end_offset))
+        elif isinstance(entry, elftools.dwarf.ranges.BaseAddressEntry):
+            # Applies to the relative entries that follow
+            base_ip = entry.base_address
+        else:
+            raise NotImplementedError("Unknown object in a range list")
+    return ("0x%08x\n" % attr.value) + "\n".join(lines)
+
+def _desc_locations(attr, die):
+    """Describe a location attribute. A single location expression is
+    described as is. A location list is described as its offset in hex,
+    followed by one line per entry with absolute addresses and the expression.
+
+    Raises NotImplementedError on an unknown kind of entry in the list.
+    """
+    cu = die.cu
+    di = cu.dwarfinfo
+    # The location lists and their parser are cached on the DWARFInfo
+    if not hasattr(di, '_loclists'):
+        di._loclists = di.location_lists()
+    if not hasattr(di, '_locparser'):
+        di._locparser = LocationParser(di._loclists)
+    parsed = di._locparser.parse_from_attribute(attr, cu.header.version, die)
+    if isinstance(parsed, LocationExpr):
+        return _desc_expression(parsed.loc_expr, die)
+
+    base_ip = _get_cu_base(cu)
+    width = die.cu.header.address_size*2
+    lines = []
+    for entry in parsed:
+        if isinstance(entry, LocationEntry):
+            bias = 0 if entry.is_absolute else base_ip
+            lines.append("                 [0x%0*x, 0x%0*x): %s" % (
+                width,
+                bias + entry.begin_offset,
+                width,
+                bias + entry.end_offset,
+                _desc_expression(entry.loc_expr, die)))
+        elif isinstance(entry, LocBaseAddressEntry):
+            # Applies to the relative entries that follow
+            base_ip = entry.base_address
+        else:
+            raise NotImplementedError("Unknown object in a location list")
+    return ("0x%08x:\n" % attr.value) + "\n".join(lines)
+
+# By default, numeric arguments are spelled in hex with a leading 0x
+def _desc_operationarg(s, cu):
+    """Describe one argument of a DWARF expression operation.
+
+    Strings go as they are, ints go in hex, a list of operations goes as
+    a parenthesized subexpression, any other list goes as a blob: the
+    length, then the bytes. Anything else yields None.
+    """
+    if isinstance(s, str):
+        return s
+    if isinstance(s, int):
+        return hex(s)
+    if not isinstance(s, list):
+        return None
+
+    # Could be a blob (list of ints), could be a subexpression
+    if s and isinstance(s[0], DWARFExprOp): # Subexpression
+        described = (_desc_operation(op.op, op.op_name, op.args, cu) for op in s)
+        return '(' + '; '.join(described) + ')'
+
+    blob = [hex(len(s))]
+    blob.extend("0x%02x" % b for b in s)
+    return " ".join(blob)
+
+def _arch(cu):
+    """Return the machine architecture from the config of the CU's DWARFInfo"""
+    config = cu.dwarfinfo.config
+    return config.machine_arch
+
+def _desc_reg(reg_no, cu):
+    """Return the name of the register for the architecture of the CU, in upper case"""
+    reg_name = describe_reg_name(reg_no, _arch(cu), True)
+    return reg_name.upper()
+
+def _desc_operation(op, op_name, args, cu):
+    """Describe one operation of a DWARF expression.
+
+    op - the numeric opcode
+    op_name - the name of the operation
+    args - the arguments of the operation
+    cu - the CU, for register names and DIE lookups
+    """
+    # Not sure about regx(regno) and bregx(regno, offset)
+    if 0x50 <= op <= 0x6f: # reg0...reg31 - decode reg name
+        return op_name + " " + _desc_reg(op - 0x50, cu)
+
+    if 0x70 <= op <= 0x8f: # breg0...breg31(offset) - also decode reg name
+        return '%s %s%+d' % (op_name, _desc_reg(op - 0x70, cu), args[0])
+
+    if op_name in ('DW_OP_fbreg', 'DW_OP_bra', 'DW_OP_skip'): # Argument is decimal with a leading sign
+        return op_name + ' ' + "%+d" % (args[0])
+
+    if op_name in ('DW_OP_const1s', 'DW_OP_const2s'): # Argument is decimal without a leading sign
+        return op_name + ' ' + "%d" % (args[0])
+
+    if op_name in ('DW_OP_entry_value', 'DW_OP_GNU_entry_value'): # No space between opcode and args
+        return op_name + _desc_operationarg(args[0], cu)
+
+    if op_name == 'DW_OP_regval_type': # Arg is a DIE pointer
+        die_offset = args[1] + cu.cu_offset
+        return "%s %s (0x%08x -> 0x%08x) \"%s\"" % (
+            op_name,
+            _desc_reg(args[0], cu),
+            args[1],
+            die_offset,
+            _DIE_name(cu._get_cached_DIE(die_offset)))
+
+    if op_name == 'DW_OP_convert': # Arg is a DIE pointer
+        die_offset = args[0] + cu.cu_offset
+        return "%s (0x%08x -> 0x%08x) \"%s\"" % (
+            op_name,
+            args[0],
+            die_offset,
+            _DIE_name(cu._get_cached_DIE(die_offset)))
+
+    if args:
+        return op_name + ' ' + ', '.join(_desc_operationarg(s, cu) for s in args)
+
+    return op_name
+
+# TODO: remove this once dwarfdump catches up
+UNSUPPORTED_OPS = (
+    'DW_OP_implicit_pointer',
+    'DW_OP_deref_type',
+    'DW_OP_GNU_parameter_ref',
+    'DW_OP_GNU_deref_type',
+    'DW_OP_GNU_implicit_pointer',
+    'DW_OP_GNU_convert',
+    'DW_OP_GNU_regval_type')
+
+def _desc_expression(expr, die):
+    cu = die.cu
+    if not hasattr(cu, '_exprparser'):
+        cu._exprparser = DWARFExprParser(cu.structs)
+
+    parsed = cu._exprparser.parse_expr(expr)
+    # TODO: remove this once dwarfdump catches up
+    first_unsupported = next((i for (i, op) in enumerate(parsed) if op.op_name in UNSUPPORTED_OPS), None)
+    if first_unsupported is None:
+        lines = [_desc_operation(op.op, op.op_name, op.args, cu) for op in parsed]
+    else:
+        lines = [_desc_operation(op.op, op.op_name, op.args, cu) for op in parsed[0:first_unsupported]]
+        start_of_unparsed = parsed[first_unsupported].offset
+        lines.append("<decoding error> " + " ".join("%02x" % b for b in expr[start_of_unparsed:]))
+    return ", ".join(lines)
+
+def _desc_datatype(attr, die):
+    """Oy vey
+    """
+    return _desc_ref(attr, die, describe_cpp_datatype(die))
+
+def _get_origin_name(die):
+    func_die = die.get_DIE_from_attribute('DW_AT_abstract_origin')
+    name = _safe_DIE_linkage_name(func_die, '')
+    if not name:
+        if 'DW_AT_specification' in func_die.attributes:
+            name = _DIE_linkage_name(func_die.get_DIE_from_attribute('DW_AT_specification'))
+        elif 'DW_AT_abstract_origin' in func_die.attributes:
+            return _get_origin_name(func_die)    
+    return name
+
+def _desc_origin(attr, die):
+    return _desc_ref(attr, die, _get_origin_name(die))
+
+def _desc_spec(attr, die):
+    return _desc_ref(attr, die,
+        _DIE_linkage_name(die.get_DIE_from_attribute('DW_AT_specification')))
+
+def _desc_value(attr, die):
+    return str(attr.value)
+
+ATTR_DESCRIPTIONS = dict(
+    DW_AT_language=lambda attr, die: _desc_enum(attr, ENUM_DW_LANG),
+    DW_AT_encoding=lambda attr, die: _desc_enum(attr, ENUM_DW_ATE),
+    DW_AT_accessibility=lambda attr, die: _desc_enum(attr, ENUM_DW_ACCESS),
+    DW_AT_inline=lambda attr, die: _desc_enum(attr, ENUM_DW_INL),
+    DW_AT_decl_file=_desc_decl_file,
+    DW_AT_decl_line=_desc_value,
+    DW_AT_ranges=_desc_ranges,
+    DW_AT_location=_desc_locations,
+    DW_AT_data_member_location=lambda attr, die: _desc_data(attr, die) if attr.form.startswith('DW_FORM_data') else _desc_locations(attr, die),
+    DW_AT_frame_base=_desc_locations,
+    DW_AT_type=_desc_datatype,
+    DW_AT_call_line=_desc_value,
+    DW_AT_call_file=_desc_decl_file,
+    DW_AT_abstract_origin=_desc_origin,
+    DW_AT_specification=_desc_spec
+)
+
+class ReadElf(object):
+    """ dump_xxx is used to dump the respective section.
+    Mimics the output of dwarfdump with --verbose
+    """
+    def __init__(self, filename, file, output):
+        """ file:
+                stream object with the ELF file to read
+
+            output:
+                output stream to write to
+        """
+        self.elffile = ELFFile(file)
+        self.output = output
+        self._dwarfinfo = self.elffile.get_dwarf_info()
+        arches = {"EM_386": "i386", "EM_X86_64": "x86-64"}
+        arch = arches[self.elffile['e_machine']]
+        bits = self.elffile.elfclass
+        self._emitline("%s:    file format elf%d-%s" % (filename, bits, arch))
+
+    def _emit(self, s=''):
+        """ Emit an object to output
+        """
+        self.output.write(str(s))
+
+    def _emitline(self, s=''):
+        """ Emit an object to output, followed by a newline
+        """
+        self.output.write(str(s).rstrip() + '\n')
+
+    def dump_info(self):
+        # TODO: DWARF64 will cause discrepancies in hex offset sizes
+        self._emitline(".debug_info contents:")
+        for cu in self._dwarfinfo.iter_CUs():
+            if cu.header.version >= 5:
+                ut = next(k for (k,v) in ENUM_DW_UT.items() if v == cu.header.unit_type)
+                unit_type_str = " unit_type = %s," % ut
+            else:
+                unit_type_str = ''
+
+            self._emitline("0x%08x: Compile Unit: length = 0x%08x, format = DWARF%d, version = 0x%04x,%s abbr_offset = 0x%04x, addr_size = 0x%02x (next unit at 0x%08x)" %(
+                cu.cu_offset,
+                cu.header.unit_length,
+                cu.structs.dwarf_format,
+                cu.header.version,
+                unit_type_str,
+                cu.header.debug_abbrev_offset,
+                cu.header.address_size,
+                cu.cu_offset + (4 if cu.structs.dwarf_format == 32 else 12) + cu.header.unit_length))
+            self._emitline()
+            parent = cu.get_top_DIE()
+            for die in cu.iter_DIEs():
+                if die.get_parent() == parent:
+                    parent = die
+                if not die.is_null(): 
+                    self._emitline("0x%08x: %s [%d] %s %s" % (
+                        die.offset,
+                        die.tag,
+                        die.abbrev_code,
+                        '*' if die.has_children else '',
+                        '(0x%08x)' % die.get_parent().offset if die.get_parent() is not None else ''))
+                    for attr_name in die.attributes:
+                        attr = die.attributes[attr_name]
+                        self._emitline("              %s [%s]  (%s)" % (attr_name, attr.form, self.describe_attr_value(die, attr)))
+                else:
+                    self._emitline("0x%08x: NULL" % (die.offset,))
+                    parent = die.get_parent()
+                self._emitline()
+
+    def describe_attr_value(self, die, attr):
+        """This describes the attribute value in the way that's compatible 
+        with llvm_dwarfdump. Somewhat duplicates the work of describe_attr_value() in descriptions
+        """
+        if attr.name in ATTR_DESCRIPTIONS:
+            return ATTR_DESCRIPTIONS[attr.name](attr, die)
+        elif attr.form in FORM_DESCRIPTIONS:
+            return FORM_DESCRIPTIONS[attr.form](attr, die)
+        else:
+            return str(attr.value)
+
+    def dump_loc(self):
+        pass
+
+    def dump_loclists(self):
+        pass
+
+    def dump_ranges(self):
+        pass
+
+    def dump_v4_rangelist(self, rangelist, cu_map):
+        cu = cu_map[rangelist[0].entry_offset]
+        addr_str_len = cu.header.address_size*2
+        base_ip = _get_cu_base(cu)
+        for entry in rangelist:
+            if isinstance(entry, RangeEntry):
+                self._emitline("[0x%0*x, 0x%0*x)" % (
+                    addr_str_len,
+                    (0 if entry.is_absolute else base_ip) + entry.begin_offset,
+                    addr_str_len,
+                    (0 if entry.is_absolute else base_ip) + entry.end_offset))
+            elif isinstance(entry, elftools.dwarf.ranges.BaseAddressEntry):
+                base_ip = entry.base_address
+            else:
+                raise NotImplementedError("Unknown object in a range list")    
+
+    def dump_rnglists(self):
+        self._emitline(".debug_rnglists contents:")
+        ranges_sec = self._dwarfinfo.range_lists()
+        if ranges_sec.version < 5:
+            return
+
+        cu_map = {die.attributes['DW_AT_ranges'].value : cu # Dict from range offset to home CU
+            for cu in self._dwarfinfo.iter_CUs()
+            for die in cu.iter_DIEs()
+            if 'DW_AT_ranges' in die.attributes}
+
+        for cu in ranges_sec.iter_CUs():
+            self._emitline("0x%08x: range list header: length = 0x%08x, format = DWARF%d, version = 0x%04x, addr_size = 0x%02x, seg_size = 0x%02x, offset_entry_count = 0x%08x" % (
+                cu.cu_offset,
+                cu.unit_length,
+                64 if cu.is64 else 32,
+                cu.version,
+                cu.address_size,
+                cu.segment_selector_size,
+                cu.offset_count))
+            self._emitline("ranges:")
+            if cu.offset_count > 0:
+                rangelists = [ranges_sec.get_range_list_at_offset_ex(offset) for offset in cu.offsets]
+            else:
+                rangelists = list(ranges_sec.iter_CU_range_lists_ex(cu))
+            # We have to parse it completely before dumping, because dwarfdump aligns columns,
+            # no way to do that without some lookahead
+            max_type_len = max(len(entry.entry_type) for rangelist in rangelists for entry in rangelist)
+            for rangelist in rangelists:
+                self.dump_v5_rangelist(rangelist, cu_map, max_type_len)
+
+    def dump_v5_rangelist(self, rangelist, cu_map, max_type_len):
+        cu = cu_map[rangelist[0].entry_offset]
+        addr_str_len = cu.header.address_size*2
+        base_ip = _get_cu_base(cu)        
+        for entry in rangelist:
+            type = entry.entry_type
+            self._emit("0x%08x: [%s]:  " % (entry.entry_offset, type.ljust(max_type_len)))
+            if type == 'DW_RLE_base_address':
+                base_ip = entry.address
+                self._emitline("0x%0*x" % (addr_str_len, base_ip))
+            elif type == 'DW_RLE_offset_pair':
+                self._emitline("0x%0*x, 0x%0*x => [0x%0*x, 0x%0*x)" % (
+                    addr_str_len, entry.start_offset,
+                    addr_str_len, entry.end_offset,
+                    addr_str_len, entry.start_offset + base_ip,
+                    addr_str_len, entry.end_offset + base_ip))
+            elif type == 'DW_RLE_start_length':
+                self._emitline("0x%0*x, 0x%0*x => [0x%0*x, 0x%0*x)" % (
+                    addr_str_len, entry.start_address,
+                    addr_str_len, entry.length,
+                    addr_str_len, entry.start_address,
+                    addr_str_len, entry.start_address + entry.length))
+            elif type == 'DW_RLE_start_end':
+                self._emitline("0x%0*x, 0x%0*x => [0x%0*x, 0x%0*x)" % (
+                    addr_str_len, entry.start_address,
+                    addr_str_len, entry.end_address,
+                    addr_str_len, entry.start_address,
+                    addr_str_len, entry.end_address))
+            else:
+                raise NotImplementedError()
+        last = rangelist[-1]
+        self._emitline("0x%08x: [DW_RLE_end_of_list ]" % (last.entry_offset + last.entry_length,))
+
+# Description text passed to the argument parser in main()
+SCRIPT_DESCRIPTION = 'Display information about the contents of ELF format files'
+# '%%' escapes the percent sign, so the resulting string still contains a
+# literal '%(prog)s' placeholder next to the substituted version.
+VERSION_STRING = '%%(prog)s: based on pyelftools %s' % __version__
+
+def main(stream=None):
+    # parse the command-line arguments and invoke ReadElf
+    argparser = argparse.ArgumentParser(
+            usage='usage: %(prog)s [options] <elf-file>',
+            description=SCRIPT_DESCRIPTION,
+            add_help=False,
+            prog='readelf.py')
+    argparser.add_argument('file',
+            nargs='?', default=None,
+            help='ELF file to parse')
+    argparser.add_argument('-H', '--help',
+            action='store_true', dest='help',
+            help='Display this information')
+    argparser.add_argument('--verbose',
+            action='store_true', dest='verbose',
+            help=('For compatibility with dwarfdump. Non-verbose mode is not implemented.'))
+
+    # Section dumpers
+    sections = ('info', 'loclists', 'rnglists') # 'loc', 'ranges' not implemented yet
+    for section in sections:
+        argparser.add_argument('--debug-%s' % section,
+            action='store_true', dest=section,
+            help=('Display the contents of DWARF debug_%s section.' % section))
+
+    args = argparser.parse_args()
+
+    if args.help or not args.file:
+        argparser.print_help()
+        sys.exit(0)
+
+    # A compatibility hack on top of a compatibility hack :(
+    del ENUM_DW_TAG["DW_TAG_template_type_param"]
+    del ENUM_DW_TAG["DW_TAG_template_value_param"]
+    ENUM_DW_TAG['DW_TAG_template_type_parameter'] = 0x2f
+    ENUM_DW_TAG['DW_TAG_template_value_parameter'] = 0x30
+
+    with open(args.file, 'rb') as file:
+        try:
+            readelf = ReadElf(args.file, file, stream or sys.stdout)
+            if args.info:
+                readelf.dump_info()
+            if args.loclists:
+                readelf.dump_loclists()
+            if args.rnglists:
+                readelf.dump_rnglists()
+            #if args.loc:
+            #    readelf.dump_loc()
+            #if args.ranges:
+            #    readelf.dump_ranges()
+        except ELFError as ex:
+            sys.stdout.flush()
+            sys.stderr.write('ELF error: %s\n' % ex)
+            if args.show_traceback:
+                traceback.print_exc()
+            sys.exit(1)
+
+#-------------------------------------------------------------------------------
+if __name__ == '__main__':
+    main()
+    #profile_main()
index 289a0fd6fdcf036e724274a368e0ab3684f8d3c4..b7f60fceb22a08e018976ba16ffbd9c58c3acd8a 100755 (executable)
@@ -23,6 +23,7 @@ def main():
     run_test_script('test/run_all_unittests.py')
     run_test_script('test/run_examples_test.py')
     run_test_script('test/run_readelf_tests.py', '--parallel')
+    run_test_script('test/run_dwarfdump_tests.py', '--parallel')
 
 if __name__ == '__main__':
     sys.exit(main())
index 31feddfde64ef0c8e8ff5427eb411becde1c0961..968c6774eb3b95f9514930636f3a2126863969a0 100644 (file)
@@ -7,3 +7,12 @@ readelf is built as follows:
 * git co binutils-<VERSION>-branch
 * Run configure, then make
 * Built on a 64-bit Ubuntu machine
+
+llvm-dwarfdump is built as follows:
+
+* Used Debian v10 on x86_64
+* install gcc, git, cmake
+* git clone https://github.com/llvm/llvm-project.git llvm
+* cd llvm
+* cmake -S llvm -B build -G "Unix Makefiles" -DCMAKE_BUILD_TYPE=Release
+* cmake --build build -- llvm-dwarfdump
diff --git a/test/external_tools/llvm-dwarfdump b/test/external_tools/llvm-dwarfdump
new file mode 100755 (executable)
index 0000000..02bc4df
Binary files /dev/null and b/test/external_tools/llvm-dwarfdump differ
diff --git a/test/run_dwarfdump_tests.py b/test/run_dwarfdump_tests.py
new file mode 100644 (file)
index 0000000..8209e80
--- /dev/null
@@ -0,0 +1,196 @@
+#!/usr/bin/env python
+#-------------------------------------------------------------------------------
+# test/run_dwarfdump_tests.py
+#
+# Automatic test runner for elftools & llvm-dwarfdump-11
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+import argparse
+from difflib import SequenceMatcher
+import logging
+from multiprocessing import Pool
+import os
+import platform
+import re
+import sys
+import time
+
+from utils import run_exe, is_in_rootdir, dump_output_to_temp_files
+
+# Make it possible to run this file from the root dir of pyelftools without
+# installing pyelftools; useful for CI testing, etc.
+sys.path[0:0] = ['.']
+
+# Create a global logger object
+testlog = logging.getLogger('run_tests')
+testlog.setLevel(logging.DEBUG)
+testlog.addHandler(logging.StreamHandler(sys.stdout))
+
+# Following the readelf example, we ship our own.
+if platform.system() == "Darwin": # MacOS
+   raise NotImplementedError("Not supported on MacOS")
+elif platform.system() == "Windows":
+    raise NotImplementedError("Not supported on Windows")
+else:
+    DWARFDUMP_PATH = 'test/external_tools/llvm-dwarfdump'
+
+def discover_testfiles(rootdir):
+    """ Discover test files in the given directory. Yield them one by one.
+    """
+    for filename in os.listdir(rootdir):
+        _, ext = os.path.splitext(filename)
+        if ext == '.elf':
+            yield os.path.join(rootdir, filename)
+
+
+def run_test_on_file(filename, verbose=False, opt=None):
+    """ Runs a test on the given input filename. Return True if all test
+        runs succeeded.
+        If opt is specified, rather that going over the whole
+        set of supported options, the test will only
+        run for one option.
+    """
+    success = True
+    testlog.info("Test file '%s'" % filename)
+    if opt is None:
+        options = [
+            '--debug-info'
+            ]
+    else:
+        options = [opt]
+
+    for option in options:
+        if verbose: testlog.info("..option='%s'" % option)
+
+        # stdouts will be a 2-element list: output of llvm-dwarfdump and output
+        # of scripts/dwarfdump.py
+        stdouts = []
+        for exe_path in [DWARFDUMP_PATH, 'scripts/dwarfdump.py']:
+            args = [option, '--verbose', filename]
+            if verbose: testlog.info("....executing: '%s %s'" % (
+                exe_path, ' '.join(args)))
+            t1 = time.time()
+            rc, stdout = run_exe(exe_path, args)
+            if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,))
+            if rc != 0:
+                testlog.error("@@ aborting - '%s %s' returned '%s'" % (exe_path, option, rc))
+                return False
+            stdouts.append(stdout)
+        if verbose: testlog.info('....comparing output...')
+        t1 = time.time()
+        rc, errmsg = compare_output(*stdouts)
+        if verbose: testlog.info("....elapsed: %s" % (time.time() - t1,))
+        if rc:
+            if verbose: testlog.info('.......................SUCCESS')
+        else:
+            success = False
+            testlog.info('.......................FAIL')
+            testlog.info('....for file %s' % filename)
+            testlog.info('....for option "%s"' % option)
+            testlog.info('....Output #1 is llvm-dwarfdump, Output #2 is pyelftools')
+            testlog.info('@@ ' + errmsg)
+            dump_output_to_temp_files(testlog, filename, option, *stdouts)
+    return success
+
+
+def compare_output(s1, s2):
+    """ Compare stdout strings s1 and s2.
+        s1 is from llvm-dwarfdump, s2 from elftools dwarfdump.py
+        Return pair success, errmsg. If comparison succeeds, success is True
+        and errmsg is empty. Otherwise success is False and errmsg holds a
+        description of the mismatch.
+    """
+    def prepare_lines(s):
+        return [line for line in s.lower().splitlines() if line.strip() != '']
+
+    lines1 = prepare_lines(s1)
+    lines2 = prepare_lines(s2)
+
+    if len(lines1) != len(lines2):
+        return False, 'Number of lines different: %s vs %s' % (
+                len(lines1), len(lines2))
+
+    for (i, (line1, line2)) in enumerate(zip(lines1, lines2)):
+        # Compare ignoring whitespace
+        lines1_parts = line1.split()
+        lines2_parts = line2.split()
+
+        if ''.join(lines1_parts) != ''.join(lines2_parts):
+            sm = SequenceMatcher()
+            sm.set_seqs(lines1[i], lines2[i])
+            changes = sm.get_opcodes()
+
+            errmsg = 'Mismatch on line #%s:\n>>%s<<\n>>%s<<\n (%r)' % (
+                i, line1, line2, changes)
+            return False, errmsg
+    return True, ''
+
+def main():
+    if not is_in_rootdir():
+        testlog.error('Error: Please run me from the root dir of pyelftools!')
+        return 1
+
+    argparser = argparse.ArgumentParser(
+        usage='usage: %(prog)s [options] [file] [file] ...',
+        prog='run_dwarfdump_tests.py')
+    argparser.add_argument('files', nargs='*', help='files to run tests on')
+    argparser.add_argument(
+        '--parallel', action='store_true',
+        help='run tests in parallel; always runs all tests w/o verbose')
+    argparser.add_argument('-V', '--verbose',
+                           action='store_true', dest='verbose',
+                           help='verbose output')
+    argparser.add_argument(
+        '-k', '--keep-going',
+        action='store_true', dest='keep_going',
+        help="Run all tests, don't stop at the first failure")
+    argparser.add_argument('--opt',
+        action='store', dest='opt', metavar='<dwarfdump-option>',
+        help= 'Limit the test one one dwarfdump option.')
+    args = argparser.parse_args()
+
+    if args.parallel:
+        if args.verbose or args.keep_going == False:
+            print('WARNING: parallel mode disables verbosity and always keeps going')
+
+    if args.verbose:
+        testlog.info('Running in verbose mode')
+        testlog.info('Python executable = %s' % sys.executable)
+        testlog.info('dwarfdump path = %s' % DWARFDUMP_PATH)
+        testlog.info('Given list of files: %s' % args.files)
+
+    # If file names are given as command-line arguments, only these files
+    # are taken as inputs. Otherwise, autodiscovery is performed.
+    if len(args.files) > 0:
+        filenames = args.files
+    else:
+        filenames = sorted(discover_testfiles('test/testfiles_for_dwarfdump'))
+
+    if len(filenames) > 1 and args.parallel:
+        pool = Pool()
+        results = pool.map(run_test_on_file, filenames)
+        failures = results.count(False)
+    else:
+        failures = 0
+        for filename in filenames:
+            if not run_test_on_file(filename, args.verbose, args.opt):
+                failures += 1
+                if not args.keep_going:
+                    break
+
+    if failures == 0:
+        testlog.info('\nConclusion: SUCCESS')
+        return 0
+    elif args.keep_going:
+        testlog.info('\nConclusion: FAIL ({}/{})'.format(
+            failures, len(filenames)))
+        return 1
+    else:
+        testlog.info('\nConclusion: FAIL')
+        return 1
+
+
+# When run as a script, the value returned by main() (0 on success, 1 on
+# failure) becomes the exit status of the process.
+if __name__ == '__main__':
+    sys.exit(main())
index c5268f3492c95d8cf704e8a43426d82d625d7cf0..2335f888768ff9b1f96ffb600b307d67ce9a228d 100755 (executable)
@@ -63,7 +63,7 @@ def run_example_and_compare(example_path):
         return True
     else:
         testlog.info('.......FAIL comparison')
-        dump_output_to_temp_files(testlog, example_out, ref_str)
+        dump_output_to_temp_files(testlog, example_path, '', example_out, ref_str)
         return False
 
 
index 4c3df984caf44c8b20d2047811e6bf550a3b751c..ad56f4e31dd1b81a47eaeaa1d0bb371046074b4d 100755 (executable)
@@ -117,7 +117,7 @@ def run_test_on_file(filename, verbose=False, opt=None):
             testlog.info('....for option "%s"' % option)
             testlog.info('....Output #1 is readelf, Output #2 is pyelftools')
             testlog.info('@@ ' + errmsg)
-            dump_output_to_temp_files(testlog, *stdouts)
+            dump_output_to_temp_files(testlog, filename, option, *stdouts)
     return success
 
 
diff --git a/test/testfiles_for_dwarfdump/dwarf_gnuops4.so.elf b/test/testfiles_for_dwarfdump/dwarf_gnuops4.so.elf
new file mode 100644 (file)
index 0000000..d9ffe9b
Binary files /dev/null and b/test/testfiles_for_dwarfdump/dwarf_gnuops4.so.elf differ
diff --git a/test/testfiles_for_dwarfdump/dwarf_lineprogramv5.elf b/test/testfiles_for_dwarfdump/dwarf_lineprogramv5.elf
new file mode 100644 (file)
index 0000000..33f051b
Binary files /dev/null and b/test/testfiles_for_dwarfdump/dwarf_lineprogramv5.elf differ
diff --git a/test/testfiles_for_dwarfdump/dwarf_v5ops.so.elf b/test/testfiles_for_dwarfdump/dwarf_v5ops.so.elf
new file mode 100644 (file)
index 0000000..9da7825
Binary files /dev/null and b/test/testfiles_for_dwarfdump/dwarf_v5ops.so.elf differ
index 8eedacf563c98b5c3dd17fa5444d833c5ef31875..4669cf8458a96dc4a02a31a333270b1946931211 100644 (file)
@@ -32,13 +32,14 @@ def is_in_rootdir():
     return os.path.isdir('test') and os.path.isdir('elftools')
 
 
-def dump_output_to_temp_files(testlog, *args):
+def dump_output_to_temp_files(testlog, filename, option, *args):
     """ Dumps the output strings given in 'args' to temp files: one for each
-        arg.
+        arg. The filename and option arguments contribute to the file name,
+        so that one knows which test did the output dump come from.
     """
     for i, s in enumerate(args):
         fd, path = tempfile.mkstemp(
-                prefix='out' + str(i + 1) + '_',
+                prefix='out-%d-%s-%s-' % (i + 1, os.path.split(filename)[-1], option),
                 suffix='.stdout')
         file = os.fdopen(fd, 'w')
         file.write(s)