readelf 2.41 with Ranges test excluded on 2 files (#489)
authorSeva Alekseyev <sevaa@yarxi.ru>
Tue, 22 Aug 2023 12:26:05 +0000 (08:26 -0400)
committerGitHub <noreply@github.com>
Tue, 22 Aug 2023 12:26:05 +0000 (05:26 -0700)
* readelf 2.41 with Ranges test excluded

* Ranges test reinstated except two binaries

---------

Co-authored-by: Seva <seva@sandbox.home>
elftools/dwarf/descriptions.py
elftools/dwarf/ranges.py
scripts/readelf.py
test/external_tools/README.txt
test/external_tools/readelf
test/run_readelf_tests.py

index 7db7f3f1dc95cea0d0b001f674bf972a14545100..0b409c7296a9ccbfe42cb096681b895e409a9564 100644 (file)
@@ -183,34 +183,37 @@ def describe_form_class(form):
 #
 _MACHINE_ARCH = None
 
+# Implements the alternative format of readelf: lowercase hex, prefixed with 0x unless 0
+def _format_hex(n):
+    return '0x%x' % n if n != 0 else '0'
 
 def _describe_attr_ref(attr, die, section_offset):
-    return '<0x%x>' % (attr.value + die.cu.cu_offset)
+    return '<%s>' % _format_hex(attr.value + die.cu.cu_offset)
 
 def _describe_attr_ref_sig8(attr, die, section_offset):
-    return 'signature: 0x%x' % (attr.value)
+    return 'signature: %s' % _format_hex(attr.value)
 
 def _describe_attr_value_passthrough(attr, die, section_offset):
     return attr.value
 
 def _describe_attr_hex(attr, die, section_offset):
-    return '0x%x' % (attr.value)
+    return '%s' % _format_hex(attr.value)
 
 def _describe_attr_hex_addr(attr, die, section_offset):
-    return '<0x%x>' % (attr.value)
+    return '<%s>' % _format_hex(attr.value)
 
 def _describe_attr_split_64bit(attr, die, section_offset):
     low_word = attr.value & 0xFFFFFFFF
     high_word = (attr.value >> 32) & 0xFFFFFFFF
-    return '0x%x 0x%x' % (low_word, high_word)
+    return '%s %s' % (_format_hex(low_word), _format_hex(high_word))
 
 def _describe_attr_strp(attr, die, section_offset):
-    return '(indirect string, offset: 0x%x): %s' % (
-        attr.raw_value, bytes2str(attr.value))
+    return '(indirect string, offset: %s): %s' % (
+        _format_hex(attr.raw_value), bytes2str(attr.value))
 
 def _describe_attr_line_strp(attr, die, section_offset):
-    return '(indirect line string, offset: 0x%x): %s' % (
-        attr.raw_value, bytes2str(attr.value))
+    return '(indirect line string, offset: %s): %s' % (
+        _format_hex(attr.raw_value), bytes2str(attr.value))
 
 def _describe_attr_string(attr, die, section_offset):
     return bytes2str(attr.value)
index 85fc593a7165c3eea98531bb8cc17eebdb3ae2c9..43ab293d1f738e11baf887ae1be6ad14e8a3a682 100644 (file)
@@ -114,14 +114,21 @@ class RangeLists(object):
         return struct_parse(self.structs.Dwarf_rnglists_entries, self.stream, offset)
 
     def iter_range_lists(self):
-        """ Yield all range lists found in the section according to readelf rules.
+        """ Yields all range lists found in the section according to readelf rules.
         Scans the DIEs for rangelist offsets, then pulls those.
+        Returned rangelists are always translated into lists of BaseAddressEntry/RangeEntry objects.
         """
-        # Calling parse until the stream ends is wrong, because ranges can overlap.
-        # Need to scan the DIEs to know all range locations
+        # Rangelists can overlap. That is, one DIE points at the rangelist beginning, and another
+        # points at the middle of the same. Therefore, enumerating them is not a well defined
+        # operation - do you count those as two different (but overlapping) ones, or as a single one?
+        # For debugging utility, you want two. That's what readelf does. For faithfully
+        # representing the section contents, you want one.
+        # That was the behaviour of pyelftools 0.28 and below - calling
+        # parse until the stream end. Leaving aside the question of correctness,
+        # that's incompatible with readelf.
 
-        # This maps list offset to CU
         ver5 = self.version >= 5
+        # This maps list offset to CU
         cu_map = {die.attributes['DW_AT_ranges'].value : cu
             for cu in self._dwarfinfo.iter_CUs()
             for die in cu.iter_DIEs()
@@ -147,8 +154,13 @@ class RangeLists(object):
         stream = self.stream
         stream.seek(cu.offset_table_offset + (64 if cu.is64 else 32) * cu.offset_count)
         while stream.tell() < cu.offset_after_length + cu.unit_length:
-            yield struct_parse(self.structs.Dwarf_rnglists_entries, stream);
+            yield struct_parse(self.structs.Dwarf_rnglists_entries, stream)
 
+    def translate_v5_entry(self, entry, cu):
+        """Translates entries in a DWARFv5 rangelist from raw parsed format to 
+        a list of BaseAddressEntry/RangeEntry, using the CU
+        """
+        return entry_translate[entry.entry_type](entry, cu)
 
     #------ PRIVATE ------#
 
index 14e29ae01e04995f61c8e66d5bb5b45bb076d6b1..a58085b41c7b184c24e470749d84023f691c9ca8 100755 (executable)
@@ -949,7 +949,8 @@ class ReadElf(object):
                 lead0x = False
             else:
                 lead0x = True
-                fieldsize -= 2
+                if fieldsize is not None:
+                    fieldsize -= 2
 
         s = '0x' if lead0x else ''
         if fullhex:
@@ -980,7 +981,7 @@ class ReadElf(object):
             self._format_hex(
                 version_section['sh_addr'], fieldsize=16, lead0x=lead0x),
             self._format_hex(
-                version_section['sh_offset'], fieldsize=6, lead0x=True),
+                version_section['sh_offset'], fieldsize=8, lead0x=True),
             version_section['sh_link'],
                 self.elffile.get_section(version_section['sh_link']).name
             )
@@ -1117,7 +1118,7 @@ class ReadElf(object):
 
         for cu in self._dwarfinfo.iter_CUs():
             self._emitline('  Compilation Unit @ offset %s:' %
-                self._format_hex(cu.cu_offset))
+                self._format_hex(cu.cu_offset, alternate=True))
             self._emitline('   Length:        %s (%s)' % (
                 self._format_hex(cu['unit_length']),
                 '%s-bit' % cu.dwarf_format()))
@@ -1128,7 +1129,7 @@ class ReadElf(object):
                     self._emitline('   Unit Type:     %s (%d)' % (
                         unit_type, ENUM_DW_UT.get(cu.header.unit_type, 0)))
                     self._emitline('   Abbrev Offset: %s' % (
-                        self._format_hex(cu['debug_abbrev_offset'])))
+                        self._format_hex(cu['debug_abbrev_offset'], alternate=True)))
                     self._emitline('   Pointer Size:  %s' % cu['address_size'])
                     if unit_type in ('DW_UT_skeleton', 'DW_UT_split_compile'):
                         self._emitline('   Dwo id:        %s' % cu['dwo_id'])
@@ -1137,7 +1138,7 @@ class ReadElf(object):
                         self._emitline('   Type Offset:   0x%x' % cu['type_offset'])
             else:
                 self._emitline('   Abbrev Offset: %s' % (
-                    self._format_hex(cu['debug_abbrev_offset']))),
+                    self._format_hex(cu['debug_abbrev_offset'], alternate=True))),
                 self._emitline('   Pointer Size:  %s' % cu['address_size'])
 
             # The nesting depth of each DIE within the tree of DIEs must be
@@ -1627,33 +1628,54 @@ class ReadElf(object):
             self._dump_debug_rangesection(di, range_lists_sec)
 
     def _dump_debug_rangesection(self, di, range_lists_sec):
-        # In the master branch of binutils, the v5 dump format is way different by now.
-
+        # Last amended to match readelf 2.41
         ver5 = range_lists_sec.version >= 5
         section_name = (di.debug_rnglists_sec if ver5 else di.debug_ranges_sec).name
         addr_size = di.config.default_address_size # In bytes, 4 or 8
         addr_width = addr_size * 2 # In hex digits, 8 or 16
         line_template = "    %%08x %%0%dx %%0%dx %%s" % (addr_width, addr_width)
-        base_template = "    %%08x %%0%dx (base address)" % (addr_width)        
-
-        range_lists = list(range_lists_sec.iter_range_lists())
-        if len(range_lists) == 0:
-            # Present but empty locations section - readelf outputs a message
-            self._emitline("\nSection '%s' has no debugging data." % section_name)
-            return
+        base_template = "    %%08x %%0%dx (base address)" % (addr_width)
 
         # In order to determine the base address of the range
         # We need to know the corresponding CU.
         cu_map = {die.attributes['DW_AT_ranges'].value : cu  # Range list offset => CU
             for cu in di.iter_CUs()
             for die in cu.iter_DIEs()
-            if 'DW_AT_ranges' in die.attributes}
-
-        self._emitline('Contents of the %s section:\n' % section_name)
-        self._emitline('    Offset   Begin    End')
-
-        for range_list in range_lists:
-            self._dump_rangelist(range_list, cu_map, ver5, line_template, base_template)
+            if 'DW_AT_ranges' in die.attributes}        
+
+        if ver5: # Dump by CUs - unsure at this point what readelf does; the ranges dump is buggy in 2.41
+            self._emitline('Contents of the %s section:\n\n\n' % section_name)
+            for cu in range_lists_sec.iter_CUs():
+                self._emitline(' Table at Offset: %s:' % self._format_hex(cu.cu_offset, alternate=True))
+                self._emitline('  Length:          %s' % self._format_hex(cu.unit_length, alternate=True))
+                self._emitline('  DWARF version:   %d' % cu.version)
+                self._emitline('  Address size:    %d' % cu.address_size)
+                self._emitline('  Segment size:    %d' % cu.segment_selector_size)
+                self._emitline('  Offset entries:  %d\n' % cu.offset_count)
+                # Is the offset table dumped too?
+                for (i, range_list) in enumerate(range_lists_sec.iter_CU_range_lists_ex(cu)):
+                    list_offset = range_list[0].entry_offset
+                    range_list = list(range_lists_sec.translate_v5_entry(entry, cu_map[list_offset]) for entry in range_list)
+                    self._emitline('  Offset: %s, Index: %d' % (self._format_hex(list_offset, alternate=True), i))
+                    self._emitline('    Offset   Begin    End')
+                    self._dump_rangelist(range_list, cu_map, ver5, line_template, base_template)
+        else: # Dump by DIE reference offset
+            range_lists = list(range_lists_sec.iter_range_lists())
+            if len(range_lists) == 0:
+                # Present but empty ranges section - readelf outputs a message
+                self._emitline("\nSection '%s' has no debugging data." % section_name)
+                return
+
+            self._emitline('Contents of the %s section:\n\n\n' % section_name)
+            self._emitline('    Offset   Begin    End')
+
+            for range_list in range_lists:
+                if len(range_list) == 0: # working around a bogus behavior in readelf 2.41
+                    # No entries means no offset. Dirty hack: peek the stream position
+                    range_list_offset = range_lists_sec.stream.tell() - self._dwarfinfo.config.default_address_size*2
+                    self._emitline('    %08x <End of list>' % (range_list_offset))
+                else:
+                    self._dump_rangelist(range_list, cu_map, ver5, line_template, base_template)
 
     def _dump_rangelist(self, range_list, cu_map, ver5, line_template, base_template):
         # Weird discrepancy in binutils: for DWARFv5 it outputs entry offset,
index 968c6774eb3b95f9514930636f3a2126863969a0..87d0b17712dcf6ac140a39d76debf4d4b96ab19e 100644 (file)
@@ -4,7 +4,7 @@ readelf is built as follows:
 
 * From binutils Git: https://sourceware.org/git/binutils-gdb.git
 * git fetch --all --tags
-* git co binutils-<VERSION>-branch
+* git checkout binutils-<VERSION>-release
 * Run configure, then make
 * Built on a 64-bit Ubuntu machine
 
index f7df45636d5206357d151cc99bcc905af4da1887..e23d301565e2d5cac71e3d66d95aa7a1c4b6a93c 100755 (executable)
Binary files a/test/external_tools/readelf and b/test/external_tools/readelf differ
index c1fc48c2c8cd5d40417dfa4e5000a44d3d4b1293..f94c42ebec146e2cb637c5d1abd22a509c12077c 100755 (executable)
@@ -72,6 +72,11 @@ def run_test_on_file(filename, verbose=False, opt=None):
     else:
         options = [opt]
 
+    # TODO(sevaa): excluding two files from the --debug-dump=Ranges test until the maintainers
+    # of GNU binutils fix https://sourceware.org/bugzilla/show_bug.cgi?id=30781
+    if filename.endswith('dwarf_test_versions_mix.elf') or filename.endswith('dwarf_v5ops.so.elf'):
+        options.remove('--debug-dump=Ranges')
+
     for option in options:
         if verbose: testlog.info("..option='%s'" % option)