Mixing v4 and v5 loclists and rangelists sections (#429)
author Seva Alekseyev <sevaa@yarxi.ru>
Fri, 8 Jul 2022 12:41:52 +0000 (08:41 -0400)
committer GitHub <noreply@github.com>
Fri, 8 Jul 2022 12:41:52 +0000 (05:41 -0700)
* More GNU note dumping

* aranges fix for empty sections

* Mixed v4/v5 sections.

* Test for readelf

* Comments, typo

elftools/dwarf/aranges.py
elftools/dwarf/dwarfinfo.py
elftools/dwarf/locationlists.py
elftools/dwarf/ranges.py
elftools/elf/descriptions.py
elftools/elf/enums.py
scripts/readelf.py
test/run_readelf_tests.py
test/testfiles_for_readelf/dwarf_test_versions_mix.elf [new file with mode: 0644]

index 3f140f44f6590fef1befc75804475450ab79e1ff..ae409756471116206865c77c26fb092ef6eb429c 100644 (file)
@@ -56,8 +56,13 @@ class ARanges(object):
 
 
     #------ PRIVATE ------#
-    def _get_entries(self):
+    def _get_entries(self, need_empty=False):
         """ Populate self.entries with ARangeEntry tuples for each range of addresses
+
+            Terminating null entries of CU blocks are not returned, unless
+            need_empty is set to True and the CU block contains nothing but
+            a null entry. The null entry will have both address and length
+            set to 0. 
         """
         self.stream.seek(0)
         entries = []
@@ -77,10 +82,15 @@ class ARanges(object):
                 seek_to = int(math.ceil(fp/float(tuple_size)) * tuple_size)
                 self.stream.seek(seek_to)
 
+                # We now have a binary with empty arange sections - nothing but a NULL entry.
+                # To keep compatibility with readelf, we need to return those.
+                # A two level list would be a prettier solution, but this will be compatible.
+                got_entries = False
+
                 # entries in this set/CU
                 addr = struct_parse(addr_size('addr'), self.stream)
                 length = struct_parse(addr_size('length'), self.stream)
-                while addr != 0 or length != 0:
+                while addr != 0 or length != 0 or (not got_entries and need_empty):
                     # 'begin_addr length info_offset version address_size segment_size'
                     entries.append(
                         ARangeEntry(begin_addr=addr,
@@ -90,8 +100,11 @@ class ARanges(object):
                             version=aranges_header["version"],
                             address_size=aranges_header["address_size"],
                             segment_size=aranges_header["segment_size"]))
-                    addr = struct_parse(addr_size('addr'), self.stream)
-                    length = struct_parse(addr_size('length'), self.stream)
+                    got_entries = True
+                    if addr != 0 or length != 0:
+                        addr = struct_parse(addr_size('addr'), self.stream)
+                        length = struct_parse(addr_size('length'), self.stream)
+                    
             # Segmentation exists in executable
             elif aranges_header["segment_size"] != 0:
                 raise NotImplementedError("Segmentation not implemented")
index 7c7060e1c49f37a016573545f108698e771be7f5..1cc20015ff072ec022a3cf7e8e80655e29d5967d 100644 (file)
@@ -19,8 +19,8 @@ from .compileunit import CompileUnit
 from .abbrevtable import AbbrevTable
 from .lineprogram import LineProgram
 from .callframe import CallFrameInfo
-from .locationlists import LocationLists
-from .ranges import RangeLists
+from .locationlists import LocationLists, LocationListsPair
+from .ranges import RangeLists, RangeListsPair
 from .aranges import ARanges
 from .namelut import NameLUT
 
@@ -343,26 +343,32 @@ class DWARFInfo(object):
             return None
 
     def location_lists(self):
-        """ Get a LocationLists object representing the .debug_loc section of
+        """ Get a LocationLists object representing the .debug_loc/debug_loclists section of
             the DWARF data, or None if this section doesn't exist.
+
+            If both sections exist, it returns a LocationListsPair.
         """
-        if self.debug_loclists_sec:
-            assert(self.debug_loc_sec is None) # Are there ever files with both kinds of location sections?
+        if self.debug_loclists_sec and self.debug_loc_sec is None:
             return LocationLists(self.debug_loclists_sec.stream, self.structs, 5, self)
-        elif self.debug_loc_sec:
-            return LocationLists(self.debug_loc_sec.stream, self.structs)
+        elif self.debug_loc_sec and self.debug_loclists_sec is None:
+            return LocationLists(self.debug_loc_sec.stream, self.structs, 4, self)
+        elif self.debug_loc_sec and self.debug_loclists_sec:
+            return LocationListsPair(self.debug_loclists_sec.stream, self.debug_loclists_sec.stream, self.structs, self)
         else:
             return None
 
     def range_lists(self):
-        """ Get a RangeLists object representing the .debug_ranges section of
+        """ Get a RangeLists object representing the .debug_ranges/.debug_rnglists section of
             the DWARF data, or None if this section doesn't exist.
+
+            If both sections exist, it returns a RangeListsPair.
         """
-        if self.debug_rnglists_sec:
-            assert(self.debug_ranges_sec is None)
+        if self.debug_rnglists_sec and self.debug_ranges_sec is None:
+            # Only the rnglists section is present: read it as version 5.
             return RangeLists(self.debug_rnglists_sec.stream, self.structs, 5, self)
-        elif self.debug_ranges_sec:
+        elif self.debug_ranges_sec and self.debug_rnglists_sec is None:
+            # Only the ranges section is present: read it as version 4.
             return RangeLists(self.debug_ranges_sec.stream, self.structs, 4, self)
+        elif self.debug_ranges_sec and self.debug_rnglists_sec:
+            # Both are present: the pair takes the v4 stream first, then the v5 one.
+            return RangeListsPair(self.debug_ranges_sec.stream, self.debug_rnglists_sec.stream, self.structs, self)
         else:
             return None
 
index eae55c5b28a390a6393ff6a0824abe22701064c7..0792d452da7e3a3dd3ac8c32ba305508f9b4946f 100644 (file)
@@ -10,12 +10,42 @@ import os
 from collections import namedtuple
 from ..common.exceptions import DWARFError
 from ..common.utils import struct_parse
+from .dwarf_util import _iter_CUs_in_section
 
 LocationExpr = namedtuple('LocationExpr', 'loc_expr')
 LocationEntry = namedtuple('LocationEntry', 'entry_offset entry_length begin_offset end_offset loc_expr is_absolute')
 BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset entry_length base_address')
 LocationViewPair = namedtuple('LocationViewPair', 'entry_offset begin end')
 
+class LocationListsPair(object):
+    """For those binaries that contain both a debug_loc and a debug_loclists section,
+    it holds a LocationLists object for both and forwards API calls to the right one,
+    based on the DWARF version of the CU that owns the referencing DIE.
+    """
+    def __init__(self, streamv4, streamv5, structs, dwarfinfo=None):
+        # streamv4 backs the version 4 LocationLists, streamv5 the version 5 one.
+        self._loc = LocationLists(streamv4, structs, 4, dwarfinfo)
+        self._loclists = LocationLists(streamv5, structs, 5, dwarfinfo)
+
+    def get_location_list_at_offset(self, offset, die=None):
+        """See LocationLists.get_location_list_at_offset().
+
+        Unlike there, "die" is mandatory: the version of its CU selects the
+        section to read from. A DWARFError is raised if it is omitted.
+        """
+        if die is None:
+            raise DWARFError("For this binary, \"die\" needs to be provided")
+        # The DWARF version is read from the CU header.
+        section = self._loclists if die.cu.header.version >= 5 else self._loc
+        return section.get_location_list_at_offset(offset, die)
+
+    def iter_location_lists(self):
+        """Tricky proposition, since the structure of loc and loclists
+        is not identical. A realistic readelf implementation needs to be aware of both.
+
+        Always raises DWARFError.
+        """
+        raise DWARFError("Iterating through two sections is not supported")
+
+    def iter_CUs(self):
+        """See LocationLists.iter_CUs()
+
+        There are no CUs in DWARFv4 sections.
+
+        Always raises DWARFError.
+        """
+        raise DWARFError("Iterating through two sections is not supported")
+
 class LocationLists(object):
     """ A single location list is a Python list consisting of LocationEntry or
         BaseAddressEntry objects.
@@ -57,7 +87,7 @@ class LocationLists(object):
         # Location lists are referenced by DIE attributes by offset or by index.
 
         # As of DWARFv5, it may contain, in addition to proper location lists,
-        #location list view pairs, which are referenced by the nonstandard DW_AT_GNU_locviews
+        # location list view pairs, which are referenced by the nonstandard DW_AT_GNU_locviews
         # attribute. A set of locview pairs (which is a couple of ULEB128 values) may precede
         # a location list; the former is referenced by the DW_AT_GNU_locviews attribute, the
         # latter - by DW_AT_location (in the same DIE). Binutils' readelf dumps those.
@@ -67,20 +97,21 @@ class LocationLists(object):
         #
         # Taking a cue from binutils, we would have to scan this section while looking at
         # what's in DIEs.
+        ver5 = self.version >= 5
         stream = self.stream
         stream.seek(0, os.SEEK_END)
         endpos = stream.tell()
 
         stream.seek(0, os.SEEK_SET)
 
-        if self.version >= 5:
-            # Need to provide support for DW_AT_GNU_locviews. They are interspersed in
-            # the locations section, no way to tell where short of checking all DIEs
-            all_offsets = set() # Set of offsets where either a locview pair set can be found, or a view-less loclist
-            locviews = dict() # Map of locview offset to the respective loclist offset
-            cu_map = dict() # Map of loclist offsets to CUs
-            for cu in self.dwarfinfo.iter_CUs():
-                cu_ver = cu['version']
+        # Need to provide support for DW_AT_GNU_locviews. They are interspersed in
+        # the locations section, no way to tell where short of checking all DIEs
+        all_offsets = set() # Set of offsets where either a locview pair set can be found, or a view-less loclist
+        locviews = dict() # Map of locview offset to the respective loclist offset
+        cu_map = dict() # Map of loclist offsets to CUs
+        for cu in self.dwarfinfo.iter_CUs():
+            cu_ver = cu['version']
+            if (cu_ver >= 5) == ver5:
                 for die in cu.iter_DIEs():
                     # A combination of location and locviews means there is a location list
                     # preceded by several locview pairs
@@ -96,15 +127,16 @@ class LocationLists(object):
                     # Scan other attributes for location lists
                     for key in die.attributes:
                         attr = die.attributes[key]
-                        if (key != 'DW_AT_location' and
+                        if ((key != 'DW_AT_location' or 'DW_AT_GNU_locviews' not in die.attributes) and
                             LocationParser.attribute_has_location(attr, cu_ver) and
                             LocationParser._attribute_has_loc_list(attr, cu_ver)):
                             list_offset = attr.value
                             all_offsets.add(list_offset)
                             cu_map[list_offset] = cu
-            all_offsets = list(all_offsets)
-            all_offsets.sort()
+        all_offsets = list(all_offsets)
+        all_offsets.sort()
 
+        if ver5:
             # Loclists section is organized as an array of CUs, each length prefixed.
             # We don't assume that the CUs go in the same order as the ones in info.
             offset_index = 0
@@ -133,9 +165,22 @@ class LocationLists(object):
                             next_offset = cu_end_offset # And implicitly quit the loop within the CU
                         stream.seek(next_offset, os.SEEK_SET)
         else:
-            # Just call _parse_location_list_from_stream until the stream ends
-            while stream.tell() < endpos:
-                yield self._parse_location_list_from_stream()
+            for offset in all_offsets:
+                list_offset = locviews.get(offset, offset)
+                if cu_map[list_offset].header.version < 5:
+                    stream.seek(offset, os.SEEK_SET)
+                    locview_pairs = self._parse_locview_pairs(locviews)
+                    entries = self._parse_location_list_from_stream()
+                    yield locview_pairs + entries
+
+    def iter_CUs(self):
+        """For DWARF5 returns an array of objects, where each one has an array of offsets
+
+        Raises DWARFError if this object was created with a version below 5.
+        """
+        if self.version < 5:
+            raise DWARFError("CU iteration in loclists is not supported with DWARF<5")
+
+        # The structs of the first CU yielded by dwarfinfo.iter_CUs() are used
+        # to parse every header in this section.
+        # NOTE(review): presumably any CU's structs would do here - confirm.
+        structs = next(self.dwarfinfo.iter_CUs()).structs # Just pick one
+        return _iter_CUs_in_section(self.stream, structs, structs.Dwarf_loclists_CU_header)
 
     #------ PRIVATE ------#
 
index e5c9fde9d5a1525eb6bad2d072f95ca147a6ebea..0ed5545c175be7c8c76823e0199cd4367e65eafb 100644 (file)
@@ -31,6 +31,51 @@ entry_translate = {
     'DW_RLE_startx_length': not_implemented
 }
 
+class RangeListsPair(object):
+    """For those binaries that contain both a debug_ranges and a debug_rnglists section,
+    it holds a RangeLists object for both and forwards API calls to the right one based
+    on the CU version.
+    """
+    def __init__(self, streamv4, streamv5, structs, dwarfinfo=None):
+        # streamv4 backs the version 4 RangeLists, streamv5 the version 5 one.
+        self._ranges = RangeLists(streamv4, structs, 4, dwarfinfo)
+        self._rnglists = RangeLists(streamv5, structs, 5, dwarfinfo)
+
+    def get_range_list_at_offset(self, offset, cu=None):
+        """Forwards the call to either v4 section or v5 one,
+        depending on DWARF version in the CU.
+
+        "cu" is mandatory here; a DWARFError is raised if it is omitted.
+        """
+        if cu is None:
+            raise DWARFError("For this binary, \"cu\" needs to be provided")
+        # Version 5 and above goes to the rnglists object, anything lower to ranges.
+        section = self._rnglists if cu.header.version >= 5 else self._ranges
+        return section.get_range_list_at_offset(offset, cu)
+
+    def get_range_list_at_offset_ex(self, offset):
+        """Gets an untranslated v5 rangelist from the v5 section.
+        """
+        return self._rnglists.get_range_list_at_offset_ex(offset)
+
+    def iter_range_lists(self):
+        """Tricky proposition, since the structure of ranges and rnglists
+        is not identical. A realistic readelf implementation needs to be aware of both.
+
+        Always raises DWARFError.
+        """
+        raise DWARFError("Iterating through two sections is not supported")
+
+    def iter_CUs(self):
+        """See RangeLists.iter_CUs()
+
+        CU structure is only present in DWARFv5 rnglists sections. A well written
+        section dumper should check if one is present.
+        """
+        # Only the v5 object is consulted.
+        return self._rnglists.iter_CUs()
+
+    def iter_CU_range_lists_ex(self, cu):
+        """See RangeLists.iter_CU_range_lists_ex()
+
+        CU structure is only present in DWARFv5 rnglists sections. A well written
+        section dumper should check if one is present.
+        """
+        # Only the v5 object is consulted.
+        return self._rnglists.iter_CU_range_lists_ex(cu)
 class RangeLists(object):
     """ A single range list is a Python list consisting of RangeEntry or
         BaseAddressEntry objects.
@@ -50,7 +95,7 @@ class RangeLists(object):
         self.version = version
         self._dwarfinfo = dwarfinfo
 
-    def get_range_list_at_offset(self, offset):
+    def get_range_list_at_offset(self, offset, cu=None):
         """ Get a range list at the given offset in the section.
         """
         self.stream.seek(offset, os.SEEK_SET)
@@ -63,14 +108,16 @@ class RangeLists(object):
         return struct_parse(self.structs.Dwarf_rnglists_entries, self.stream, offset)
 
     def iter_range_lists(self):
-        """ Yield all range lists found in the section.
+        """ Yield all range lists found in the section according to readelf rules.
+        Scans the DIEs for rangelist offsets, then pulls those.
         """
         # Calling parse until the stream ends is wrong, because ranges can overlap.
         # Need to scan the DIEs to know all range locations
+        ver5 = self.version >= 5
         all_offsets = list(set(die.attributes['DW_AT_ranges'].value
             for cu in self._dwarfinfo.iter_CUs()
             for die in cu.iter_DIEs()
-            if 'DW_AT_ranges' in die.attributes))
+            if 'DW_AT_ranges' in die.attributes and (cu.header.version >= 5) == ver5))
         all_offsets.sort()
 
         for offset in all_offsets:
index 4ac33c1a26b01d8db66fee481511b6d7c813e1de..38c80b6e6ee5b418c39936b5d4a4fabe49270535 100644 (file)
@@ -259,12 +259,19 @@ def describe_note_gnu_property_x86_feature_1(value):
             descs.append(desc)
     return 'x86 feature: ' + ', '.join(descs)
 
-def describe_note_gnu_property_x86_isa_1(value):
+def describe_note_gnu_property_x86_feature_2_used(value):
+    """ Describe a GNU_PROPERTY_X86_FEATURE_2_USED bit field: returns
+        'x86 feature used: ' followed by the comma-separated descriptions
+        of every mask in _DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_2_FLAGS that
+        is set in value, in table order.
+    """
+    descs = []
+    for mask, desc in _DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_2_FLAGS:
+        if value & mask:
+            descs.append(desc)
+    return 'x86 feature used: ' + ', '.join(descs)
+
+def describe_note_gnu_property_x86_isa_1(value, verb='needed'):
+    """ Describe a GNU_PROPERTY_X86_ISA_1_xxx bit field: returns
+        'x86 ISA <verb>: ' followed by the comma-separated descriptions of
+        every mask in _DESCR_NOTE_GNU_PROPERTY_X86_ISA_1_FLAGS that is set
+        in value, in table order.
+
+        verb defaults to 'needed', so that one-argument calls keep
+        producing the 'x86 ISA needed: ...' text.
+    """
     descs = []
     for mask, desc in _DESCR_NOTE_GNU_PROPERTY_X86_ISA_1_FLAGS:
         if value & mask:
             descs.append(desc)
-    return 'x86 ISA needed: ' + ', '.join(descs)
+    return 'x86 ISA %s: %s' % (verb, ', '.join(descs))
 
 def describe_note_gnu_properties(properties):
     descriptions = []
@@ -285,11 +292,21 @@ def describe_note_gnu_properties(properties):
                 prop_desc = ' <corrupt length: 0x%x>' % sz
             else:
                 prop_desc = describe_note_gnu_property_x86_feature_1(d)
+        elif t == 'GNU_PROPERTY_X86_FEATURE_2_USED':
+            if sz != 4:
+                prop_desc = ' <corrupt length: 0x%x>' % sz
+            else:
+                prop_desc = describe_note_gnu_property_x86_feature_2_used(d)                
         elif t == 'GNU_PROPERTY_X86_ISA_1_NEEDED':
             if sz != 4:
                 prop_desc = ' <corrupt length: 0x%x>' % sz
             else:
-                prop_desc = describe_note_gnu_property_x86_isa_1(d)
+                prop_desc = describe_note_gnu_property_x86_isa_1(d, "needed")
+        elif t == 'GNU_PROPERTY_X86_ISA_1_USED':
+            if sz != 4:
+                prop_desc = ' <corrupt length: 0x%x>' % sz
+            else:
+                prop_desc = describe_note_gnu_property_x86_isa_1(d, "used")
         elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC:
             prop_desc = '<processor-specific type 0x%x data: %s >' % (t, bytes2hex(d, sep=' '))
         elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER:
@@ -615,6 +632,17 @@ _DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_1_FLAGS = (
     (8, 'LAM_U57'),
 )
 
+# Bit masks for GNU_PROPERTY_X86_FEATURE_2_xxx flags in the form
+# (mask, flag_description) in the desired output order
+_DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_2_FLAGS = (
+    (1, 'x86'),
+    (2, 'x87'),
+    (4, 'MMX'),
+    (8, 'XMM'),
+    (16, 'YMM'),
+    (32, 'ZMM'),
+)
+
 # Same for GNU_PROPERTY_X86_SET_1_xxx
 _DESCR_NOTE_GNU_PROPERTY_X86_ISA_1_FLAGS = (
     (1, 'x86-64-baseline'),
index a5855c2677e7c8eef23148b68400ba4f61488c72..745aefc793a71a631ed2817c40aaa0bdaacb8274 100644 (file)
@@ -878,6 +878,8 @@ ENUM_NOTE_GNU_PROPERTY_TYPE = dict(
     GNU_PROPERTY_NO_COPY_ON_PROTECTED=2,
     GNU_PROPERTY_X86_FEATURE_1_AND=0xc0000002,
     GNU_PROPERTY_X86_ISA_1_NEEDED=0xc0008002,
+    GNU_PROPERTY_X86_FEATURE_2_USED=0xc0010001,
+    GNU_PROPERTY_X86_ISA_1_USED=0xc0010002,
     _default_=Pass,
 )
 
index 3895226df40e7b0aa9ef89a4609618d5fa396183..2095c915d629635efdb34ffb33790ff2421ca840 100755 (executable)
@@ -62,9 +62,8 @@ from elftools.dwarf.descriptions import (
     )
 from elftools.dwarf.constants import (
     DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file)
-from elftools.dwarf.locationlists import LocationParser, LocationEntry, LocationViewPair, BaseAddressEntry
-from elftools.dwarf.ranges import RangeEntry # ranges.BaseAddressEntry collides with the one above
-import elftools.dwarf.ranges
+from elftools.dwarf.locationlists import LocationParser, LocationEntry, LocationViewPair, BaseAddressEntry as LocBaseAddressEntry, LocationListsPair
+from elftools.dwarf.ranges import RangeEntry, BaseAddressEntry as RangeBaseAddressEntry, RangeListsPair
 from elftools.dwarf.callframe import CIE, FDE, ZERO
 from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry
 from elftools.dwarf.enums import ENUM_DW_UT
@@ -76,6 +75,24 @@ def _get_cu_base(cu):
         return attr['DW_AT_low_pc'].value
     elif 'DW_AT_entry_pc' in attr:
         return attr['DW_AT_entry_pc'].value
+    elif 'DW_AT_ranges' in attr:
+        # Rare case but happens: rangelist in the top DIE.
+        # If there is a base or at least one absolute entry,
+        # this will give us the base IP for the CU.
+        rl = cu.dwarfinfo.range_lists().get_range_list_at_offset(attr['DW_AT_ranges'].value, cu)
+        base_ip = None
+        for r in rl:
+            if isinstance(r, RangeBaseAddressEntry):
+                ip = r.base_address
+            elif isinstance(r, RangeEntry) and r.is_absolute:
+                ip = r.begin_offset
+            else:
+                ip = None
+            if ip is not None and (base_ip is None or ip < base_ip):
+                base_ip = ip
+        if base_ip is None:
+            raise ValueError("Can't find the base IP (low_pc) for a CU")
+        return base_ip
     else:
         raise ValueError("Can't find the base IP (low_pc) for a CU")
 
@@ -1178,14 +1195,13 @@ class ReadElf(object):
                         '0' if state.address == 0 else self._format_hex(state.address),
                         'x' if state.is_stmt and not state.end_sequence else ''))
                 else:
-                    # What's the deal with op_index after address on DWARF 5? Is omitting it
-                    # a function of DWARF version, or ISA, or what?
-                    # Used to be unconditional, even on non-VLIW machines.
+                    # In readelf, on non-VLIW machines there is no op_index postfix after address.
+                    # It used to be unconditional.
                     self._emitline('%-35s  %s  %18s%s %s' % (
                         bytes2str(lineprogram['file_entry'][state.file - 1].name),
                         "%11d" % (state.line,) if not state.end_sequence else '-',
                         '0' if state.address == 0 else self._format_hex(state.address),
-                        '' if ver5 else '[%d]' % (state.op_index,),
+                        '' if lineprogram.header.maximum_operations_per_instruction == 1 else '[%d]' % (state.op_index,),
                         'x' if state.is_stmt and not state.end_sequence else ''))
                 if entry.command == DW_LNS_copy:
                     # Another readelf oddity...
@@ -1296,8 +1312,11 @@ class ReadElf(object):
         aranges_table = self._dwarfinfo.get_aranges()
         if aranges_table == None:
             return
-        # seems redundent, but we need to get the unsorted set of entries to match system readelf
-        unordered_entries = aranges_table._get_entries()
+        # Seems redundant, but we need to get the unsorted set of entries
+        # to match system readelf.
+        # Also, sometimes there are blank sections in aranges, but readelf
+        # dumps them, so we should too.
+        unordered_entries = aranges_table._get_entries(need_empty=True)
 
         if len(unordered_entries) == 0:
             self._emitline()
@@ -1320,9 +1339,10 @@ class ReadElf(object):
                 self._emitline('  Segment Size:             %d' % (entry.segment_size))
                 self._emitline()
                 self._emitline('    Address            Length')
-            self._emitline('    %s %s' % (
-                self._format_hex(entry.begin_addr, fullhex=True, lead0x=False),
-                self._format_hex(entry.length, fullhex=True, lead0x=False)))
+            if entry.begin_addr != 0 or entry.length != 0:
+                self._emitline('    %s %s' % (
+                    self._format_hex(entry.begin_addr, fullhex=True, lead0x=False),
+                    self._format_hex(entry.length, fullhex=True, lead0x=False)))
             prev_offset = entry.info_offset
         self._emitline('    %s %s' % (
                 self._format_hex(0, fullhex=True, lead0x=False),
@@ -1440,15 +1460,21 @@ class ReadElf(object):
         """ Dump the location lists from .debug_loc/.debug_loclists section
         """
         di = self._dwarfinfo
-        loc_lists = di.location_lists()
-        if not loc_lists: # No locations section - readelf outputs nothing
+        loc_lists_sec = di.location_lists()
+        if not loc_lists_sec: # No locations section - readelf outputs nothing
             return
 
-        loc_lists = list(loc_lists.iter_location_lists())
-        if len(loc_lists) == 0:
-            # Present but empty locations section - readelf outputs a message
-            self._emitline("\nSection '%s' has no debugging data." % (di.debug_loclists_sec or di.debug_loc_sec).name)
-            return
+        if isinstance(loc_lists_sec, LocationListsPair):
+            self._dump_debug_locsection(di, loc_lists_sec._loc)
+            self._dump_debug_locsection(di, loc_lists_sec._loclists)
+        else:
+            self._dump_debug_locsection(di, loc_lists_sec)
+        
+    def _dump_debug_locsection(self, di, loc_lists_sec):        
+        """ Dump the location lists from .debug_loc/.debug_loclists section
+        """
+        ver5 = loc_lists_sec.version >= 5
+        section_name = (di.debug_loclists_sec if ver5 else di.debug_loc_sec).name
 
         # To dump a location list, one needs to know the CU.
         # Scroll through DIEs once, list the known location list offsets.
@@ -1467,81 +1493,106 @@ class ReadElf(object):
         addr_width = addr_size * 2 # In hex digits, 8 or 16
         line_template = "    %%08x %%0%dx %%0%dx %%s%%s" % (addr_width, addr_width)
 
-        self._emitline('Contents of the %s section:\n' % (di.debug_loclists_sec or di.debug_loc_sec).name)
+        loc_lists = list(loc_lists_sec.iter_location_lists())
+        if len(loc_lists) == 0:
+            # Present but empty locations section - readelf outputs a message
+            self._emitline("\nSection '%s' has no debugging data." % (section_name,))
+            return
+
+        self._emitline('Contents of the %s section:\n' % (section_name,))
         self._emitline('    Offset   Begin            End              Expression')
         for loc_list in loc_lists:
-            in_views = False
-            has_views = False
-            base_ip = None
-            loc_entry_count = 0
-            cu = None
-            for entry in loc_list:
-                if isinstance(entry, LocationViewPair):
-                    has_views = in_views = True
-                    # The "v" before address is conditional in binutils, haven't figured out how
-                    self._emitline("    %08x v%015x v%015x location view pair" % (entry.entry_offset, entry.begin, entry.end))
-                else:
-                    if in_views:
-                        in_views = False
-                        self._emitline("")
-
-                    # Need the CU for this loclist, but the map is keyed by the offset
-                    # of the first entry in the loclist. Got to skip the views first.
-                    if cu is None:
-                        cu = cu_map.get(entry.entry_offset, False)
-                        if not cu:
-                            raise ValueError("Location list can't be tracked to a CU")
-
-                    if isinstance(entry, LocationEntry):
-                        if base_ip is None and not entry.is_absolute:
-                            base_ip = _get_cu_base(cu)
-
-                        begin_offset = (0 if entry.is_absolute else base_ip) + entry.begin_offset
-                        end_offset = (0 if entry.is_absolute else base_ip) + entry.end_offset
-                        expr = describe_DWARF_expr(entry.loc_expr, cu.structs, cu.cu_offset)
-                        if has_views:
-                            view = loc_list[loc_entry_count]
-                            postfix = ' (start == end)' if entry.begin_offset == entry.end_offset and view.begin == view.end else ''
-                            self._emitline('    %08x v%015x v%015x views at %08x for:' %(
-                                entry.entry_offset,
-                                view.begin,
-                                view.end,
-                                view.entry_offset))
-                            self._emitline('             %016x %016x %s%s' %(
-                                begin_offset,
-                                end_offset,
-                                expr,
-                                postfix))
-                            loc_entry_count += 1
-                        else:
-                            postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else ''
-                            self._emitline(line_template % (
-                                entry.entry_offset,
-                                begin_offset,
-                                end_offset,
-                                expr,
-                                postfix))
-                    elif isinstance(entry, BaseAddressEntry):
-                        base_ip = entry.base_address
-                        self._emitline("    %08x %016x (base address)" % (entry.entry_offset, entry.base_address))
-
-            # Pyelftools doesn't store the terminating entry,
-            # but readelf emits its offset, so this should too.
-            last = loc_list[-1]
-            self._emitline("    %08x <End of list>" % (last.entry_offset + last.entry_length))
+            self._dump_loclist(loc_list, line_template, cu_map)
+
+    def _dump_loclist(self, loc_list, line_template, cu_map):
+        """ Dump a single location list, one or two lines per entry,
+            followed by an "<End of list>" line.
+
+            loc_list:
+                The list to dump. LocationViewPair, LocationEntry and
+                LocBaseAddressEntry items are recognized.
+
+            line_template:
+                Format string for the line of a LocationEntry in a list
+                without view pairs.
+
+            cu_map:
+                Maps the offset of the first non-view entry of a list to
+                its CU. A ValueError is raised if the list is not in it.
+        """
+        in_views = False
+        has_views = False
+        base_ip = None
+        loc_entry_count = 0
+        cu = None
+        for entry in loc_list:
+            if isinstance(entry, LocationViewPair):
+                has_views = in_views = True
+                # The "v" before address is conditional in binutils, haven't figured out how
+                self._emitline("    %08x v%015x v%015x location view pair" % (entry.entry_offset, entry.begin, entry.end))
+            else:
+                if in_views:
+                    # First entry after the view pairs: separate with a blank line
+                    in_views = False
+                    self._emitline("")
+
+                # Readelf quirk: indexed loclists don't show the real base IP
+                # NOTE(review): with cu_map None, cu stays None, yet cu.structs is
+                # read below for every LocationEntry - confirm no caller passes None.
+                if cu_map is None:
+                    base_ip = 0
+                elif cu is None:
+                    # The map is keyed by the offset of the first non-view entry
+                    cu = cu_map.get(entry.entry_offset, False)
+                    if not cu:
+                        raise ValueError("Location list can't be tracked to a CU")
+
+                if isinstance(entry, LocationEntry):
+                    # The CU base is looked up lazily, only for relative entries
+                    if base_ip is None and not entry.is_absolute:
+                        base_ip = _get_cu_base(cu)
+
+                    begin_offset = (0 if entry.is_absolute else base_ip) + entry.begin_offset
+                    end_offset = (0 if entry.is_absolute else base_ip) + entry.end_offset
+                    expr = describe_DWARF_expr(entry.loc_expr, cu.structs, cu.cu_offset)
+                    if has_views:
+                        # Assumes the view pairs come first in loc_list, in entry order
+                        view = loc_list[loc_entry_count]
+                        postfix = ' (start == end)' if entry.begin_offset == entry.end_offset and view.begin == view.end else ''
+                        self._emitline('    %08x v%015x v%015x views at %08x for:' %(
+                            entry.entry_offset,
+                            view.begin,
+                            view.end,
+                            view.entry_offset))
+                        self._emitline('             %016x %016x %s%s' %(
+                            begin_offset,
+                            end_offset,
+                            expr,
+                            postfix))
+                        loc_entry_count += 1
+                    else:
+                        postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else ''
+                        self._emitline(line_template % (
+                            entry.entry_offset,
+                            begin_offset,
+                            end_offset,
+                            expr,
+                            postfix))
+                elif isinstance(entry, LocBaseAddressEntry):
+                    # A base address entry replaces the base for the entries that follow
+                    base_ip = entry.base_address
+                    self._emitline("    %08x %016x (base address)" % (entry.entry_offset, entry.base_address))
+
+        # Pyelftools doesn't store the terminating entry,
+        # but readelf emits its offset, so this should too.
+        last = loc_list[-1]
+        self._emitline("    %08x <End of list>" % (last.entry_offset + last.entry_length))
 
     def _dump_debug_ranges(self):
+        """ Dump the range lists returned by self._dwarfinfo.range_lists().
+            For a RangeListsPair, both underlying objects are dumped,
+            _ranges first, then _rnglists.
+        """
         # TODO: GNU readelf format doesn't need entry_length?
         di = self._dwarfinfo
-        range_lists = di.range_lists()
-        if not range_lists: # No ranges section - readelf outputs nothing
+        range_lists_sec = di.range_lists()
+        if not range_lists_sec: # No ranges section - readelf outputs nothing
             return
 
-        ver5 = range_lists.version >= 5
-        range_lists = list(range_lists.iter_range_lists())
+        if isinstance(range_lists_sec, RangeListsPair):
+            self._dump_debug_rangesection(di, range_lists_sec._ranges)
+            self._dump_debug_rangesection(di, range_lists_sec._rnglists)
+        else:
+            self._dump_debug_rangesection(di, range_lists_sec)
+
+    def _dump_debug_rangesection(self, di, range_lists_sec):
+        # In the master branch of binutils, the v5 dump format is way different by now.
+
+        ver5 = range_lists_sec.version >= 5
+        section_name = (di.debug_rnglists_sec if ver5 else di.debug_ranges_sec).name
+        addr_size = di.config.default_address_size # In bytes, 4 or 8
+        addr_width = addr_size * 2 # In hex digits, 8 or 16
+        line_template = "    %%08x %%0%dx %%0%dx %%s" % (addr_width, addr_width)
+        base_template = "    %%08x %%0%dx (base address)" % (addr_width)        
+
+        range_lists = list(range_lists_sec.iter_range_lists())
         if len(range_lists) == 0:
             # Present but empty locations section - readelf outputs a message
-            self._emitline("\nSection '%s' has no debugging data." % (di.debug_rnglists_sec or di.debug_ranges_sec).name)
+            self._emitline("\nSection '%s' has no debugging data." % section_name)
             return
 
         # In order to determine the base address of the range
@@ -1551,36 +1602,34 @@ class ReadElf(object):
             for die in cu.iter_DIEs()
             if 'DW_AT_ranges' in die.attributes}
 
-        addr_size = di.config.default_address_size # In bytes, 4 or 8
-        addr_width = addr_size * 2 # In hex digits, 8 or 16
-        line_template = "    %%08x %%0%dx %%0%dx %%s" % (addr_width, addr_width)
-        base_template = "    %%08x %%0%dx (base address)" % (addr_width)
-
-        self._emitline('Contents of the %s section:\n' % (di.debug_rnglists_sec or di.debug_ranges_sec).name)
+        self._emitline('Contents of the %s section:\n' % section_name)
         self._emitline('    Offset   Begin    End')
 
         for range_list in range_lists:
-            # Weird discrepancy in binutils: for DWARFv5 it outputs entry offset,
-            # for DWARF<=4 list offset.
-            first = range_list[0]
-            base_ip = _get_cu_base(cu_map[first.entry_offset])
-            for entry in range_list:
-                if isinstance(entry, RangeEntry):
-                    postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else ''
-                    self._emitline(line_template % (
-                        entry.entry_offset if ver5 else first.entry_offset,
-                        (0 if entry.is_absolute else base_ip) + entry.begin_offset,
-                        (0 if entry.is_absolute else base_ip) + entry.end_offset,
-                        postfix))
-                elif isinstance(entry, elftools.dwarf.ranges.BaseAddressEntry):
-                    base_ip = entry.base_address
-                    self._emitline(base_template % (
-                        entry.entry_offset if ver5 else first.entry_offset,
-                        entry.base_address))
-                else:
-                    raise NotImplementedError("Unknown object in a range list")
-            last = range_list[-1]
-            self._emitline('    %08x <End of list>' % (last.entry_offset + last.entry_length if ver5 else first.entry_offset))
+            self._dump_rangelist(range_list, cu_map, ver5, line_template, base_template)
+
+    def _dump_rangelist(self, range_list, cu_map, ver5, line_template, base_template):
+        # Emit one range list, entry by entry, followed by an '<End of list>' line.
+        # binutils quirk: rows show the entry's own offset for DWARFv5, but the list's offset for DWARF<=4.
+        first = range_list[0]
+        base_ip = _get_cu_base(cu_map[first.entry_offset])
+        for entry in range_list:
+            if isinstance(entry, RangeEntry):
+                postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else ''
+                self._emitline(line_template % (
+                    entry.entry_offset if ver5 else first.entry_offset,
+                    (0 if entry.is_absolute else base_ip) + entry.begin_offset,
+                    (0 if entry.is_absolute else base_ip) + entry.end_offset,
+                    postfix))
+            elif isinstance(entry,RangeBaseAddressEntry):
+                base_ip = entry.base_address  # later non-absolute entries are offset from this base
+                self._emitline(base_template % (
+                    entry.entry_offset if ver5 else first.entry_offset,
+                    entry.base_address))
+            else:
+                raise NotImplementedError("Unknown object in a range list")
+        last = range_list[-1]
+        self._emitline('    %08x <End of list>' % (last.entry_offset + last.entry_length if ver5 else first.entry_offset))
 
     def _display_arch_specific_arm(self):
         """ Display the ARM architecture-specific info contained in the file.
index ad56f4e31dd1b81a47eaeaa1d0bb371046074b4d..c1fc48c2c8cd5d40417dfa4e5000a44d3d4b1293 100755 (executable)
@@ -88,7 +88,10 @@ def run_test_on_file(filename, verbose=False, opt=None):
         # patched from 0x07 0x10 to 00 00.
         # Those represented the second instruction in the first FDE in .eh_frame. This changed the instruction
         # from "DW_CFA_undefined 16" to two NOPs.
-        # GNU readelf had a bug here, had to work around. See PR #411.
+        # GNU readelf 2.38 had a bug here that had to be worked around:
+        # https://sourceware.org/bugzilla/show_bug.cgi?id=29250
+        # It has since been fixed in binutils master, but switching to the latest master would break a lot.
+        # The same patch was applied to dwarf_test_versions_mix.elf at 0x2061: 07 10 -> 00 00
 
         # stdouts will be a 2-element list: output of readelf and output
         # of scripts/readelf.py
diff --git a/test/testfiles_for_readelf/dwarf_test_versions_mix.elf b/test/testfiles_for_readelf/dwarf_test_versions_mix.elf
new file mode 100644 (file)
index 0000000..6ae3333
Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_test_versions_mix.elf differ