Support for DWARFv5 debug_rnglists section (#419)
authorSeva Alekseyev <sevaa@yarxi.ru>
Fri, 17 Jun 2022 13:09:32 +0000 (09:09 -0400)
committerGitHub <noreply@github.com>
Fri, 17 Jun 2022 13:09:32 +0000 (06:09 -0700)
* Pre-DWARFv5 range section dumping, Ranges in readelf autotest

* DWARFv5 rnglists section support

* Autotest fixes

* Misleading comment

* Version, dwarfinfo now required in RangeLists constructor

elftools/dwarf/dwarfinfo.py
elftools/dwarf/enums.py
elftools/dwarf/ranges.py
elftools/dwarf/structs.py
examples/reference_output/dwarf_range_lists.out
scripts/readelf.py
test/run_readelf_tests.py

index 8dc7028f160e16731ba82d9d787c538d2a04e801..7c7060e1c49f37a016573545f108698e771be7f5 100644 (file)
@@ -102,7 +102,7 @@ class DWARFInfo(object):
         self.debug_pubtypes_sec = debug_pubtypes_sec
         self.debug_pubnames_sec = debug_pubnames_sec
         self.debug_loclists_sec = debug_loclists_sec
-        self.debug_rnglists_sec = debug_rnglists_sec # Ignored for now
+        self.debug_rnglists_sec = debug_rnglists_sec
 
         # This is the DWARFStructs the context uses, so it doesn't depend on
         # DWARF format and address_size (these are determined per CU) - set them
@@ -358,8 +358,11 @@ class DWARFInfo(object):
         """ Get a RangeLists object representing the .debug_ranges section of
             the DWARF data, or None if this section doesn't exist.
         """
-        if self.debug_ranges_sec:
-            return RangeLists(self.debug_ranges_sec.stream, self.structs)
+        if self.debug_rnglists_sec:
+            assert(self.debug_ranges_sec is None)
+            return RangeLists(self.debug_rnglists_sec.stream, self.structs, 5, self)
+        elif self.debug_ranges_sec:
+            return RangeLists(self.debug_ranges_sec.stream, self.structs, 4, self)
         else:
             return None
 
index c38ebe01ec26f2af8a33cabcd10ea4fa072eb737..c0ae4294e36dccbc8d6d940f8aeaf184c7bc5771 100644 (file)
@@ -429,3 +429,14 @@ ENUM_DW_LLE = dict(
     DW_LLE_start_end        = 0x07,
     DW_LLE_start_length     = 0x08    
 )
+
+ENUM_DW_RLE = dict(
+    DW_RLE_end_of_list   = 0x00,
+    DW_RLE_base_addressx = 0x01,
+    DW_RLE_startx_endx   = 0x02,
+    DW_RLE_startx_length = 0x03,
+    DW_RLE_offset_pair   = 0x04,
+    DW_RLE_base_address  = 0x05,
+    DW_RLE_start_end     = 0x06,
+    DW_RLE_start_length  = 0x07
+)
index 5f99473ed28cea850710fdb00418da7ce1ba5416..e5476de1007d3f5d1ca044db459d77b7070a7586 100644 (file)
@@ -12,18 +12,41 @@ from collections import namedtuple
 from ..common.utils import struct_parse
 
 
-RangeEntry = namedtuple('RangeEntry', 'begin_offset end_offset')
-BaseAddressEntry = namedtuple('BaseAddressEntry', 'base_address')
+RangeEntry = namedtuple('RangeEntry', 'entry_offset entry_length begin_offset end_offset is_absolute')
+BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset base_address')
 
+def not_implemented(e):
+    raise NotImplementedError("Range list entry %s is not supported yet" % (e.entry_type,))
+
+# Maps parsed entry types to RangeEntry/BaseAddressEntry objects
+entry_translate = {
+    'DW_RLE_base_address' : lambda e: BaseAddressEntry(e.entry_offset, e.address),
+    'DW_RLE_offset_pair'  : lambda e: RangeEntry(e.entry_offset, e.entry_length, e.start_offset, e.end_offset, False),
+    'DW_RLE_start_end'    : lambda e: RangeEntry(e.entry_offset, e.entry_length, e.start_address, e.end_address, True),
+    'DW_RLE_start_length' : lambda e: RangeEntry(e.entry_offset, e.entry_length, e.start_address, e.start_address + e.length, True),
+    'DW_RLE_base_addressx': not_implemented,
+    'DW_RLE_startx_endx'  : not_implemented,
+    'DW_RLE_startx_length': not_implemented
+}
 
 class RangeLists(object):
     """ A single range list is a Python list consisting of RangeEntry or
         BaseAddressEntry objects.
+
+        Since v0.29, the constructor takes two more parameters: version and dwarfinfo.
+
+        version is used to distinguish the DWARFv5 rnglists section from
+        the DWARF<=4 ranges section. Only the 4/5 distinction matters.
+
+        dwarfinfo is needed for enumeration: range lists may overlap, even on
+        DWARF<=4, so their offsets can only be found by scanning the DIEs.
     """
-    def __init__(self, stream, structs):
+    def __init__(self, stream, structs, version, dwarfinfo):
         self.stream = stream
         self.structs = structs
         self._max_addr = 2 ** (self.structs.address_size * 8) - 1
+        self.version = version
+        self._dwarfinfo = dwarfinfo
 
     def get_range_list_at_offset(self, offset):
         """ Get a range list at the given offset in the section.
@@ -34,32 +57,44 @@ class RangeLists(object):
     def iter_range_lists(self):
         """ Yield all range lists found in the section.
         """
-        # Just call _parse_range_list_from_stream until the stream ends
-        self.stream.seek(0, os.SEEK_END)
-        endpos = self.stream.tell()
+        # Calling parse until the stream ends is wrong, because ranges can overlap.
+        # Need to scan the DIEs to know all range locations
+        all_offsets = list(set(die.attributes['DW_AT_ranges'].value
+            for cu in self._dwarfinfo.iter_CUs()
+            for die in cu.iter_DIEs()
+            if 'DW_AT_ranges' in die.attributes))
+        all_offsets.sort()
 
-        self.stream.seek(0, os.SEEK_SET)
-        while self.stream.tell() < endpos:
-            yield self._parse_range_list_from_stream()
+        for offset in all_offsets:
+            yield self.get_range_list_at_offset(offset)
 
     #------ PRIVATE ------#
 
     def _parse_range_list_from_stream(self):
-        lst = []
-        while True:
-            begin_offset = struct_parse(
-                self.structs.Dwarf_target_addr(''), self.stream)
-            end_offset = struct_parse(
-                self.structs.Dwarf_target_addr(''), self.stream)
-            if begin_offset == 0 and end_offset == 0:
-                # End of list - we're done.
-                break
-            elif begin_offset == self._max_addr:
-                # Base address selection entry
-                lst.append(BaseAddressEntry(base_address=end_offset))
-            else:
-                # Range entry
-                lst.append(RangeEntry(
-                    begin_offset=begin_offset,
-                    end_offset=end_offset))
-        return lst
+        if self.version >= 5:
+            return list(entry_translate[entry.entry_type](entry)
+                for entry
+                in struct_parse(self.structs.Dwarf_rnglists_entries, self.stream))
+        else:
+            lst = []
+            while True:
+                entry_offset = self.stream.tell()
+                begin_offset = struct_parse(
+                    self.structs.Dwarf_target_addr(''), self.stream)
+                end_offset = struct_parse(
+                    self.structs.Dwarf_target_addr(''), self.stream)
+                if begin_offset == 0 and end_offset == 0:
+                    # End of list - we're done.
+                    break
+                elif begin_offset == self._max_addr:
+                    # Base address selection entry
+                    lst.append(BaseAddressEntry(entry_offset=entry_offset, base_address=end_offset))
+                else:
+                    # Range entry
+                    lst.append(RangeEntry(
+                        entry_offset=entry_offset,
+                        entry_length=self.stream.tell() - entry_offset,
+                        begin_offset=begin_offset,
+                        end_offset=end_offset,
+                        is_absolute=False))
+            return lst
index a1a286b41840ee994f0204aa7cfb9cd9278dc637..5aa4a121820f65ac4e0d5ca456732fee79525d3b 100644 (file)
@@ -13,7 +13,7 @@ from ..construct import (
     SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
     Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
     CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence,
-    Switch
+    Switch, Value
     )
 from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128,
     StreamOffset)
@@ -144,6 +144,7 @@ class DWARFStructs(object):
         self._create_string_offsets_table_header()
         self._create_address_table_header()
         self._create_loclists_parsers()
+        self._create_rnglists_parsers()
 
     def _create_initial_length(self):
         def _InitialLength(name):
@@ -434,6 +435,27 @@ class DWARFStructs(object):
         self.Dwarf_locview_pair = Struct('locview_pair',
             StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end'))
 
+    def _create_rnglists_parsers(self):  # Builds self.Dwarf_rnglists_entries, the parser for one DWARFv5 range list
+        self.Dwarf_rnglists_entries = RepeatUntilExcluding(
+            lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list',  # DW_RLE_end_of_list terminates the list
+            Struct('entry',
+                StreamOffset('entry_offset'),
+                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_RLE),
+                Embed(Switch('', lambda ctx: ctx.entry_type,  # per-type operands, read as attributes of the entry itself
+                {
+                    'DW_RLE_end_of_list'      : Struct('end_of_list'),
+                    'DW_RLE_base_addressx'    : Struct('base_addressx', self.Dwarf_uleb128('index')),
+                    'DW_RLE_startx_endx'      : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index')),
+                    'DW_RLE_startx_length'    : Struct('startx_length', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length')),
+                    'DW_RLE_offset_pair'      : Struct('offset_pair', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset')),
+                    'DW_RLE_base_address'     : Struct('base_address', self.Dwarf_target_addr('address')),
+                    'DW_RLE_start_end'        : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address')),
+                    'DW_RLE_start_length'     : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'))
+                })),
+                StreamOffset('entry_end_offset'),
+                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))  # entry_length = end offset - start offset
+
+
 class _InitialLengthAdapter(Adapter):
     """ A standard Construct adapter that expects a sub-construct
         as a struct with one or two values (first, second).
index f8939b8f583de842a42dd22098bdd09a282211ea..2b800c568aa6f805075d008c73582c08e82a3b84 100644 (file)
@@ -4,4 +4,4 @@ Processing file: ./examples/sample_exe64.elf
   Found a compile unit at offset 258, length 156
   Found a compile unit at offset 418, length 300
    DIE DW_TAG_lexical_block. attr DW_AT_ranges.
-[RangeEntry(begin_offset=26, end_offset=40), RangeEntry(begin_offset=85, end_offset=118), RangeEntry(begin_offset=73, end_offset=77), RangeEntry(begin_offset=64, end_offset=67)]
+[RangeEntry(entry_offset=0, entry_length=16, begin_offset=26, end_offset=40, is_absolute=False), RangeEntry(entry_offset=16, entry_length=16, begin_offset=85, end_offset=118, is_absolute=False), RangeEntry(entry_offset=32, entry_length=16, begin_offset=73, end_offset=77, is_absolute=False), RangeEntry(entry_offset=48, entry_length=16, begin_offset=64, end_offset=67, is_absolute=False)]
index 6bd776badd2ed93c3befeefec53b04ff1adf329f..070032ab5f6773b0a7a3f691c566b652c0ed681e 100755 (executable)
@@ -63,10 +63,21 @@ from elftools.dwarf.descriptions import (
 from elftools.dwarf.constants import (
     DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file)
 from elftools.dwarf.locationlists import LocationParser, LocationEntry, LocationViewPair, BaseAddressEntry
+from elftools.dwarf.ranges import RangeEntry # ranges.BaseAddressEntry collides with the one above
+import elftools.dwarf.ranges
 from elftools.dwarf.callframe import CIE, FDE, ZERO
 from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry
 from elftools.dwarf.enums import ENUM_DW_UT
 
+def _get_cu_base(cu):
+    top_die = cu.get_top_DIE()
+    attr = top_die.attributes
+    if 'DW_AT_low_pc' in attr:
+        return attr['DW_AT_low_pc'].value
+    elif 'DW_AT_entry_pc' in attr:
+        return attr['DW_AT_entry_pc'].value
+    else:
+        raise ValueError("Can't find the base IP (low_pc) for a CU")
 
 class ReadElf(object):
     """ display_* methods are used to emit output into the output stream
@@ -859,6 +870,8 @@ class ReadElf(object):
             self._dump_debug_namelut(dump_what)
         elif dump_what == 'loc':
             self._dump_debug_locations()
+        elif dump_what == 'Ranges':
+            self._dump_debug_ranges()
         else:
             self._emitline('debug dump not yet supported for "%s"' % dump_what)
 
@@ -1429,16 +1442,6 @@ class ReadElf(object):
     def _dump_debug_locations(self):
         """ Dump the location lists from .debug_loc/.debug_loclists section
         """
-        def _get_cu_base(cu):
-            top_die = cu.get_top_DIE()
-            attr = top_die.attributes
-            if 'DW_AT_low_pc' in attr:
-                return attr['DW_AT_low_pc'].value
-            elif 'DW_AT_entry_pc' in attr:
-                return attr['DW_AT_entry_pc'].value
-            else:
-                raise ValueError("Can't find the base IP (low_pc) for a CU")
-
         di = self._dwarfinfo
         loc_lists = di.location_lists()
         if not loc_lists: # No locations section - readelf outputs nothing
@@ -1530,6 +1533,58 @@ class ReadElf(object):
             last = loc_list[-1]
             self._emitline("    %08x <End of list>" % (last.entry_offset + last.entry_length))
 
+    def _dump_debug_ranges(self):
+        # TODO: GNU readelf format doesn't need entry_length?
+        di = self._dwarfinfo
+        range_lists = di.range_lists()
+        if not range_lists: # No ranges section - readelf outputs nothing
+            return
+
+        ver5 = range_lists.version >= 5
+        range_lists = list(range_lists.iter_range_lists())
+        if len(range_lists) == 0:
+            # Present but empty ranges section - readelf outputs a message
+            self._emitline("\nSection '%s' has no debugging data." % (di.debug_rnglists_sec or di.debug_ranges_sec).name)
+            return
+
+        # In order to determine the base address of a range list,
+        # we need to know the corresponding CU.
+        cu_map = {die.attributes['DW_AT_ranges'].value : cu  # Range list offset => CU
+            for cu in di.iter_CUs()
+            for die in cu.iter_DIEs()
+            if 'DW_AT_ranges' in die.attributes}
+
+        addr_size = di.config.default_address_size # In bytes, 4 or 8
+        addr_width = addr_size * 2 # In hex digits, 8 or 16
+        line_template = "    %%08x %%0%dx %%0%dx %%s" % (addr_width, addr_width)
+        base_template = "    %%08x %%0%dx (base address)" % (addr_width)
+
+        self._emitline('Contents of the %s section:\n' % (di.debug_rnglists_sec or di.debug_ranges_sec).name)
+        self._emitline('    Offset   Begin    End')
+        
+        for range_list in range_lists:
+            # Weird discrepancy in binutils: for DWARFv5 it outputs entry offset,
+            # for DWARF<=4 list offset.
+            first = range_list[0]
+            base_ip = _get_cu_base(cu_map[first.entry_offset])
+            for entry in range_list:
+                if isinstance(entry, RangeEntry):
+                    postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else ''
+                    self._emitline(line_template % (
+                        entry.entry_offset if ver5 else first.entry_offset,
+                        (0 if entry.is_absolute else base_ip) + entry.begin_offset,
+                        (0 if entry.is_absolute else base_ip) + entry.end_offset,
+                        postfix))
+                elif isinstance(entry, elftools.dwarf.ranges.BaseAddressEntry):
+                    base_ip = entry.base_address
+                    self._emitline(base_template % (
+                        entry.entry_offset if ver5 else first.entry_offset,
+                        entry.base_address))
+                else:
+                    raise NotImplementedError("Unknown object in a range list")
+            last = range_list[-1]
+            self._emitline('    %08x <End of list>' % (last.entry_offset + last.entry_length if ver5 else first.entry_offset))            
+
     def _display_arch_specific_arm(self):
         """ Display the ARM architecture-specific info contained in the file.
         """
@@ -1620,7 +1675,7 @@ def main(stream=None):
             action='store', dest='debug_dump_what', metavar='<what>',
             help=(
                 'Display the contents of DWARF debug sections. <what> can ' +
-                'one of {info,decodedline,frames,frames-interp,aranges,pubtypes,pubnames,loc}'))
+                'one of {info,decodedline,frames,frames-interp,aranges,pubtypes,pubnames,loc,Ranges}'))
     argparser.add_argument('--traceback',
                            action='store_true', dest='show_traceback',
                            help='Dump the Python traceback on ELFError'
index 3d92d9de11797c23cda997b8b4c312f34cf18efb..03fc5a995032ab229f9b8aa9c1462f7670cb7f27 100755 (executable)
@@ -66,7 +66,8 @@ def run_test_on_file(filename, verbose=False, opt=None):
             '--debug-dump=info', '--debug-dump=decodedline',
             '--debug-dump=frames', '--debug-dump=frames-interp',
             '--debug-dump=aranges', '--debug-dump=pubtypes',
-            '--debug-dump=pubnames', '--debug-dump=loc'
+            '--debug-dump=pubnames', '--debug-dump=loc',
+            '--debug-dump=Ranges'
             ]
     else:
         options = [opt]