added parsing of .debug_ranges v0.10
author Eli Bendersky <eliben@gmail.com>
Thu, 29 Dec 2011 04:10:53 +0000 (06:10 +0200)
committer Eli Bendersky <eliben@gmail.com>
Thu, 29 Dec 2011 04:10:53 +0000 (06:10 +0200)
elftools/dwarf/dwarfinfo.py
elftools/dwarf/locationlists.py
elftools/dwarf/ranges.py [new file with mode: 0644]
elftools/elf/elffile.py
examples/dwarf_range_lists.py [new file with mode: 0644]

index 665f4d492edfe1a76072e28ab80afe303b41a428..9aa2f520e6d2805f4cb324ae4382a39f89fc49ab 100644 (file)
@@ -17,6 +17,7 @@ from .abbrevtable import AbbrevTable
 from .lineprogram import LineProgram
 from .callframe import CallFrameInfo
 from .locationlists import LocationLists
+from .ranges import RangeLists
 
 
 # Describes a debug section
@@ -60,6 +61,7 @@ class DWARFInfo(object):
             debug_frame_sec,
             debug_str_sec,
             debug_loc_sec,
+            debug_ranges_sec,
             debug_line_sec):
         """ config:
                 A DwarfConfig object
@@ -75,6 +77,7 @@ class DWARFInfo(object):
         self.debug_frame_sec = debug_frame_sec
         self.debug_str_sec = debug_str_sec
         self.debug_loc_sec = debug_loc_sec
+        self.debug_ranges_sec = debug_ranges_sec
         self.debug_line_sec = debug_line_sec
 
         # This is the DWARFStructs the context uses, so it doesn't depend on 
@@ -155,6 +158,12 @@ class DWARFInfo(object):
         """
         return LocationLists(self.debug_loc_sec.stream, self.structs)
 
+    def range_lists(self):
+        """ Get a RangeLists for .debug_ranges, or None if it is missing. """
+        if self.debug_ranges_sec is None:  # section absent from the file
+            return None
+        return RangeLists(self.debug_ranges_sec.stream, self.structs)
+
     #------ PRIVATE ------#
 
     def _parse_CUs_iter(self):
index 2e08b0dd758b3d4b77d98ff639cb75f9d612effe..45aa36be0f2e16d77016973af367a97e3a951ead 100644 (file)
@@ -1,7 +1,7 @@
 #-------------------------------------------------------------------------------
 # elftools: dwarf/locationlists.py
 #
-# DWARF location lists section decoding
+# DWARF location lists section decoding (.debug_loc)
 #
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
diff --git a/elftools/dwarf/ranges.py b/elftools/dwarf/ranges.py
new file mode 100644 (file)
index 0000000..9a216ee
--- /dev/null
@@ -0,0 +1,68 @@
+#-------------------------------------------------------------------------------
+# elftools: dwarf/ranges.py
+#
+# DWARF ranges section decoding (.debug_ranges)
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+import os
+from collections import namedtuple
+
+from ..common.utils import struct_parse
+
+
+RangeEntry = namedtuple('RangeEntry', ['begin_offset', 'end_offset'])
+BaseAddressEntry = namedtuple('BaseAddressEntry', ['base_address'])
+
+
+class RangeLists(object):
+    """ Decoder for the range lists stored in a .debug_ranges stream.
+
+        Each decoded range list is a plain Python list whose items are
+        RangeEntry or BaseAddressEntry objects, in stream order.
+    """
+    def __init__(self, stream, structs):
+        # structs supplies address_size and the Dwarf_target_addr parser.
+        self.stream = stream
+        self.structs = structs
+        # Largest address value: marks a base address selection entry.
+        self._max_addr = 2 ** (self.structs.address_size * 8) - 1
+
+    def get_range_list_at_offset(self, offset):
+        """ Decode and return the range list that starts at the given
+            offset from the beginning of the stream.
+        """
+        self.stream.seek(offset, os.SEEK_SET)
+        return self._parse_range_list_from_stream()
+
+    def iter_range_lists(self):
+        """ Yield every range list in the stream, first to last.
+        """
+        # Measure the stream, rewind, then parse lists back to back.
+        self.stream.seek(0, os.SEEK_END)
+        stream_end = self.stream.tell()
+        self.stream.seek(0, os.SEEK_SET)
+        while self.stream.tell() < stream_end:
+            yield self._parse_range_list_from_stream()
+
+    #------ PRIVATE ------#
+
+    def _parse_range_list_from_stream(self):
+        # Reads address pairs from the current stream position until the
+        # terminating (0, 0) pair, which is not included in the result.
+        entries = []
+        while True:
+            begin = struct_parse(
+                self.structs.Dwarf_target_addr(''), self.stream)
+            end = struct_parse(
+                self.structs.Dwarf_target_addr(''), self.stream)
+            if begin == 0 and end == 0:
+                return entries
+            if begin == self._max_addr:
+                entries.append(BaseAddressEntry(base_address=end))
+            else:
+                entries.append(RangeEntry(begin_offset=begin, end_offset=end))
+
+
+
index 57b213bfc1ba042e11f60e8edfd35f3050d21016..dcec5550a6db4904d30b33e365a7dca14e227ff5 100644 (file)
@@ -123,7 +123,8 @@ class ELFFile(object):
         #
         debug_sections = {}
         for secname in ('.debug_info', '.debug_abbrev', '.debug_str', 
-                        '.debug_line', '.debug_frame', '.debug_loc'):
+                        '.debug_line', '.debug_frame', '.debug_loc',
+                        '.debug_ranges'):
             section = self.get_section_by_name(secname)
             if section is None:
                 debug_sections[secname] = None
@@ -142,6 +143,7 @@ class ELFFile(object):
                 debug_frame_sec=debug_sections['.debug_frame'],
                 debug_str_sec=debug_sections['.debug_str'],
                 debug_loc_sec=debug_sections['.debug_loc'],
+                debug_ranges_sec=debug_sections['.debug_ranges'],
                 debug_line_sec=debug_sections['.debug_line'])
 
     def get_machine_arch(self):
diff --git a/examples/dwarf_range_lists.py b/examples/dwarf_range_lists.py
new file mode 100644 (file)
index 0000000..397c162
--- /dev/null
@@ -0,0 +1,90 @@
+#-------------------------------------------------------------------------------
+# elftools example: dwarf_range_lists.py
+#
+# Examine DIE entries which have range list values, and decode these range 
+# lists.
+#
+# Eli Bendersky (eliben@gmail.com)
+# This code is in the public domain
+#-------------------------------------------------------------------------------
+from __future__ import print_function
+import sys
+
+# If elftools is not installed, maybe we're running from the root or examples
+# dir of the source distribution
+try:
+    import elftools
+except ImportError:
+    sys.path.extend(['.', '..'])
+
+from elftools.elf.elffile import ELFFile
+from elftools.dwarf.descriptions import (
+    describe_DWARF_expr, set_global_machine_arch)
+from elftools.dwarf.ranges import RangeEntry
+
+
+def process_file(filename):
+    """ Print the range lists referenced by DIE attributes of an ELF file.
+    """
+    print('Processing file:', filename)
+    # ELF is a binary format: text mode decodes the data on Python 3 and
+    # translates newlines on Windows, so open the file as raw bytes.
+    with open(filename, 'rb') as f:
+        elffile = ELFFile(f)
+
+        if not elffile.has_dwarf_info():
+            print('  file has no DWARF info')
+            return
+
+        # get_dwarf_info returns a DWARFInfo context object, which is the
+        # starting point for all DWARF-based processing in pyelftools.
+        dwarfinfo = elffile.get_dwarf_info()
+
+        # The range lists are extracted by DWARFInfo from the .debug_ranges
+        # section; range_lists() is documented to return None without one.
+        range_lists = dwarfinfo.range_lists()
+        if range_lists is None:
+            print('  file has no .debug_ranges section')
+            return
+
+        for CU in dwarfinfo.iter_CUs():
+            # CU is a CompileUnit object, with computed attributes (such as
+            # its offset in the section) and a DWARF-conformant header whose
+            # elements are accessed via item-lookup.
+            print('  Found a compile unit at offset %s, length %s' % (
+                CU.cu_offset, CU['unit_length']))
+
+            # A CU provides a simple API to iterate over all the DIEs in it.
+            for DIE in CU.iter_DIEs():
+                # Each attribute is an AttributeValue object (from
+                # elftools.dwarf.die). values() works on Python 2 and 3.
+                for attr in DIE.attributes.values():
+                    if attribute_has_range_list(attr):
+                        # The attribute value is an offset into the
+                        # .debug_ranges section; decode the list found there.
+                        rangelist = range_lists.get_range_list_at_offset(
+                            attr.value)
+                        print('   DIE %s. attr %s.\n%s' % (
+                            DIE.tag, attr.name, rangelist))
+
+
+def attribute_has_range_list(attr):
+    """ Tell whether attr holds a range list: it must be a DW_AT_ranges
+        attribute whose DW_FORM belongs to the rangelistptr "class" of
+        the DWARF spec v3.
+    """
+    # Only these two forms are accepted as range list offsets here.
+    rangelistptr_forms = ('DW_FORM_data4', 'DW_FORM_data8')
+    return attr.name == 'DW_AT_ranges' and attr.form in rangelistptr_forms
+
+
+if __name__ == '__main__':
+    for filename in sys.argv[1:]:  # every command-line argument is a file
+        process_file(filename)
+
+
+
+
+
+
+