Add parsing and readelf dumping for .eh_frame (#155)

author Pierre-Marie de Rodat <pmderodat@kawie.fr>

Tue, 29 Aug 2017 02:05:58 +0000 (22:05 -0400)

committer Eli Bendersky <eliben@users.noreply.github.com>

Tue, 29 Aug 2017 02:05:58 +0000 (19:05 -0700)
author Pierre-Marie de Rodat <pmderodat@kawie.fr>
Tue, 29 Aug 2017 02:05:58 +0000 (22:05 -0400)
committer Eli Bendersky <eliben@users.noreply.github.com>
Tue, 29 Aug 2017 02:05:58 +0000 (19:05 -0700)
diff --git a/elftools/common/py3compat.py b/elftools/common/py3compat.py

index 4878bf31c5012762de27a9e435cb3d96c7e5353e..f48104364b8ff64df4fcd1ad8a0563264af8a860 100644 (file)
--- a/elftools/common/py3compat.py
+++ b/elftools/common/py3compat.py
@@ -23,6 +23,9 @@ if PY3:
      def str2bytes(s): return s.encode('latin-1')
      def int2byte(i):return bytes((i,))
      def byte2int(b): return b
+    def iterbytes(b):
+        """ Yield every byte of `b` as a bytes object of length 1.
+
+            One-element slices are used rather than plain iteration, since
+            iterating a bytes object on Python 3 produces ints.
+        """
+        for end in range(1, len(b) + 1):
+            yield b[end - 1:end]
  
      ifilter = filter
  
@@ -39,6 +42,8 @@ else:
      def str2bytes(s): return s
      int2byte = chr
      byte2int = ord
+    def iterbytes(b):
+        """ Return an iterator over the items of `b`.
+        """
+        return iter(b)
  
      from itertools import ifilter
  
diff --git a/elftools/dwarf/callframe.py b/elftools/dwarf/callframe.py

index 22f6a71041f91f31b1e68499d7d09e6d35a086b8..127e2d875801a60fe8fa05b20c45401b530b3852 100644 (file)
--- a/elftools/dwarf/callframe.py
+++ b/elftools/dwarf/callframe.py
@@ -9,7 +9,9 @@
  import copy
  from collections import namedtuple
  from ..common.utils import (struct_parse, dwarf_assert, preserve_stream_pos)
-from ..common.py3compat import iterkeys
+from ..common.py3compat import iterbytes, iterkeys
+from ..construct import Struct, Switch
+from .enums import DW_EH_encoding_flags
  from .structs import DWARFStructs
  from .constants import *
  
@@ -17,10 +19,18 @@ from .constants import *
  class CallFrameInfo(object):
      """ DWARF CFI (Call Frame Info)
  
+    Note that this also supports unwinding information as found in .eh_frame
+    sections: its format differs slightly from the one in .debug_frame. See
+    <http://www.airs.com/blog/archives/460>.
+
          stream, size:
              A stream holding the .debug_frame section, and the size of the
              section in it.
  
+        address:
+            Virtual address for this section. This is used to decode relative
+            addresses.
+
          base_structs:
              The structs to be used as the base for parsing this section.
              Eventually, each entry gets its own structs based on the initial
@@ -34,9 +44,11 @@ class CallFrameInfo(object):
              such as guessing which CU contains which FDEs (based on their
              address ranges) and taking the address_size from those CUs.
      """
-    def __init__(self, stream, size, base_structs):
+    def __init__(self, stream, size, address, base_structs,
+                 for_eh_frame=False):
          self.stream = stream
          self.size = size
+        self.address = address
          self.base_structs = base_structs
          self.entries = None
  
@@ -45,6 +57,11 @@ class CallFrameInfo(object):
          # header field which contains a stream offset.
          self._entry_cache = {}
  
+        # The .eh_frame and .debug_frame section use almost the same CFI
+        # encoding, but there are tiny variations we need to handle during
+        # parsing.
+        self.for_eh_frame = for_eh_frame
+
      def get_entries(self):
          """ Get a list of entries that constitute this CFI. The list consists
              of CIE or FDE objects, in the order of their appearance in the
@@ -74,6 +91,10 @@ class CallFrameInfo(object):
  
          entry_length = struct_parse(
              self.base_structs.Dwarf_uint32(''), self.stream, offset)
+
+        if self.for_eh_frame and entry_length == 0:
+            return ZERO(offset)
+
          dwarf_format = 64 if entry_length == 0xFFFFFFFF else 32
  
          entry_structs = DWARFStructs(
@@ -85,28 +106,42 @@ class CallFrameInfo(object):
          CIE_id = struct_parse(
              entry_structs.Dwarf_offset(''), self.stream)
  
-        is_CIE = (
-            (dwarf_format == 32 and CIE_id == 0xFFFFFFFF) or
-            CIE_id == 0xFFFFFFFFFFFFFFFF)
+        if self.for_eh_frame:
+            is_CIE = CIE_id == 0
+        else:
+            is_CIE = (
+                (dwarf_format == 32 and CIE_id == 0xFFFFFFFF) or
+                CIE_id == 0xFFFFFFFFFFFFFFFF)
  
+        # Parse the header, which goes up to and excluding the sequence of
+        # instructions.
          if is_CIE:
-            header_struct = entry_structs.Dwarf_CIE_header
+            header_struct = (entry_structs.EH_CIE_header
+                             if self.for_eh_frame else
+                             entry_structs.Dwarf_CIE_header)
+            header = struct_parse(
+                header_struct, self.stream, offset)
          else:
-            header_struct = entry_structs.Dwarf_FDE_header
+            header = self._parse_fde_header(entry_structs, offset)
  
-        # Parse the header, which goes up to and including the
-        # return_address_register field
-        header = struct_parse(
-            header_struct, self.stream, offset)
  
          # If this is DWARF version 4 or later, we can have a more precise
          # address size, read from the CIE header.
-        if entry_structs.dwarf_version >= 4:
+        if not self.for_eh_frame and entry_structs.dwarf_version >= 4:
              entry_structs = DWARFStructs(
                  little_endian=entry_structs.little_endian,
                  dwarf_format=entry_structs.dwarf_format,
                  address_size=header.address_size)
  
+        # If the augmentation string is not empty, hope to find a length field
+        # in order to be able to skip the augmentation data.
+        if is_CIE:
+            aug_bytes, aug_dict = self._parse_cie_augmentation(
+                    header, entry_structs)
+        else:
+            cie = self._parse_cie_for_fde(offset, header, entry_structs)
+            aug_bytes = self._read_augmentation_data(entry_structs)
+
          # For convenience, compute the end offset for this entry
          end_offset = (
              offset + header.length +
@@ -120,12 +155,15 @@ class CallFrameInfo(object):
          if is_CIE:
              self._entry_cache[offset] = CIE(
                  header=header, instructions=instructions, offset=offset,
+                augmentation_dict=aug_dict,
+                augmentation_bytes=aug_bytes,
                  structs=entry_structs)
+
          else: # FDE
-            with preserve_stream_pos(self.stream):
-                cie = self._parse_entry_at(header['CIE_pointer'])
+            cie = self._parse_cie_for_fde(offset, header, entry_structs)
              self._entry_cache[offset] = FDE(
                  header=header, instructions=instructions, offset=offset,
+                augmentation_bytes=aug_bytes,
                  structs=entry_structs, cie=cie)
          return self._entry_cache[offset]
  
@@ -193,6 +231,172 @@ class CallFrameInfo(object):
              offset = self.stream.tell()
          return instructions
  
+    def _parse_cie_for_fde(self, fde_offset, fde_header, entry_structs):
+        """ Locate and parse the CIE that the given FDE refers to.
+
+            fde_offset:
+                Section offset at which the FDE starts.
+
+            fde_header:
+                Parsed FDE header; only its 'CIE_pointer' item is read.
+
+            entry_structs:
+                Structs of the FDE; only consulted (dwarf_format) when parsing
+                .eh_frame.
+        """
+        cie_pointer = fde_header['CIE_pointer']
+        if not self.for_eh_frame:
+            # .debug_frame: CIE_pointer directly holds the CIE offset
+            target_offset = cie_pointer
+        else:
+            # .eh_frame: CIE_pointer is a backwards displacement, counted from
+            # the section offset of the CIE_pointer field itself rather than
+            # from the start of the FDE.
+            pointer_field_offset = fde_offset + entry_structs.dwarf_format // 8
+            target_offset = pointer_field_offset - cie_pointer
+
+        # Parse the CIE within preserve_stream_pos so that reading it does
+        # not disturb the caller's position in the stream.
+        with preserve_stream_pos(self.stream):
+            return self._parse_entry_at(target_offset)
+
+    def _parse_cie_augmentation(self, header, entry_structs):
+        """ Parse CIE augmentation data from the augmentation string in
+        `header`.
+
+        Return a tuple that contains 1) the augmentation data as bytes
+        (without the length field) and 2) the augmentation data as a dict.
+        Augmentation characters that carry no data (b'S') are recorded in the
+        dict as flags: the character itself is the key and True is the value.
+
+        When the augmentation string is missing or empty, (b'', {}) is
+        returned and the stream is not read.
+        """
+        augmentation = header.get('augmentation')
+        if not augmentation:
+            return (b'', {})
+
+        # Augmentation parsing works in minimal mode here: we need the length
+        # field to be able to skip unhandled augmentation fields.
+        assert augmentation.startswith(b'z'), (
+            'Unhandled augmentation string: {}'.format(repr(augmentation)))
+
+        available_fields = {
+            b'z': entry_structs.Dwarf_uleb128('length'),
+            b'L': entry_structs.Dwarf_uint8('LSDA_encoding'),
+            b'R': entry_structs.Dwarf_uint8('FDE_encoding'),
+            b'S': True,
+            b'P': Struct(
+                'personality',
+                entry_structs.Dwarf_uint8('encoding'),
+                Switch('function', lambda ctx: ctx.encoding & 0x0f, {
+                    enc: fld_cons('function')
+                    for enc, fld_cons
+                    in self._eh_encoding_to_field(entry_structs).items()})),
+        }
+
+        # Build the Struct we will be using to parse the augmentation data.
+        # Stop as soon as we are not able to match the augmentation string.
+        fields = []
+        aug_dict = {}
+
+        for b in iterbytes(augmentation):
+            try:
+                fld = available_fields[b]
+            except KeyError:
+                break
+
+            if fld is True:
+                # Flag without associated data: key it by its augmentation
+                # character. Keying by `fld` would always produce the key
+                # True, whatever the flag is.
+                aug_dict[b] = True
+            else:
+                fields.append(fld)
+
+        # Read the augmentation twice: once with the Struct, once for the raw
+        # bytes. Read the raw bytes last so we are sure we leave the stream
+        # pointing right after the augmentation: the Struct may be incomplete
+        # (missing trailing fields) due to an unknown char: see the KeyError
+        # above.
+        offset = self.stream.tell()
+        struct = Struct('Augmentation_Data', *fields)
+        aug_dict.update(struct_parse(struct, self.stream, offset))
+        self.stream.seek(offset)
+        aug_bytes = self._read_augmentation_data(entry_structs)
+        return (aug_bytes, aug_dict)
+
+    def _read_augmentation_data(self, entry_structs):
+        """ Read the raw augmentation data at the current stream position.
+
+        The data is expected to be prefixed by a ULEB128 length field (i.e. the
+        augmentation string starts with 'z'); only the bytes that follow this
+        length field are returned. When not parsing .eh_frame, nothing is read
+        and an empty bytes object is returned.
+        """
+        if self.for_eh_frame:
+            length_struct = Struct(
+                'Dummy_Augmentation_Data',
+                entry_structs.Dwarf_uleb128('length'))
+            parsed = struct_parse(length_struct, self.stream)
+            return self.stream.read(parsed['length'])
+
+        return b''
+
+    def _parse_fde_header(self, entry_structs, offset):
+        """ Parse and return the header of the FDE located at `offset`.
+
+        For .debug_frame, the fixed Dwarf_FDE_header struct is used. For
+        .eh_frame, the layout of the 'initial_location' and 'address_range'
+        fields depends on the FDE encoding found in the augmentation data of
+        the corresponding CIE: that CIE is parsed first and the header struct
+        is built accordingly.
+
+        Only the absolute and pc-relative encoding modifiers are supported;
+        any other one triggers an assertion failure.
+        """
+        if not self.for_eh_frame:
+            return struct_parse(entry_structs.Dwarf_FDE_header, self.stream,
+                                offset)
+
+        fields = [entry_structs.Dwarf_initial_length('length'),
+                  entry_structs.Dwarf_offset('CIE_pointer')]
+
+        # Parse the couple of header fields that are always here so we can
+        # fetch the corresponding CIE.
+        minimal_header = struct_parse(Struct('eh_frame_minimal_header',
+                                             *fields), self.stream, offset)
+        cie = self._parse_cie_for_fde(offset, minimal_header, entry_structs)
+        initial_location_offset = self.stream.tell()
+
+        # Try to parse the initial location. We need the initial location in
+        # order to create a meaningful FDE, so assume it's there. Omission does
+        # not seem to happen in practice.
+        #
+        # A CIE whose augmentation string has no 'R' does not specify any FDE
+        # encoding: fall back to DW_EH_PE_absptr in that case rather than
+        # failing with a KeyError.
+        encoding = cie.augmentation_dict.get(
+            'FDE_encoding', DW_EH_encoding_flags['DW_EH_PE_absptr'])
+        assert encoding != DW_EH_encoding_flags['DW_EH_PE_omit']
+        basic_encoding = encoding & 0x0f
+        encoding_modifier = encoding & 0xf0
+
+        # Depending on the specified encoding, complete the header Struct
+        formats = self._eh_encoding_to_field(entry_structs)
+        fields.append(formats[basic_encoding]('initial_location'))
+        fields.append(formats[basic_encoding]('address_range'))
+
+        result = struct_parse(Struct('Dwarf_FDE_header', *fields),
+                              self.stream, offset)
+
+        if encoding_modifier == 0:
+            pass
+
+        elif encoding_modifier == DW_EH_encoding_flags['DW_EH_PE_pcrel']:
+            # Start address is relative to the address of the
+            # "initial_location" field.
+            result['initial_location'] += (
+                self.address + initial_location_offset)
+        else:
+            assert False, 'Unsupported encoding: {:#x}'.format(encoding)
+
+        return result
+
+    def _eh_encoding_to_field(self, entry_structs):
+        """
+        Return a mapping from basic encodings (DW_EH_encoding_flags) to the
+        corresponding field constructors (for instance
+        entry_structs.Dwarf_uint32).
+
+        Keys are the raw encoding values; callers are expected to mask out the
+        modifier bits (encoding & 0x0f) before looking an encoding up.
+        """
+        return {
+            # NOTE(review): the width of an absolute pointer is picked from
+            # dwarf_format here; presumably it should follow the target
+            # address size instead -- confirm.
+            DW_EH_encoding_flags['DW_EH_PE_absptr']:
+                entry_structs.Dwarf_uint32
+                if entry_structs.dwarf_format == 32 else
+                entry_structs.Dwarf_uint64,
+            DW_EH_encoding_flags['DW_EH_PE_uleb128']:
+                entry_structs.Dwarf_uleb128,
+            DW_EH_encoding_flags['DW_EH_PE_udata2']:
+                entry_structs.Dwarf_uint16,
+            DW_EH_encoding_flags['DW_EH_PE_udata4']:
+                entry_structs.Dwarf_uint32,
+            DW_EH_encoding_flags['DW_EH_PE_udata8']:
+                entry_structs.Dwarf_uint64,
+
+            DW_EH_encoding_flags['DW_EH_PE_sleb128']:
+                entry_structs.Dwarf_sleb128,
+            DW_EH_encoding_flags['DW_EH_PE_sdata2']:
+                entry_structs.Dwarf_int16,
+            DW_EH_encoding_flags['DW_EH_PE_sdata4']:
+                entry_structs.Dwarf_int32,
+            DW_EH_encoding_flags['DW_EH_PE_sdata8']:
+                entry_structs.Dwarf_int64,
+        }
+
  
  def instruction_name(opcode):
      """ Given an opcode, return the instruction name.
@@ -224,14 +428,23 @@ class CFIEntry(object):
          Contains a header and a list of instructions (CallFrameInstruction).
          offset: the offset of this entry from the beginning of the section
          cie: for FDEs, a CIE pointer is required
+        augmentation_dict: Augmentation data as a parsed struct (dict): see
+            CallFrameInfo._parse_cie_augmentation and
+            http://www.airs.com/blog/archives/460.
+        augmentation_bytes: Augmentation data as a chain of bytes: see
+            CallFrameInfo._parse_cie_augmentation and
+            http://www.airs.com/blog/archives/460.
      """
-    def __init__(self, header, structs, instructions, offset, cie=None):
+    def __init__(self, header, structs, instructions, offset,
+            augmentation_dict=None, augmentation_bytes=b'', cie=None):
          self.header = header
          self.structs = structs
          self.instructions = instructions
          self.offset = offset
          self.cie = cie
          self._decoded_table = None
+        # Build a fresh dict for each entry created without augmentation data:
+        # a `{}` default argument would be one single object shared (and
+        # possibly mutated) across all such entries.
+        self.augmentation_dict = (
+            {} if augmentation_dict is None else augmentation_dict)
+        self.augmentation_bytes = augmentation_bytes
  
      def get_decoded(self):
          """ Decode the CFI contained in this entry and return a
@@ -374,6 +587,17 @@ class FDE(CFIEntry):
      pass
  
  
+class ZERO(object):
+    """ End marker for the sequence of CIE/FDE.
+
+    This is specific to `.eh_frame` sections: this kind of entry does not exist
+    in pure DWARF. `readelf` displays these as "ZERO terminator", hence the
+    class name.
+
+    offset: the offset of this entry from the beginning of the section
+    """
+    def __init__(self, offset):
+        self.offset = offset
+
+
  class RegisterRule(object):
      """ Register rules are used to find registers in call frames. Each rule
          consists of a type (enumeration following DWARFv3 section 6.4.1)
diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py

index 2dbd69925a90033a5cf91534d13652dc47980f47..3a0760783ab5e8ea337d91b33f4c7cb51caad984 100644 (file)
--- a/elftools/dwarf/descriptions.py
+++ b/elftools/dwarf/descriptions.py
@@ -86,7 +86,7 @@ def describe_CFI_instructions(entry):
                          'DW_CFA_advance_loc4', 'DW_CFA_advance_loc'):
              _assert_FDE_instruction(instr)
              factored_offset = instr.args[0] * cie['code_alignment_factor']
-            s += '  %s: %s to %016x\n' % (
+            s += '  %s: %s to %08x\n' % (
                  name, factored_offset, factored_offset + pc)
              pc += factored_offset
          elif name in (  'DW_CFA_remember_state', 'DW_CFA_restore_state',
diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py

index 330a23879c4972ba94522aac6fb66bb5fc81921e..b8faf9d7ac57b0df106eece33870e29343c1c156 100644 (file)
--- a/elftools/dwarf/dwarfinfo.py
+++ b/elftools/dwarf/dwarfinfo.py
@@ -27,12 +27,14 @@ from .aranges import ARanges
  # name: section name in the container file
  # global_offset: the global offset of the section in its container file
  # size: the size of the section's data, in bytes
+# address: the virtual address for the section's data
  #
  # 'name' and 'global_offset' are for descriptional purposes only and
-# aren't strictly required for the DWARF parsing to work.
+# aren't strictly required for the DWARF parsing to work. 'address' is required
+# to properly decode the special '.eh_frame' format.
  #
  DebugSectionDescriptor = namedtuple('DebugSectionDescriptor',
-    'stream name global_offset size')
+    'stream name global_offset size address')
  
  
  # Some configuration parameters for the DWARF reader. This exists to allow
@@ -96,6 +98,15 @@ class DWARFInfo(object):
          # Cache for abbrev tables: a dict keyed by offset
          self._abbrevtable_cache = {}
  
+    @property
+    def has_debug_info(self):
+        """ Return whether this contains debug information.
+
+        This may not be the case when the ELF only contains .eh_frame, which
+        is encoded as DWARF but is not actually meant for debugging.
+        """
+        return bool(self.debug_info_sec)
+
      def iter_CUs(self):
          """ Yield all the compile units (CompileUnit objects) in the debug info
          """
@@ -154,6 +165,7 @@ class DWARFInfo(object):
          cfi = CallFrameInfo(
              stream=self.debug_frame_sec.stream,
              size=self.debug_frame_sec.size,
+            address=self.debug_frame_sec.address,
              base_structs=self.structs)
          return cfi.get_entries()
  
@@ -168,7 +180,9 @@ class DWARFInfo(object):
          cfi = CallFrameInfo(
              stream=self.eh_frame_sec.stream,
              size=self.eh_frame_sec.size,
-            base_structs=self.structs)
+            address=self.eh_frame_sec.address,
+            base_structs=self.structs,
+            for_eh_frame=True)
          return cfi.get_entries()
  
      def get_aranges(self):
diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py

index 9140f91519d962bd7aa0edf1ed01b7112b38d85f..903e7d514b700960b42669c9bc948d171d7d107a 100644 (file)
--- a/elftools/dwarf/enums.py
+++ b/elftools/dwarf/enums.py
@@ -283,3 +283,26 @@ ENUM_DW_FORM = dict(
  # Inverse mapping for ENUM_DW_FORM
  DW_FORM_raw2name = dict((v, k) for k, v in iteritems(ENUM_DW_FORM))
  
+# Pointer encodings used in .eh_frame sections.
+# See http://www.airs.com/blog/archives/460
+DW_EH_encoding_flags = dict(
+    # Basic value formats (low nibble of the encoding byte)
+    DW_EH_PE_absptr   = 0x00,
+    DW_EH_PE_uleb128  = 0x01,
+    DW_EH_PE_udata2   = 0x02,
+    DW_EH_PE_udata4   = 0x03,
+    DW_EH_PE_udata8   = 0x04,
+
+    # Signed value formats (also in the low nibble)
+    DW_EH_PE_signed   = 0x08,
+    DW_EH_PE_sleb128  = 0x09,
+    DW_EH_PE_sdata2   = 0x0a,
+    DW_EH_PE_sdata4   = 0x0b,
+    DW_EH_PE_sdata8   = 0x0c,
+
+    # Modifiers (high nibble of the encoding byte)
+    DW_EH_PE_pcrel    = 0x10,
+    DW_EH_PE_textrel  = 0x20,
+    DW_EH_PE_datarel  = 0x30,
+    DW_EH_PE_funcrel  = 0x40,
+    DW_EH_PE_aligned  = 0x50,
+    DW_EH_PE_indirect = 0x80,
+
+    # Special value: no value is present
+    DW_EH_PE_omit     = 0xff,
+)
diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py

index b21bd931d5175047a0cb6e1a96946804f1dfd21e..9234cf82b809b5ab0e041b75a1626fa8dd9aa0d3 100644 (file)
--- a/elftools/dwarf/structs.py
+++ b/elftools/dwarf/structs.py
@@ -253,6 +253,16 @@ class DWARFStructs(object):
              )
  
      def _create_callframe_entry_headers(self):
+        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
+            self.Dwarf_initial_length('length'),
+            self.Dwarf_offset('CIE_id'),
+            self.Dwarf_uint8('version'),
+            CString('augmentation'),
+            self.Dwarf_uleb128('code_alignment_factor'),
+            self.Dwarf_sleb128('data_alignment_factor'),
+            self.Dwarf_uleb128('return_address_register'))
+        self.EH_CIE_header = self.Dwarf_CIE_header
+
          # The CIE header was modified in DWARFv4.
          if self.dwarf_version == 4:
              self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
@@ -265,15 +275,6 @@ class DWARFStructs(object):
                  self.Dwarf_uleb128('code_alignment_factor'),
                  self.Dwarf_sleb128('data_alignment_factor'),
                  self.Dwarf_uleb128('return_address_register'))
-        else:
-            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
-                self.Dwarf_initial_length('length'),
-                self.Dwarf_offset('CIE_id'),
-                self.Dwarf_uint8('version'),
-                CString('augmentation'),
-                self.Dwarf_uleb128('code_alignment_factor'),
-                self.Dwarf_sleb128('data_alignment_factor'),
-                self.Dwarf_uleb128('return_address_register'))
  
          self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
              self.Dwarf_initial_length('length'),
diff --git a/elftools/elf/elffile.py b/elftools/elf/elffile.py

index fedd14de2ac6e729c41b9f952eb424c6ea351bc1..02a279c5abefd28d3cc2eb112826b7403b6c4450 100644 (file)
--- a/elftools/elf/elffile.py
+++ b/elftools/elf/elffile.py
@@ -145,8 +145,9 @@ class ELFFile(object):
              We assume that if it has the .debug_info or .zdebug_info section, it
              has all the other required sections as well.
          """
-        return bool(self.get_section_by_name('.debug_info')) or \
-            bool(self.get_section_by_name('.zdebug_info'))
+        # Coerce to bool: this is a predicate, so callers must keep getting
+        # True/False rather than a section object or None.
+        return bool(self.get_section_by_name('.debug_info') or
+            self.get_section_by_name('.zdebug_info') or
+            self.get_section_by_name('.eh_frame'))
  
      def get_dwarf_info(self, relocate_dwarf_sections=True):
          """ Return a DWARFInfo object representing the debugging information in
@@ -158,7 +159,6 @@ class ELFFile(object):
          # Expect that has_dwarf_info was called, so at least .debug_info is
          # present.
          # Sections that aren't found will be passed as None to DWARFInfo.
-        #
  
          section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev',
                           '.debug_str', '.debug_line', '.debug_frame',
@@ -168,9 +168,13 @@ class ELFFile(object):
          if compressed:
              section_names = tuple(map(lambda x: '.z' + x[1:], section_names))
  
+        # As it is loaded in the process image, .eh_frame cannot be compressed
+        section_names += ('.eh_frame', )
+
          (debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name,
           debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name,
-         debug_loc_sec_name, debug_ranges_sec_name) = section_names
+         debug_loc_sec_name, debug_ranges_sec_name,
+         eh_frame_sec_name) = section_names
  
          debug_sections = {}
          for secname in section_names:
@@ -181,7 +185,7 @@ class ELFFile(object):
                  dwarf_section = self._read_dwarf_section(
                      section,
                      relocate_dwarf_sections)
-                if compressed:
+                if compressed and secname.startswith('.z'):
                      dwarf_section = self._decompress_dwarf_section(dwarf_section)
                  debug_sections[secname] = dwarf_section
  
@@ -194,8 +198,7 @@ class ELFFile(object):
                  debug_aranges_sec=debug_sections[debug_aranges_sec_name],
                  debug_abbrev_sec=debug_sections[debug_abbrev_sec_name],
                  debug_frame_sec=debug_sections[debug_frame_sec_name],
-                # TODO(eliben): reading of eh_frame is not hooked up yet
-                eh_frame_sec=None,
+                eh_frame_sec=debug_sections[eh_frame_sec_name],
                  debug_str_sec=debug_sections[debug_str_sec_name],
                  debug_loc_sec=debug_sections[debug_loc_sec_name],
                  debug_ranges_sec=debug_sections[debug_ranges_sec_name],
@@ -413,7 +416,8 @@ class ELFFile(object):
                  stream=section_stream,
                  name=section.name,
                  global_offset=section['sh_offset'],
-                size=section['sh_size'])
+                size=section['sh_size'],
+                address=section['sh_addr'])
  
      @staticmethod
      def _decompress_dwarf_section(section):
diff --git a/elftools/elf/relocation.py b/elftools/elf/relocation.py

index c202bf9b1aa49ade97190efee8c72918f8629b92..633bbf5815cb004505dae93d339ef37299f62ff0 100644 (file)
--- a/elftools/elf/relocation.py
+++ b/elftools/elf/relocation.py
@@ -240,7 +240,7 @@ class RelocationHandler(object):
          ENUM_RELOC_TYPE_x64['R_X86_64_64']: _RELOCATION_RECIPE_TYPE(
              bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
          ENUM_RELOC_TYPE_x64['R_X86_64_PC32']: _RELOCATION_RECIPE_TYPE(
-            bytesize=8, has_addend=True,
+            bytesize=4, has_addend=True,
              calc_func=_reloc_calc_sym_plus_addend_pcrel),
          ENUM_RELOC_TYPE_x64['R_X86_64_32']: _RELOCATION_RECIPE_TYPE(
              bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
diff --git a/scripts/readelf.py b/scripts/readelf.py

index 8f50e222fc35e75ba1254c040fb18f64af68e24d..b19bf18c0706306d55ffbd00679539e870355f89 100755 (executable)
--- a/scripts/readelf.py
+++ b/scripts/readelf.py
@@ -19,7 +19,7 @@ sys.path.insert(0, '.')
  from elftools import __version__
  from elftools.common.exceptions import ELFError
  from elftools.common.py3compat import (
-        ifilter, byte2int, bytes2str, itervalues, str2bytes)
+        ifilter, byte2int, bytes2str, itervalues, str2bytes, iterbytes)
  from elftools.elf.elffile import ELFFile
  from elftools.elf.dynamic import DynamicSection, DynamicSegment
  from elftools.elf.enums import ENUM_D_TAG
@@ -48,7 +48,7 @@ from elftools.dwarf.descriptions import (
      )
  from elftools.dwarf.constants import (
      DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file)
-from elftools.dwarf.callframe import CIE, FDE
+from elftools.dwarf.callframe import CIE, FDE, ZERO
  
  
  class ReadElf(object):
@@ -852,6 +852,8 @@ class ReadElf(object):
      def _dump_debug_info(self):
          """ Dump the debugging info section.
          """
+        if not self._dwarfinfo.has_debug_info:
+            return
          self._emitline('Contents of the %s section:\n' % self._dwarfinfo.debug_info_sec.name)
  
          # Offset of the .debug_info section in the stream
@@ -905,6 +907,8 @@ class ReadElf(object):
          """ Dump the (decoded) line programs from .debug_line
              The programs are dumped in the order of the CUs they belong to.
          """
+        if not self._dwarfinfo.has_debug_info:
+            return
          self._emitline('Decoded dump of debug contents of section %s:\n' % self._dwarfinfo.debug_line_sec.name)
  
          for cu in self._dwarfinfo.iter_CUs():
@@ -963,14 +967,16 @@ class ReadElf(object):
                      # Another readelf oddity...
                      self._emitline()
  
-    def _dump_debug_frames(self):
-        """ Dump the raw frame information from .debug_frame
+    def _dump_frames_info(self, section, cfi_entries):
+        """ Dump the raw call frame info in a section.
+
+        `section` is the Section instance that contains the call frame info
+        while `cfi_entries` must be an iterable that yields the sequence of
+        CIE or FDE instances.
          """
-        if not self._dwarfinfo.has_CFI():
-            return
-        self._emitline('Contents of the %s section:' % self._dwarfinfo.debug_frame_sec.name)
+        self._emitline('Contents of the %s section:' % section.name)
  
-        for entry in self._dwarfinfo.CFI_entries():
+        for entry in cfi_entries:
              if isinstance(entry, CIE):
                  self._emitline('\n%08x %s %s CIE' % (
                      entry.offset,
@@ -981,8 +987,14 @@ class ReadElf(object):
                  self._emitline('  Code alignment factor: %u' % entry['code_alignment_factor'])
                  self._emitline('  Data alignment factor: %d' % entry['data_alignment_factor'])
                  self._emitline('  Return address column: %d' % entry['return_address_register'])
+                if entry.augmentation_bytes:
+                    self._emitline('  Augmentation data:     {}'.format(' '.join(
+                        '{:02x}'.format(ord(b))
+                        for b in iterbytes(entry.augmentation_bytes)
+                    )))
                  self._emitline()
-            else: # FDE
+
+            elif isinstance(entry, FDE):
                  self._emitline('\n%08x %s %s FDE cie=%08x pc=%s..%s' % (
                      entry.offset,
                      self._format_hex(entry['length'], fullhex=True, lead0x=False),
@@ -992,10 +1004,34 @@ class ReadElf(object):
                      self._format_hex(
                          entry['initial_location'] + entry['address_range'],
                          fullhex=True, lead0x=False)))
+                if entry.augmentation_bytes:
+                    self._emitline('  Augmentation data:     {}'.format(' '.join(
+                        '{:02x}'.format(ord(b))
+                        for b in iterbytes(entry.augmentation_bytes)
+                    )))
+
+            else: # ZERO terminator
+                assert isinstance(entry, ZERO)
+                self._emitline('\n%08x ZERO terminator' % entry.offset)
+                continue
  
              self._emit(describe_CFI_instructions(entry))
          self._emitline()
  
+    def _dump_debug_frames(self):
+        """ Dump the raw frame info from .debug_frame and .eh_frame sections.
+
+        The content of .eh_frame, if any, is dumped first, followed by the
+        content of .debug_frame, if any.
+        """
+        if self._dwarfinfo.has_EH_CFI():
+            self._dump_frames_info(
+                    self._dwarfinfo.eh_frame_sec,
+                    self._dwarfinfo.EH_CFI_entries())
+        # This empty line is emitted unconditionally, i.e. even when there is
+        # no .eh_frame content to dump.
+        self._emitline()
+
+        if self._dwarfinfo.has_CFI():
+            self._dump_frames_info(
+                    self._dwarfinfo.debug_frame_sec,
+                    self._dwarfinfo.CFI_entries())
+
      def _dump_debug_aranges(self):
          """ Dump the aranges table
          """
@@ -1034,15 +1070,16 @@ class ReadElf(object):
                  self._format_hex(0, fullhex=True, lead0x=False),
                  self._format_hex(0, fullhex=True, lead0x=False)))
  
-    def _dump_debug_frames_interp(self):
-        """ Dump the interpreted (decoded) frame information from .debug_frame
-        """
-        if not self._dwarfinfo.has_CFI():
-            return
+    def _dump_frames_interp_info(self, section, cfi_entries):
+        """ Dump interpreted (decoded) frame information in a section.
  
-        self._emitline('Contents of the %s section:' % self._dwarfinfo.debug_frame_sec.name)
+        `section` is the Section instance that contains the call frame info
+        while `cfi_entries` must be an iterable that yields the sequence of
+        CIE or FDE instances.
+        """
+        self._emitline('Contents of the %s section:' % section.name)
  
-        for entry in self._dwarfinfo.CFI_entries():
+        for entry in cfi_entries:
              if isinstance(entry, CIE):
                  self._emitline('\n%08x %s %s CIE "%s" cf=%d df=%d ra=%d' % (
                      entry.offset,
@@ -1053,7 +1090,8 @@ class ReadElf(object):
                      entry['data_alignment_factor'],
                      entry['return_address_register']))
                  ra_regnum = entry['return_address_register']
-            else: # FDE
+
+            elif isinstance(entry, FDE):
                  self._emitline('\n%08x %s %s FDE cie=%08x pc=%s..%s' % (
                      entry.offset,
                      self._format_hex(entry['length'], fullhex=True, lead0x=False),
@@ -1064,12 +1102,18 @@ class ReadElf(object):
                          fullhex=True, lead0x=False)))
                  ra_regnum = entry.cie['return_address_register']
  
+            else: # ZERO terminator
+                assert isinstance(entry, ZERO)
+                self._emitline('\n%08x ZERO terminator' % entry.offset)
+                continue
+
+
              # Print the heading row for the decoded table
              self._emit('   LOC')
              self._emit('  ' if entry.structs.address_size == 4 else '          ')
              self._emit(' CFA      ')
  
-            # Decode the table nad look at the registers it describes.
+            # Decode the table and look at the registers it describes.
              # We build reg_order here to match readelf's order. In particular,
              # registers are sorted by their number, and the register matching
              # ra_regnum is always listed last with a special heading.
@@ -1084,8 +1128,8 @@ class ReadElf(object):
                      self._emit('%-6s' % describe_reg_name(regnum))
                  self._emitline('ra      ')
  
-                # Now include ra_regnum in reg_order to print its values similarly
-                # to the other registers.
+                # Now include ra_regnum in reg_order to print its values
+                # similarly to the other registers.
                  reg_order.append(ra_regnum)
              else:
                  self._emitline()
@@ -1104,6 +1148,21 @@ class ReadElf(object):
                  self._emitline()
          self._emitline()
  
+    def _dump_debug_frames_interp(self):
+        """ Dump the interpreted (decoded) frame information from .debug_frame
+        and .eh_frame sections.
+        """
+        if self._dwarfinfo.has_EH_CFI():
+            self._dump_frames_interp_info(
+                    self._dwarfinfo.eh_frame_sec,
+                    self._dwarfinfo.EH_CFI_entries())
+        self._emitline()
+
+        if self._dwarfinfo.has_CFI():
+            self._dump_frames_interp_info(
+                    self._dwarfinfo.debug_frame_sec,
+                    self._dwarfinfo.CFI_entries())
+
      def _emit(self, s=''):
          """ Emit an object to output
          """
diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py

index 7ccbb4d8cd43eb02de7feb12c3ae1ff9a45a3e58..65ef6568c444f1c7a75a2f4c8bf376b0f9b04a6f 100755 (executable)
--- a/test/run_readelf_tests.py
+++ b/test/run_readelf_tests.py
@@ -107,23 +107,10 @@ def compare_output(s1, s2):
      """
      def prepare_lines(s):
          return [line for line in s.lower().splitlines() if line.strip() != '']
-    def filter_readelf_lines(lines):
-        filter_out = False
-        for line in lines:
-            if 'of the .eh_frame section' in line:
-                filter_out = True
-            elif 'of the .debug_frame section' in line or \
-                'of the .zdebug_frame section' in line:
-                filter_out = False
-            if not filter_out:
-                if not line.startswith('unknown: length'):
-                    yield line
  
      lines1 = prepare_lines(s1)
      lines2 = prepare_lines(s2)
  
-    lines1 = list(filter_readelf_lines(lines1))
-
      flag_after_symtable = False
  
      if len(lines1) != len(lines2):
diff --git a/test/test_callframe.py b/test/test_callframe.py

index 617f2c8181aa9f0c4e1c5158f22217cda6a243a6..5e5c0d5983e51de7cc000b0e139fa3751e699133 100644 (file)
--- a/test/test_callframe.py
+++ b/test/test_callframe.py
@@ -63,7 +63,7 @@ class TestCallFrame(unittest.TestCase):
          s.write(data)
  
          structs = DWARFStructs(little_endian=True, dwarf_format=32, address_size=4)
-        cfi = CallFrameInfo(s, len(data), structs)
+        cfi = CallFrameInfo(s, len(data), 0, structs)
          entries = cfi.get_entries()
  
          self.assertEqual(len(entries), 2)
@@ -137,7 +137,7 @@ class TestCallFrame(unittest.TestCase):
          s = BytesIO(data)
  
          structs = DWARFStructs(little_endian=True, dwarf_format=32, address_size=4)
-        cfi = CallFrameInfo(s, len(data), structs)
+        cfi = CallFrameInfo(s, len(data), 0, structs)
          entries = cfi.get_entries()
  
          set_global_machine_arch('x86')
author	Pierre-Marie de Rodat <pmderodat@kawie.fr>
	Tue, 29 Aug 2017 02:05:58 +0000 (22:05 -0400)
committer	Eli Bendersky <eliben@users.noreply.github.com>
	Tue, 29 Aug 2017 02:05:58 +0000 (19:05 -0700)
elftools/common/py3compat.py		patch \| blob \| history
elftools/dwarf/callframe.py		patch \| blob \| history
elftools/dwarf/descriptions.py		patch \| blob \| history
elftools/dwarf/dwarfinfo.py		patch \| blob \| history
elftools/dwarf/enums.py		patch \| blob \| history
elftools/dwarf/structs.py		patch \| blob \| history
elftools/elf/elffile.py		patch \| blob \| history
elftools/elf/relocation.py		patch \| blob \| history
scripts/readelf.py		patch \| blob \| history
test/run_readelf_tests.py		patch \| blob \| history
test/test_callframe.py		patch \| blob \| history