More DWARFv4 parsing goodness. Also added some hookups for parsing .eh_frame
author Eli Bendersky <eliben@gmail.com>
Sat, 25 May 2013 22:47:55 +0000 (15:47 -0700)
committer Eli Bendersky <eliben@gmail.com>
Sat, 25 May 2013 22:47:55 +0000 (15:47 -0700)
but this isn't enabled yet, because pyelftools can't yet parse that section
successfully (it needs a deeper look).

I tweaked the build of the gcc48-simple sample to generate .debug_frame
as well.

elftools/dwarf/callframe.py
elftools/dwarf/dwarfinfo.py
elftools/dwarf/structs.py
elftools/elf/elffile.py
elftools/elf/relocation.py
test/testfiles_for_readelf/gcc48-simple.o
test/testfiles_for_readelf/gcc48-simple.src.c [new file with mode: 0644]

index 5b35af63e873925ac46fe6e5ba4f016471f53441..264adb8dc31b317b6f76bdc87c27529e97db3bf3 100644 (file)
@@ -26,13 +26,13 @@ class CallFrameInfo(object):
             Eventually, each entry gets its own structs based on the initial
             length field it starts with. The address_size, however, is taken
             from base_structs. This appears to be a limitation of the DWARFv3
-            standard, fixed in v4 (where an address_size field exists for each
-            CFI. A discussion I had on dwarf-discuss confirms this.
-            Currently for base_structs I simply use the elfclass of the
-            containing file, but more sophisticated methods are used by
-            libdwarf and others, such as guessing which CU contains which FDEs
-            (based on their address ranges) and taking the address_size from
-            those CUs.
+            standard, fixed in v4.
+            A discussion I had on dwarf-discuss confirms this.
+            So for DWARFv4 we'll take the address size from the CIE header,
+            but for earlier versions will use the elfclass of the containing
+            file; more sophisticated methods are used by libdwarf and others,
+            such as guessing which CU contains which FDEs (based on their
+            address ranges) and taking the address_size from those CUs.
     """
     def __init__(self, stream, size, base_structs):
         self.stream = stream
@@ -99,6 +99,14 @@ class CallFrameInfo(object):
         header = struct_parse(
             header_struct, self.stream, offset)
 
+        # If this is DWARF version 4 or later, we can have a more precise
+        # address size, read from the CIE header.
+        if entry_structs.dwarf_version >= 4:
+            entry_structs = DWARFStructs(
+                little_endian=entry_structs.little_endian,
+                dwarf_format=entry_structs.dwarf_format,
+                address_size=header.address_size)
+
         # For convenience, compute the end offset for this entry
         end_offset = (
             offset + header.length +
index 2c6cc8978231b4a29a656979c74ec30168b4d3e4..e5c0e71263761bbd9512d5799b5e620961726901 100644 (file)
@@ -59,6 +59,7 @@ class DWARFInfo(object):
             debug_info_sec,
             debug_abbrev_sec,
             debug_frame_sec,
+            eh_frame_sec,
             debug_str_sec,
             debug_loc_sec,
             debug_ranges_sec,
@@ -75,6 +76,7 @@ class DWARFInfo(object):
         self.debug_info_sec = debug_info_sec
         self.debug_abbrev_sec = debug_abbrev_sec
         self.debug_frame_sec = debug_frame_sec
+        self.eh_frame_sec = eh_frame_sec
         self.debug_str_sec = debug_str_sec
         self.debug_loc_sec = debug_loc_sec
         self.debug_ranges_sec = debug_ranges_sec
@@ -139,12 +141,12 @@ class DWARFInfo(object):
             return None
 
     def has_CFI(self):
-        """ Does this dwarf info has a CFI section?
+        """ Does this dwarf info have a .debug_frame CFI section?
         """
         return self.debug_frame_sec is not None
 
     def CFI_entries(self):
-        """ Get a list of CFI entries from the .debug_frame section.
+        """ Get a list of dwarf_frame CFI entries from the .debug_frame section.
         """
         cfi = CallFrameInfo(
             stream=self.debug_frame_sec.stream,
@@ -152,6 +154,20 @@ class DWARFInfo(object):
             base_structs=self.structs)
         return cfi.get_entries()
 
+    def has_EH_CFI(self):
+        """ Does this dwarf info have an .eh_frame CFI section?
+        """
+        return self.eh_frame_sec is not None
+
+    def EH_CFI_entries(self):
+        """ Get a list of eh_frame CFI entries from the .eh_frame section.
+        """
+        cfi = CallFrameInfo(
+            stream=self.eh_frame_sec.stream,
+            size=self.eh_frame_sec.size,
+            base_structs=self.structs)
+        return cfi.get_entries()
+
     def location_lists(self):
         """ Get a LocationLists object representing the .debug_loc section of
             the DWARF data, or None if this section doesn't exist.
@@ -212,7 +228,7 @@ class DWARFInfo(object):
             cu_structs = DWARFStructs(
                 little_endian=self.config.little_endian,
                 dwarf_format=dwarf_format,
-                 address_size=8)
+                address_size=8)
 
         cu_die_offset = self.debug_info_sec.stream.tell()
         dwarf_assert(
index fb4074a096cf6c911623cc963039211a24468142..2cc281fc002ef8b57f68cde407413fd3f191b560 100644 (file)
@@ -71,8 +71,12 @@ class DWARFStructs(object):
 
         See also the documentation of public methods.
     """
-    def __init__(self, little_endian, dwarf_format, address_size):
-        """ little_endian:
+    def __init__(self,
+                 little_endian, dwarf_format, address_size, dwarf_version=2):
+        """ dwarf_version:
+                Numeric DWARF version
+
+            little_endian:
                 True if the file is little endian, False if big
 
             dwarf_format:
@@ -87,6 +91,7 @@ class DWARFStructs(object):
         self.little_endian = little_endian
         self.dwarf_format = dwarf_format
         self.address_size = address_size
+        self.dwarf_version = dwarf_version
         self._create_structs()
 
     def initial_length_field_size(self):
@@ -232,14 +237,27 @@ class DWARFStructs(object):
             )
 
     def _create_callframe_entry_headers(self):
-        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
-            self.Dwarf_initial_length('length'),
-            self.Dwarf_offset('CIE_id'),
-            self.Dwarf_uint8('version'),
-            CString('augmentation'),
-            self.Dwarf_uleb128('code_alignment_factor'),
-            self.Dwarf_sleb128('data_alignment_factor'),
-            self.Dwarf_uleb128('return_address_register'))
+        # DWARFv4+ CIE headers carry extra address_size/segment_size fields.
+        if self.dwarf_version >= 4:
+            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
+                self.Dwarf_initial_length('length'),
+                self.Dwarf_offset('CIE_id'),
+                self.Dwarf_uint8('version'),
+                CString('augmentation'),
+                self.Dwarf_uint8('address_size'),
+                self.Dwarf_uint8('segment_size'),
+                self.Dwarf_uleb128('code_alignment_factor'),
+                self.Dwarf_sleb128('data_alignment_factor'),
+                self.Dwarf_uleb128('return_address_register'))
+        else:
+            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
+                self.Dwarf_initial_length('length'),
+                self.Dwarf_offset('CIE_id'),
+                self.Dwarf_uint8('version'),
+                CString('augmentation'),
+                self.Dwarf_uleb128('code_alignment_factor'),
+                self.Dwarf_sleb128('data_alignment_factor'),
+                self.Dwarf_uleb128('return_address_register'))
 
         self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
             self.Dwarf_initial_length('length'),
index a8e605ffad8e8bde0b89fc73d8a6976cc124cdca..ccb7e562a220009357cbb0c58d849726739a3fbc 100644 (file)
@@ -126,15 +126,15 @@ class ELFFile(object):
         #
         debug_sections = {}
         for secname in (b'.debug_info', b'.debug_abbrev', b'.debug_str',
-                        b'.debug_line', b'.debug_frame', b'.debug_loc',
-                        b'.debug_ranges'):
+                        b'.debug_line', b'.debug_frame',
+                        b'.debug_loc', b'.debug_ranges'):
             section = self.get_section_by_name(secname)
             if section is None:
                 debug_sections[secname] = None
             else:
                 debug_sections[secname] = self._read_dwarf_section(
-                        section,
-                        relocate_dwarf_sections)
+                    section,
+                    relocate_dwarf_sections)
 
         return DWARFInfo(
                 config=DwarfConfig(
@@ -144,6 +144,8 @@ class ELFFile(object):
                 debug_info_sec=debug_sections[b'.debug_info'],
                 debug_abbrev_sec=debug_sections[b'.debug_abbrev'],
                 debug_frame_sec=debug_sections[b'.debug_frame'],
+                # TODO(eliben): reading of eh_frame is not hooked up yet
+                eh_frame_sec=None,
                 debug_str_sec=debug_sections[b'.debug_str'],
                 debug_loc_sec=debug_sections[b'.debug_loc'],
                 debug_ranges_sec=debug_sections[b'.debug_ranges'],
index 7c2b74c56f124d78245eb53d2cd649b19203bd8e..4ae73d72ff6045d1ea3f5e044e1024e52509fe46 100644 (file)
@@ -202,6 +202,9 @@ class RelocationHandler(object):
     def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0):
         return sym_value + addend
 
+    def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0):
+        return sym_value + addend - offset
+
     _RELOCATION_RECIPES_X86 = {
         ENUM_RELOC_TYPE_i386['R_386_NONE']: _RELOCATION_RECIPE_TYPE(
             bytesize=4, has_addend=False, calc_func=_reloc_calc_identity),
@@ -218,6 +221,10 @@ class RelocationHandler(object):
             bytesize=8, has_addend=True, calc_func=_reloc_calc_identity),
         ENUM_RELOC_TYPE_x64['R_X86_64_64']: _RELOCATION_RECIPE_TYPE(
             bytesize=8, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
+        # R_X86_64_PC32 relocates a 32-bit (4-byte) field, not a 64-bit one.
+        ENUM_RELOC_TYPE_x64['R_X86_64_PC32']: _RELOCATION_RECIPE_TYPE(
+            bytesize=4, has_addend=True,
+            calc_func=_reloc_calc_sym_plus_addend_pcrel),
         ENUM_RELOC_TYPE_x64['R_X86_64_32']: _RELOCATION_RECIPE_TYPE(
             bytesize=4, has_addend=True, calc_func=_reloc_calc_sym_plus_addend),
         ENUM_RELOC_TYPE_x64['R_X86_64_32S']: _RELOCATION_RECIPE_TYPE(
index 5eb04fa7328742143c35beaaceab2b68ac4a12ad..851a21ace8e5e24f76efa499caac01316a6f5b66 100644 (file)
Binary files a/test/testfiles_for_readelf/gcc48-simple.o and b/test/testfiles_for_readelf/gcc48-simple.o differ
diff --git a/test/testfiles_for_readelf/gcc48-simple.src.c b/test/testfiles_for_readelf/gcc48-simple.src.c
new file mode 100644 (file)
index 0000000..23849b3
--- /dev/null
@@ -0,0 +1,19 @@
+/* Generated by compiling with gcc 4.8 as follows:
+**
+** gcc-4.8 -O0 -g -fno-dwarf2-cfi-asm -c dwarf4_simple.c -o gcc48-simple.o
+**
+** Note: -fno-dwarf2-cfi-asm tells gcc to generate .debug_frame as well
+** as the .eh_frame it generates by default.
+**
+*/
+
+extern int bar(int);
+extern int baz(int);
+
+int foo(int v) {
+    int x = bar(v);
+    int i;
+    for (i = 0; i < v; ++i)
+        x += bar(i) + bar(v) * baz(i);
+    return x;
+}