Added Dwarfv5 CU headers (#442)
author Will Denissen <85223780+WillDenissen@users.noreply.github.com>
Thu, 22 Sep 2022 20:11:30 +0000 (22:11 +0200)
committer GitHub <noreply@github.com>
Thu, 22 Sep 2022 20:11:30 +0000 (13:11 -0700)
Extended readelf.py
Added test file compiled with
-fdebug-types-section to readelf tests

elftools/dwarf/descriptions.py
elftools/dwarf/dwarfinfo.py
elftools/dwarf/structs.py
scripts/dwarfdump.py
scripts/readelf.py
test/testfiles_for_readelf/cuv5_x86-64_gcc.so.elf [new file with mode: 0755]

index 90cbaa189e5cb5f8ccee6506314eeeeac255b987..7db7f3f1dc95cea0d0b001f674bf972a14545100 100644 (file)
@@ -187,6 +187,9 @@ _MACHINE_ARCH = None
 def _describe_attr_ref(attr, die, section_offset):
     return '<0x%x>' % (attr.value + die.cu.cu_offset)
 
+def _describe_attr_ref_sig8(attr, die, section_offset):
+    return 'signature: 0x%x' % (attr.value)
+
 def _describe_attr_value_passthrough(attr, die, section_offset):
     return attr.value
 
@@ -257,7 +260,7 @@ _ATTR_DESCRIPTION_MAP = defaultdict(
     DW_FORM_block=_describe_attr_block,
     DW_FORM_flag_present=_describe_attr_present,
     DW_FORM_exprloc=_describe_attr_block,
-    DW_FORM_ref_sig8=_describe_attr_ref,
+    DW_FORM_ref_sig8=_describe_attr_ref_sig8,
 )
 
 _FORM_CLASS = dict(
index 4edc7cd0f4d7ea860905c4d0ca90204dc95209c8..96f33d9e8ca441fcea4725ad0b5993fdac9e5463 100644 (file)
@@ -60,7 +60,7 @@ DwarfConfig = namedtuple('DwarfConfig',
 
 class DWARFInfo(object):
     """ Acts also as a "context" to other major objects, bridging between
-        various parts of the debug infromation.
+        various parts of the debug information.
     """
     def __init__(self,
             config,
@@ -126,7 +126,9 @@ class DWARFInfo(object):
 
         # Cache for abbrev tables: a dict keyed by offset
         self._abbrevtable_cache = {}
-
+        # Cache for program lines tables: a dict keyed by offset
+        self._linetable_cache = {}
         # Cache of compile units and map of their offsets for bisect lookup.
         # Access with .iter_CUs(), .get_CU_containing(), and/or .get_CU_at().
         self._cu_cache = []
@@ -494,15 +496,19 @@ class DWARFInfo(object):
         """
         return 2 <= version <= 5
 
-    def _parse_line_program_at_offset(self, debug_line_offset, structs):
+    def _parse_line_program_at_offset(self, offset, structs):
         """ Given an offset to the .debug_line section, parse the line program
             starting at this offset in the section and return it.
             structs is the DWARFStructs object used to do this parsing.
         """
+
+        if offset in self._linetable_cache:
+            return self._linetable_cache[offset]
+
         lineprog_header = struct_parse(
             structs.Dwarf_lineprog_header,
             self.debug_line_sec.stream,
-            debug_line_offset)
+            offset)
 
         # DWARF5: resolve names
         def resolve_strings(self, lineprog_header, format_field, data_field):
@@ -541,16 +547,19 @@ class DWARFInfo(object):
                 for e in lineprog_header.file_names)
 
         # Calculate the offset to the next line program (see DWARF 6.2.4)
-        end_offset = (  debug_line_offset + lineprog_header['unit_length'] +
+        end_offset = (  offset + lineprog_header['unit_length'] +
                         structs.initial_length_field_size())
 
-        return LineProgram(
+        lineprogram = LineProgram(
             header=lineprog_header,
             stream=self.debug_line_sec.stream,
             structs=structs,
             program_start_offset=self.debug_line_sec.stream.tell(),
             program_end_offset=end_offset)
 
+        self._linetable_cache[offset] = lineprogram
+        return lineprogram
+
     def parse_debugsupinfo(self):
         """
         Extract a filename from either .debug_sup or .gnu_debualtlink sections.
index fe68d439236befc5a2f707e41ed1c0484f4df62d..8fa6b58aeb47a3f89e9d1b82028befb09653bf71 100644 (file)
@@ -180,18 +180,46 @@ class DWARFStructs(object):
         self.Dwarf_sleb128 = SLEB128
 
     def _create_cu_header(self):
+        dwarfv4_CU_header = Struct('',
+            self.Dwarf_offset('debug_abbrev_offset'),
+            self.Dwarf_uint8('address_size')
+        )
+        # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
+        # DWARFv5 7.5.1.1
+        dwarfv5_CP_CU_header = Struct('',                  
+            self.Dwarf_uint8('address_size'),
+            self.Dwarf_offset('debug_abbrev_offset')
+        )
+        # DWARFv5 7.5.1.2
+        dwarfv5_SS_CU_header = Struct('',
+            self.Dwarf_uint8('address_size'),
+            self.Dwarf_offset('debug_abbrev_offset'),
+            self.Dwarf_uint64('dwo_id')
+        )
+        # DWARFv5 7.5.1.3
+        dwarfv5_TS_CU_header = Struct('',
+            self.Dwarf_uint8('address_size'),
+            self.Dwarf_offset('debug_abbrev_offset'),
+            self.Dwarf_uint64('type_signature'),
+            self.Dwarf_offset('type_offset')
+        )
+        dwarfv5_CU_header = Struct('',
+            Enum(self.Dwarf_uint8('unit_type'), **ENUM_DW_UT),
+            Embed(Switch('', lambda ctx: ctx.unit_type,
+            {
+                'DW_UT_compile'       : dwarfv5_CP_CU_header,
+                'DW_UT_partial'       : dwarfv5_CP_CU_header,
+                'DW_UT_skeleton'      : dwarfv5_SS_CU_header,
+                'DW_UT_split_compile' : dwarfv5_SS_CU_header,
+                'DW_UT_type'          : dwarfv5_TS_CU_header,
+                'DW_UT_split_type'    : dwarfv5_TS_CU_header,
+            })))
         self.Dwarf_CU_header = Struct('Dwarf_CU_header',
             self.Dwarf_initial_length('unit_length'),
             self.Dwarf_uint16('version'),
-            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
             IfThenElse('', lambda ctx: ctx['version'] >= 5,
-                Embed(Struct('',
-                    self.Dwarf_uint8('unit_type'),
-                    self.Dwarf_uint8('address_size'),
-                    self.Dwarf_offset('debug_abbrev_offset'))),
-                Embed(Struct('',
-                    self.Dwarf_offset('debug_abbrev_offset'),
-                    self.Dwarf_uint8('address_size'))),
+                Embed(dwarfv5_CU_header),
+                Embed(dwarfv4_CU_header),
             ))
 
     def _create_abbrev_declaration(self):
index e593f1c63f82387b9b5ca1aea9c4bf41542027a0..162af88c76c256bf7e377b6da1d9c3e79b3895b6 100644 (file)
@@ -362,8 +362,7 @@ class ReadElf(object):
         self._emitline(".debug_info contents:")
         for cu in self._dwarfinfo.iter_CUs():
             if cu.header.version >= 5:
-                ut = next(k for (k,v) in ENUM_DW_UT.items() if v == cu.header.unit_type)
-                unit_type_str = " unit_type = %s," % ut
+                unit_type_str = " unit_type = %s," % cu.header.unit_type
             else:
                 unit_type_str = ''
 
index 8a4428254f98e7bdf5bf3004a0454f55eeafa2ad..bca7a5b85d9a84386c3651b7c32cc0850f786b7f 100755 (executable)
@@ -1092,12 +1092,23 @@ class ReadElf(object):
                 self._format_hex(cu['unit_length']),
                 '%s-bit' % cu.dwarf_format()))
             self._emitline('   Version:       %s' % cu['version'])
-            if cu.header.get("unit_type", False):
-                ut = next((key for key, value in ENUM_DW_UT.items() if value == cu.header.unit_type), '?')
-                self._emitline('   Unit Type:     %s (%d)' % (ut, cu.header.unit_type))
-            self._emitline('   Abbrev Offset: %s' % (
-                self._format_hex(cu['debug_abbrev_offset']))),
-            self._emitline('   Pointer Size:  %s' % cu['address_size'])
+            if cu['version'] >= 5:
+                if cu.header.get("unit_type", ''):
+                    unit_type = cu.header.unit_type
+                    self._emitline('   Unit Type:     %s (%d)' % (
+                        unit_type, ENUM_DW_UT.get(cu.header.unit_type, 0)))
+                    self._emitline('   Abbrev Offset: %s' % (
+                        self._format_hex(cu['debug_abbrev_offset'])))
+                    self._emitline('   Pointer Size:  %s' % cu['address_size'])
+                    if unit_type in ('DW_UT_skeleton', 'DW_UT_split_compile'):
+                        self._emitline('   Dwo id:        %s' % cu['dwo_id'])
+                    elif unit_type in ('DW_UT_type', 'DW_UT_split_type'):
+                        self._emitline('   Signature:     0x%x' % cu['type_signature'])
+                        self._emitline('   Type Offset:   0x%x' % cu['type_offset'])
+            else:
+                self._emitline('   Abbrev Offset: %s' % (
+                    self._format_hex(cu['debug_abbrev_offset']))),
+                self._emitline('   Pointer Size:  %s' % cu['address_size'])
 
             # The nesting depth of each DIE within the tree of DIEs must be
             # displayed. To implement this, a counter is incremented each time
@@ -1151,9 +1162,16 @@ class ReadElf(object):
             return
         self._emitline('Contents of the %s section:' % self._dwarfinfo.debug_line_sec.name)
         self._emitline()
+        lineprogram_list = []
 
         for cu in self._dwarfinfo.iter_CUs():
+            # Avoid dumping same lineprogram multiple times
             lineprogram = self._dwarfinfo.line_program_for_CU(cu)
+
+            if lineprogram in lineprogram_list:
+                continue 
+
+            lineprogram_list.append(lineprogram)
             ver5 = lineprogram.header.version >= 5
 
             cu_filename = bytes2str(lineprogram['file_entry'][0].name)
diff --git a/test/testfiles_for_readelf/cuv5_x86-64_gcc.so.elf b/test/testfiles_for_readelf/cuv5_x86-64_gcc.so.elf
new file mode 100755 (executable)
index 0000000..f86fead
Binary files /dev/null and b/test/testfiles_for_readelf/cuv5_x86-64_gcc.so.elf differ