dwarf: initial DWARFv5 support (#363)
author William Woodruff <william@trailofbits.com>
Thu, 27 May 2021 13:38:35 +0000 (09:38 -0400)
committer GitHub <noreply@github.com>
Thu, 27 May 2021 13:38:35 +0000 (06:38 -0700)
* dwarf: initial DWARFv5 support

* dwarf/structs: use Embed to select header layout

* dwarf/structs: DW_FORM_strx family

Not sure how best to handle 24-bit values yet.

* dwarf/structs: use IfThenElse

`If` alone wraps the else in a `Value`.

* dwarf/structs: DW_FORM_addrx family handling

* dwarf_expr: support DW_OP_addrx

Not complete, but gets readelf.py to the end of a single
binary.

* dwarf/constants: DW_UT_* constants

* dwarf/structs: fix some DW_FORMs

* elftools, test: plumbing for DWARFv5 sections

* dwarf/constants: fix typo

* dwarf/structs: re-add a comment that got squashed

* dwarf/structs: DWARFv5 table header scaffolding

* dwarf/constants: typo

* test: add a basic DWARFv5 test

elftools/dwarf/constants.py
elftools/dwarf/dwarf_expr.py
elftools/dwarf/dwarfinfo.py
elftools/dwarf/structs.py
elftools/elf/elffile.py
test/test_dwarf_v5.py [new file with mode: 0644]
test/test_refaddr_bitness.py
test/testfiles_for_unittests/dwarfv5_basic.elf [new file with mode: 0755]

index d1a86fc14027d49b2cfd347c20648a90a3843b35..6542095801005ae66a7bf22978b2e6b0f1f1dfb3 100644 (file)
@@ -207,3 +207,18 @@ DW_CFA_val_offset = 0x14
 DW_CFA_val_offset_sf = 0x15
 DW_CFA_val_expression = 0x16
 DW_CFA_GNU_args_size = 0x2e
+
+
+# Compilation unit types
+#
+# DWARFv5 introduces the "unit_type" field to each CU header, allowing
+# individual CUs to indicate whether they're complete, partial, and so forth.
+# See DWARFv5 3.1 ("Unit Entries") and 7.5.1 ("Unit Headers").
+DW_UT_compile = 0x01
+DW_UT_type = 0x02
+DW_UT_partial = 0x03
+DW_UT_skeleton = 0x04
+DW_UT_split_compile = 0x05
+DW_UT_split_type = 0x06
+DW_UT_lo_user = 0x80
+DW_UT_hi_user = 0xff
index bb85daa00a487c48bdb50bd2926048f81858177c..07c6fa10f11e9ec1dbf3015eb0bedf80ef2af029 100644 (file)
@@ -197,6 +197,7 @@ def _init_dispatch_table(structs):
         return lambda stream: [struct_parse(structs.Dwarf_uleb128(''), stream), read_blob(stream, struct_parse(structs.Dwarf_uint8(''), stream))]
 
     add('DW_OP_addr', parse_op_addr())
+    add('DW_OP_addrx', parse_arg_struct(structs.Dwarf_uleb128('')))
     add('DW_OP_const1u', parse_arg_struct(structs.Dwarf_uint8('')))
     add('DW_OP_const1s', parse_arg_struct(structs.Dwarf_int8('')))
     add('DW_OP_const2u', parse_arg_struct(structs.Dwarf_uint16('')))
index 103fc491dab5af22cbeebcde9d9acc1bca89b589..48c4bb504a18ca54e408c34c44a287817048df15 100644 (file)
@@ -71,7 +71,9 @@ class DWARFInfo(object):
             debug_ranges_sec,
             debug_line_sec,
             debug_pubtypes_sec,
-            debug_pubnames_sec):
+            debug_pubnames_sec,
+            debug_addr_sec,
+            debug_str_offsets_sec):
         """ config:
                 A DwarfConfig object
 
@@ -434,7 +436,7 @@ class DWARFInfo(object):
     def _is_supported_version(self, version):
         """ DWARF version supported by this parser
         """
-        return 2 <= version <= 4
+        return 2 <= version <= 5
 
     def _parse_line_program_at_offset(self, debug_line_offset, structs):
         """ Given an offset to the .debug_line section, parse the line program
index 95130531ac831fb5eec912b4e1e6ae1382fa7085..16f29f6341b08a3598b1fe079a839a9c838e893c 100644 (file)
@@ -11,7 +11,7 @@ from ..construct import (
     UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
     SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
     Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
-    CString, Embed, StaticField
+    CString, Embed, StaticField, IfThenElse
     )
 from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
 from .enums import *
@@ -89,7 +89,7 @@ class DWARFStructs(object):
                 section 7.5.1)
         """
         assert dwarf_format == 32 or dwarf_format == 64
-        assert address_size == 8 or address_size == 4
+        assert address_size == 8 or address_size == 4, str(address_size)
         self.little_endian = little_endian
         self.dwarf_format = dwarf_format
         self.address_size = address_size
@@ -138,6 +138,8 @@ class DWARFStructs(object):
         self._create_callframe_entry_headers()
         self._create_aranges_header()
         self._create_nameLUT_header()
+        self._create_string_offsets_table_header()
+        self._create_address_table_header()
 
     def _create_initial_length(self):
         def _InitialLength(name):
@@ -160,8 +162,16 @@ class DWARFStructs(object):
         self.Dwarf_CU_header = Struct('Dwarf_CU_header',
             self.Dwarf_initial_length('unit_length'),
             self.Dwarf_uint16('version'),
-            self.Dwarf_offset('debug_abbrev_offset'),
-            self.Dwarf_uint8('address_size'))
+            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
+            IfThenElse('', lambda ctx: ctx['version'] >= 5,
+                Embed(Struct('',
+                    self.Dwarf_uint8('unit_type'),
+                    self.Dwarf_uint8('address_size'),
+                    self.Dwarf_offset('debug_abbrev_offset'))),
+                Embed(Struct('',
+                    self.Dwarf_offset('debug_abbrev_offset'),
+                    self.Dwarf_uint8('address_size'))),
+            ))
 
     def _create_abbrev_declaration(self):
         self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
@@ -177,6 +187,11 @@ class DWARFStructs(object):
     def _create_dw_form(self):
         self.Dwarf_dw_form = dict(
             DW_FORM_addr=self.Dwarf_target_addr(''),
+            DW_FORM_addrx=self.Dwarf_uleb128(''),
+            DW_FORM_addrx1=self.Dwarf_uint8(''),
+            DW_FORM_addrx2=self.Dwarf_uint16(''),
+            # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
+            DW_FORM_addrx4=self.Dwarf_uint32(''),
 
             DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
             DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
@@ -193,6 +208,10 @@ class DWARFStructs(object):
 
             DW_FORM_string=CString(''),
             DW_FORM_strp=self.Dwarf_offset(''),
+            DW_FORM_strx1=self.Dwarf_uint8(''),
+            DW_FORM_strx2=self.Dwarf_uint16(''),
+            # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
+            DW_FORM_strx4=self.Dwarf_uint64(''),
             DW_FORM_flag=self.Dwarf_uint8(''),
 
             DW_FORM_ref=self.Dwarf_uint32(''),
@@ -233,6 +252,22 @@ class DWARFStructs(object):
             self.Dwarf_length('debug_info_length')
             )
 
+    def _create_string_offsets_table_header(self):
+        self.Dwarf_string_offsets_table_header = Struct(
+            "Dwarf_string_offets_table_header",
+            self.Dwarf_initial_length('unit_length'),
+            self.Dwarf_uint16('version'),
+            self.Dwarf_uint16('padding'),
+            )
+
+    def _create_address_table_header(self):
+        self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
+            self.Dwarf_initial_length('unit_length'),
+            self.Dwarf_uint16('version'),
+            self.Dwarf_uint8('address_size'),
+            self.Dwarf_uint8('segment_selector_size'),
+            )
+
     def _create_lineprog_header(self):
         # A file entry is terminated by a NULL byte, so we don't want to parse
         # past it. Therefore an If is used.
index 779ae3b1475bddf9204c3b36d0a7ab8f035c6cd7..acddc8952bfb212ad7d25e3c67eabdff20ae75c5 100644 (file)
@@ -212,7 +212,7 @@ class ELFFile(object):
         section_names = ('.debug_info', '.debug_aranges', '.debug_abbrev',
                          '.debug_str', '.debug_line', '.debug_frame',
                          '.debug_loc', '.debug_ranges', '.debug_pubtypes',
-                         '.debug_pubnames')
+                         '.debug_pubnames', '.debug_addr', '.debug_str_offsets')
 
         compressed = bool(self.get_section_by_name('.zdebug_info'))
         if compressed:
@@ -224,7 +224,8 @@ class ELFFile(object):
         (debug_info_sec_name, debug_aranges_sec_name, debug_abbrev_sec_name,
          debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name,
          debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name,
-         debug_pubnames_name, eh_frame_sec_name) = section_names
+         debug_pubnames_name, debug_addr_name, debug_str_offsets_name,
+         eh_frame_sec_name) = section_names
 
         debug_sections = {}
         for secname in section_names:
@@ -253,8 +254,10 @@ class ELFFile(object):
                 debug_loc_sec=debug_sections[debug_loc_sec_name],
                 debug_ranges_sec=debug_sections[debug_ranges_sec_name],
                 debug_line_sec=debug_sections[debug_line_sec_name],
-                debug_pubtypes_sec = debug_sections[debug_pubtypes_name],
-                debug_pubnames_sec = debug_sections[debug_pubnames_name]
+                debug_pubtypes_sec=debug_sections[debug_pubtypes_name],
+                debug_pubnames_sec=debug_sections[debug_pubnames_name],
+                debug_addr_sec=debug_sections[debug_addr_name],
+                debug_str_offsets_sec=debug_sections[debug_str_offsets_name],
                 )
 
     def has_ehabi_info(self):
diff --git a/test/test_dwarf_v5.py b/test/test_dwarf_v5.py
new file mode 100644 (file)
index 0000000..0468d07
--- /dev/null
@@ -0,0 +1,21 @@
+import os
+import unittest
+
+from elftools.elf.elffile import ELFFile
+
+
+class TestDWARFv5(unittest.TestCase):
+    def test_dwarfv5_parses(self):
+        dwarfv5_basic = os.path.join('test', 'testfiles_for_unittests', 'dwarfv5_basic.elf')
+        with open(dwarfv5_basic, 'rb') as f:
+            elf = ELFFile(f)
+            # DWARFv5 debugging information is detected.
+            self.assertTrue(elf.has_dwarf_info())
+
+            # Fetching DWARFInfo for DWARFv5 doesn't completely explode.
+            dwarf = elf.get_dwarf_info()
+            self.assertIsNotNone(dwarf)
+
+
+if __name__ == '__main__':
+    unittest.main()
index 85fc56d77208f1e1d9aa712aaadc57a0f7283ed9..b9e88747fce954e8534a5c301ff626fcf22a427c 100644 (file)
@@ -43,7 +43,9 @@ class TestRefAddrOnDWARFv2With64BitTarget(unittest.TestCase):
             debug_ranges_sec = None,
             debug_line_sec = None,
             debug_pubtypes_sec = None,
-            debug_pubnames_sec = None
+            debug_pubnames_sec = None,
+            debug_addr_sec=None,
+            debug_str_offsets_sec=None,
         )
 
         CUs = [cu for cu in di.iter_CUs()]
diff --git a/test/testfiles_for_unittests/dwarfv5_basic.elf b/test/testfiles_for_unittests/dwarfv5_basic.elf
new file mode 100755 (executable)
index 0000000..4a9363c
Binary files /dev/null and b/test/testfiles_for_unittests/dwarfv5_basic.elf differ