Version 5 lineprogram header (#411)

author Seva Alekseyev <sevaa@yarxi.ru>

Mon, 13 Jun 2022 12:44:44 +0000 (08:44 -0400)

committer GitHub <noreply@github.com>

Mon, 13 Jun 2022 12:44:44 +0000 (05:44 -0700)
author Seva Alekseyev <sevaa@yarxi.ru>
Mon, 13 Jun 2022 12:44:44 +0000 (08:44 -0400)
committer GitHub <noreply@github.com>
Mon, 13 Jun 2022 12:44:44 +0000 (05:44 -0700)
diff --git a/elftools/common/py3compat.py b/elftools/common/py3compat.py

index 259926440a4340b81812f21f845d830567685f7f..c701cc0d4a19c3a6663a8b6a53dafda228a17e9a 100644 (file)
--- a/elftools/common/py3compat.py
+++ b/elftools/common/py3compat.py
@@ -12,6 +12,7 @@ PY3 = sys.version_info[0] == 3
  
  if PY3:
      import io
  
  if PY3:
      import io
+    from pathlib import Path
      StringIO = io.StringIO
      BytesIO = io.BytesIO
  
      StringIO = io.StringIO
      BytesIO = io.BytesIO
  
@@ -42,8 +43,15 @@ if PY3:
      ifilter = filter
  
      maxint = sys.maxsize
      ifilter = filter
  
      maxint = sys.maxsize
+
+    def path_to_posix(s):
+        return Path(s).as_posix()
+
  else:
      import cStringIO
  else:
      import cStringIO
+    import os
+    import posixpath
+
      StringIO = BytesIO = cStringIO.StringIO
  
      def bytes2hex(b, sep=''):
      StringIO = BytesIO = cStringIO.StringIO
  
      def bytes2hex(b, sep=''):
@@ -63,6 +71,9 @@ else:
  
      maxint = sys.maxint
  
  
      maxint = sys.maxint
  
+    def path_to_posix(s):
+        return posixpath.join(*os.path.split(s))
+
  
  def iterkeys(d):
      """Return an iterator over the keys of a dictionary."""
  
  def iterkeys(d):
      """Return an iterator over the keys of a dictionary."""
diff --git a/elftools/dwarf/descriptions.py b/elftools/dwarf/descriptions.py

index ef6ac19fb32b51bde5fb64fa2fb9a762f624e661..f51f69b02ae3b5930f312be6e2e05477b4faa82a 100644 (file)
--- a/elftools/dwarf/descriptions.py
+++ b/elftools/dwarf/descriptions.py
@@ -206,6 +206,10 @@ def _describe_attr_strp(attr, die, section_offset):
      return '(indirect string, offset: 0x%x): %s' % (
          attr.raw_value, bytes2str(attr.value))
  
      return '(indirect string, offset: 0x%x): %s' % (
          attr.raw_value, bytes2str(attr.value))
  
+def _describe_attr_line_strp(attr, die, section_offset):
+    return '(indirect line string, offset: 0x%x): %s' % (
+        attr.raw_value, bytes2str(attr.value))        
+
  def _describe_attr_string(attr, die, section_offset):
      return bytes2str(attr.value)
  
  def _describe_attr_string(attr, die, section_offset):
      return bytes2str(attr.value)
  
@@ -247,6 +251,7 @@ _ATTR_DESCRIPTION_MAP = defaultdict(
      DW_FORM_udata=_describe_attr_value_passthrough,
      DW_FORM_string=_describe_attr_string,
      DW_FORM_strp=_describe_attr_strp,
      DW_FORM_udata=_describe_attr_value_passthrough,
      DW_FORM_string=_describe_attr_string,
      DW_FORM_strp=_describe_attr_strp,
+    DW_FORM_line_strp=_describe_attr_line_strp,
      DW_FORM_block1=_describe_attr_block,
      DW_FORM_block2=_describe_attr_block,
      DW_FORM_block4=_describe_attr_block,
      DW_FORM_block1=_describe_attr_block,
      DW_FORM_block2=_describe_attr_block,
      DW_FORM_block4=_describe_attr_block,
@@ -312,12 +317,29 @@ _DESCR_DW_LANG = {
      DW_LANG_UPC: '(Unified Parallel C)',
      DW_LANG_D: '(D)',
      DW_LANG_Python: '(Python)',
      DW_LANG_UPC: '(Unified Parallel C)',
      DW_LANG_D: '(D)',
      DW_LANG_Python: '(Python)',
+    DW_LANG_OpenCL: '(OpenCL)',
+    DW_LANG_Go: '(Go)',
+    DW_LANG_Modula3: '(Modula 3)',
+    DW_LANG_Haskell: '(Haskell)',
+    DW_LANG_C_plus_plus_03: '(C++03)',
+    DW_LANG_C_plus_plus_11: '(C++11)',
+    DW_LANG_OCaml: '(OCaml)',
+    DW_LANG_Rust: '(Rust)',
+    DW_LANG_C11: '(C11)',
+    DW_LANG_Swift: '(Swift)',
+    DW_LANG_Julia: '(Julia)',
+    DW_LANG_Dylan: '(Dylan)',
+    DW_LANG_C_plus_plus_14: '(C++14)',
+    DW_LANG_Fortran03: '(Fortran 03)',
+    DW_LANG_Fortran08: '(Fortran 08)',
+    DW_LANG_RenderScript: '(RenderScript)',
+    DW_LANG_BLISS: '(Bliss)', # Not in binutils
      DW_LANG_Mips_Assembler: '(MIPS assembler)',
      DW_LANG_HP_Bliss: '(HP Bliss)',
      DW_LANG_HP_Basic91: '(HP Basic 91)',
      DW_LANG_HP_Pascal91: '(HP Pascal 91)',
      DW_LANG_HP_IMacro: '(HP IMacro)',
      DW_LANG_Mips_Assembler: '(MIPS assembler)',
      DW_LANG_HP_Bliss: '(HP Bliss)',
      DW_LANG_HP_Basic91: '(HP Basic 91)',
      DW_LANG_HP_Pascal91: '(HP Pascal 91)',
      DW_LANG_HP_IMacro: '(HP IMacro)',
-    DW_LANG_HP_Assembler: '(HP assembler)',
+    DW_LANG_HP_Assembler: '(HP assembler)'
  }
  
  _DESCR_DW_ATE = {
  }
  
  _DESCR_DW_ATE = {
diff --git a/elftools/dwarf/dwarfinfo.py b/elftools/dwarf/dwarfinfo.py

index a300e7b9a07476c6096e9c50240536ce2fce2355..69d7b91b32888967129351989e055e39dc05d431 100644 (file)
--- a/elftools/dwarf/dwarfinfo.py
+++ b/elftools/dwarf/dwarfinfo.py
@@ -243,6 +243,14 @@ class DWARFInfo(object):
          """ Given a CU object, fetch the line program it points to from the
              .debug_line section.
              If the CU doesn't point to a line program, return None.
          """ Given a CU object, fetch the line program it points to from the
              .debug_line section.
              If the CU doesn't point to a line program, return None.
+            
+            Note about directory and file names. They are returned as two collections
+            in the lineprogram object's header - include_directory and file_entry.
+
+            In DWARFv5, they have introduced a different, extensible format for those
+            collections. So in a lineprogram v5+, there are two more collections in
+            the header - directories and file_names. Those might contain extra DWARFv5
+            information that is not exposed in include_directory and file_entry.
          """
          # The line program is pointed to by the DW_AT_stmt_list attribute of
          # the top DIE of a CU.
          """
          # The line program is pointed to by the DW_AT_stmt_list attribute of
          # the top DIE of a CU.
@@ -456,6 +464,34 @@ class DWARFInfo(object):
              self.debug_line_sec.stream,
              debug_line_offset)
  
              self.debug_line_sec.stream,
              debug_line_offset)
  
+        # DWARF5: resolve names
+        def resolve_strings(self, lineprog_header, format_field, data_field):
+            if lineprog_header.get(format_field, False):
+                data = lineprog_header[data_field]
+                for field in lineprog_header[format_field]:
+                    def replace_value(data, content_type, replacer):
+                        for entry in data:
+                            entry[content_type] = replacer(entry[content_type])
+
+                    if field.form == 'DW_FORM_line_strp':
+                        replace_value(data, field.content_type, self.get_string_from_linetable)
+                    elif field.form == 'DW_FORM_strp':
+                        replace_value(data, field.content_type, self.get_string_from_table)
+                    elif field.form in ('DW_FORM_strp_sup', 'DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4'):
+                        raise NotImplementedError()
+
+        resolve_strings(self, lineprog_header, 'directory_entry_format', 'directories')
+        resolve_strings(self, lineprog_header, 'file_name_entry_format', 'file_names')
+
+        # DWARF5: provide compatible file/directory name arrays for legacy lineprogram consumers
+        if lineprog_header.get('directories', False):
+            lineprog_header.include_directory = tuple(d.DW_LNCT_path for d in lineprog_header.directories)
+        if lineprog_header.get('file_names', False):
+            translate = namedtuple("file_entry", "name dir_index mtime length")
+            lineprog_header.file_entry = tuple(
+                translate(e.get('DW_LNCT_path'), e.get('DW_LNCT_directory_index'), e.get('DW_LNCT_timestamp'), e.get('DW_LNCT_size'))
+                for e in lineprog_header.file_names)
+        
          # Calculate the offset to the next line program (see DWARF 6.2.4)
          end_offset = (  debug_line_offset + lineprog_header['unit_length'] +
                          structs.initial_length_field_size())
          # Calculate the offset to the next line program (see DWARF 6.2.4)
          end_offset = (  debug_line_offset + lineprog_header['unit_length'] +
                          structs.initial_length_field_size())
diff --git a/elftools/dwarf/enums.py b/elftools/dwarf/enums.py

index bfeca5907f84889cafaf7f7ac16700f3d6dbee2c..a52e8034413350d07aa402145ac1aa668dcd3f9f 100644 (file)
--- a/elftools/dwarf/enums.py
+++ b/elftools/dwarf/enums.py
@@ -394,3 +394,24 @@ DW_EH_encoding_flags = dict(
  
      DW_EH_PE_omit     = 0xff,
  )
  
      DW_EH_PE_omit     = 0xff,
  )
+
+ENUM_DW_LNCT = dict(
+    DW_LNCT_path             = 0x1,
+    DW_LNCT_directory_index  = 0x2,
+    DW_LNCT_timestamp        = 0x3,
+    DW_LNCT_size             = 0x4,
+    DW_LNCT_MD5              = 0x5,
+    DW_LNCT_lo_user          = 0x2000,
+    DW_LNCT_hi_user          = 0x3fff
+)
+
+ENUM_DW_UT = dict(
+    DW_UT_compile       = 0x01,
+    DW_UT_type          = 0x02,
+    DW_UT_partial       = 0x03,
+    DW_UT_skeleton      = 0x04,
+    DW_UT_split_compile = 0x05,
+    DW_UT_split_type    = 0x06,
+    DW_UT_lo_user       = 0x80,
+    DW_UT_hi_user       = 0xff
+)
diff --git a/elftools/dwarf/structs.py b/elftools/dwarf/structs.py

index 79e0d8f230ac10648b6d2161eae3bd0bb31cce8e..46cd81777d15c19951246bdc674c3dd6d9a9dae9 100644 (file)
--- a/elftools/dwarf/structs.py
+++ b/elftools/dwarf/structs.py
@@ -7,11 +7,13 @@
  # Eli Bendersky (eliben@gmail.com)
  # This code is in the public domain
  #-------------------------------------------------------------------------------
  # Eli Bendersky (eliben@gmail.com)
  # This code is in the public domain
  #-------------------------------------------------------------------------------
+from elftools.construct.core import Subconstruct
+from elftools.construct.macros import Embedded
  from ..construct import (
      UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
      SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
      Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
  from ..construct import (
      UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
      SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
      Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
-    CString, Embed, StaticField, IfThenElse
+    CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence
      )
  from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
  from .enums import *
      )
  from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
  from .enums import *
@@ -282,27 +284,80 @@ class DWARFStructs(object):
                      self.Dwarf_uleb128('mtime'),
                      self.Dwarf_uleb128('length')))))
  
                      self.Dwarf_uleb128('mtime'),
                      self.Dwarf_uleb128('length')))))
  
+        class FormattedEntry(Construct):
+            # Generates a parser based on a previously parsed piece,
+            # similar to the deprecated Dynamic.
+            # Strings are resolved later, since that potentially requires
+            # looking at another section.
+            def __init__(self, name, structs, format_field):
+                Construct.__init__(self, name)
+                self.structs = structs
+                self.format_field = format_field
+
+            def _parse(self, stream, context):
+                # Somewhat tricky technique here, explicitly writing back to the context
+                if self.format_field + "_parser" in context:
+                    parser = context[self.format_field + "_parser"]
+                else:
+                    fields = tuple(
+                        Rename(f.content_type, self.structs.Dwarf_dw_form[f.form])
+                        for f in context[self.format_field])
+                    parser = Struct('formatted_entry', *fields)
+                    context[self.format_field + "_parser"] = parser
+                return parser._parse(stream, context)
+ 
+        ver5 = lambda ctx: ctx.version >= 5
+        
          self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
              self.Dwarf_initial_length('unit_length'),
              self.Dwarf_uint16('version'),
          self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
              self.Dwarf_initial_length('unit_length'),
              self.Dwarf_uint16('version'),
+            If(ver5,
+                self.Dwarf_uint8("address_size"),
+                None),
+            If(ver5,
+                self.Dwarf_uint8("segment_selector_size"),
+                None),
              self.Dwarf_offset('header_length'),
              self.Dwarf_uint8('minimum_instruction_length'),
              self.Dwarf_offset('header_length'),
              self.Dwarf_uint8('minimum_instruction_length'),
-            If(lambda ctx: ctx['version'] >= 4,
+            If(lambda ctx: ctx.version >= 4,
                  self.Dwarf_uint8("maximum_operations_per_instruction"),
                  1),
              self.Dwarf_uint8('default_is_stmt'),
              self.Dwarf_int8('line_base'),
              self.Dwarf_uint8('line_range'),
              self.Dwarf_uint8('opcode_base'),
                  self.Dwarf_uint8("maximum_operations_per_instruction"),
                  1),
              self.Dwarf_uint8('default_is_stmt'),
              self.Dwarf_int8('line_base'),
              self.Dwarf_uint8('line_range'),
              self.Dwarf_uint8('opcode_base'),
-            Array(lambda ctx: ctx['opcode_base'] - 1,
+            Array(lambda ctx: ctx.opcode_base - 1,
                    self.Dwarf_uint8('standard_opcode_lengths')),
                    self.Dwarf_uint8('standard_opcode_lengths')),
-            RepeatUntilExcluding(
-                lambda obj, ctx: obj == b'',
-                CString('include_directory')),
-            RepeatUntilExcluding(
-                lambda obj, ctx: len(obj.name) == 0,
-                self.Dwarf_lineprog_file_entry),
-            )
+            If(ver5,
+                PrefixedArray(
+                    Struct('directory_entry_format',
+                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
+                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
+                    self.Dwarf_uint8("directory_entry_format_count"))),
+            If(ver5, # Name deliberately doesn't match the legacy object, since the format can't be made compatible
+                PrefixedArray(
+                    FormattedEntry('directories', self, "directory_entry_format"),
+                    self.Dwarf_uleb128('directories_count'))),
+            If(ver5,
+                PrefixedArray(
+                    Struct('file_name_entry_format',
+                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
+                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
+                    self.Dwarf_uint8("file_name_entry_format_count"))),
+            If(ver5,
+                PrefixedArray(
+                    FormattedEntry('file_names', self, "file_name_entry_format"),
+                    self.Dwarf_uleb128('file_names_count'))),
+            # Legacy  directories/files - DWARF < 5 only
+            If(lambda ctx: ctx.version < 5,
+                RepeatUntilExcluding(
+                    lambda obj, ctx: obj == b'',
+                    CString('include_directory'))),
+            If(lambda ctx: ctx.version < 5, 
+                RepeatUntilExcluding(
+                    lambda obj, ctx: len(obj.name) == 0,
+                    self.Dwarf_lineprog_file_entry)) # array name is file_entry 
+        )
  
      def _create_callframe_entry_headers(self):
          self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
  
      def _create_callframe_entry_headers(self):
          self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
diff --git a/elftools/elf/descriptions.py b/elftools/elf/descriptions.py

index 0ccc9a19e6472ef708619b360aaed2fb856e000e..9314a62093b09f1daa93455566e5bae509fc2451 100644 (file)
--- a/elftools/elf/descriptions.py
+++ b/elftools/elf/descriptions.py
@@ -259,6 +259,13 @@ def describe_note_gnu_property_x86_feature_1(value):
              descs.append(desc)
      return 'x86 feature: ' + ', '.join(descs)
  
              descs.append(desc)
      return 'x86 feature: ' + ', '.join(descs)
  
+def describe_note_gnu_property_x86_isa_1(value):
+    descs = []
+    for mask, desc in _DESCR_NOTE_GNU_PROPERTY_X86_ISA_1_FLAGS:
+        if value & mask:
+            descs.append(desc)
+    return 'x86 ISA needed: ' + ', '.join(descs)    
+
  def describe_note_gnu_properties(properties):
      descriptions = []
      for prop in properties:
  def describe_note_gnu_properties(properties):
      descriptions = []
      for prop in properties:
@@ -278,6 +285,11 @@ def describe_note_gnu_properties(properties):
                  prop_desc = ' <corrupt length: 0x%x>' % sz
              else:
                  prop_desc = describe_note_gnu_property_x86_feature_1(d)
                  prop_desc = ' <corrupt length: 0x%x>' % sz
              else:
                  prop_desc = describe_note_gnu_property_x86_feature_1(d)
+        elif t == 'GNU_PROPERTY_X86_ISA_1_NEEDED':
+            if sz != 4:
+                prop_desc = ' <corrupt length: 0x%x>' % sz
+            else:
+                prop_desc = describe_note_gnu_property_x86_isa_1(d)
          elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC:
              prop_desc = '<processor-specific type 0x%x data: %s >' % (t, bytes2hex(d, sep=' '))
          elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER:
          elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC:
              prop_desc = '<processor-specific type 0x%x data: %s >' % (t, bytes2hex(d, sep=' '))
          elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER:
@@ -603,6 +615,12 @@ _DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_1_FLAGS = (
      (8, 'LAM_U57'),
  )
  
      (8, 'LAM_U57'),
  )
  
+# Same for GNU_PROPERTY_X86_SET_1_xxx
+_DESCR_NOTE_GNU_PROPERTY_X86_ISA_1_FLAGS = (
+    (1, 'x86-64-baseline'),
+    # TODO: there is a long list
+)
+
  
  def _reverse_dict(d, low_priority=()):
      """
  
  def _reverse_dict(d, low_priority=()):
      """
diff --git a/elftools/elf/enums.py b/elftools/elf/enums.py

index 8519f4e63e2a546c0dfdef850f40fe7f9d31c9ab..a5855c2677e7c8eef23148b68400ba4f61488c72 100644 (file)
--- a/elftools/elf/enums.py
+++ b/elftools/elf/enums.py
@@ -877,6 +877,7 @@ ENUM_NOTE_GNU_PROPERTY_TYPE = dict(
      GNU_PROPERTY_STACK_SIZE=1,
      GNU_PROPERTY_NO_COPY_ON_PROTECTED=2,
      GNU_PROPERTY_X86_FEATURE_1_AND=0xc0000002,
      GNU_PROPERTY_STACK_SIZE=1,
      GNU_PROPERTY_NO_COPY_ON_PROTECTED=2,
      GNU_PROPERTY_X86_FEATURE_1_AND=0xc0000002,
+    GNU_PROPERTY_X86_ISA_1_NEEDED=0xc0008002,
      _default_=Pass,
  )
  
      _default_=Pass,
  )
  
diff --git a/examples/dwarf_die_tree.py b/examples/dwarf_die_tree.py

index 0e46d9302771b7bfb38169a26de49fcfc9aa7104..afda68cca7456da2eacc9be074a8f3cc33edc5be 100644 (file)
--- a/examples/dwarf_die_tree.py
+++ b/examples/dwarf_die_tree.py
@@ -15,6 +15,7 @@ import sys
  sys.path[0:0] = ['.', '..']
  
  from elftools.elf.elffile import ELFFile
  sys.path[0:0] = ['.', '..']
  
  from elftools.elf.elffile import ELFFile
+from elftools.common.py3compat import path_to_posix
  
  
  def process_file(filename):
  
  
  def process_file(filename):
@@ -44,7 +45,7 @@ def process_file(filename):
              print('    Top DIE with tag=%s' % top_DIE.tag)
  
              # We're interested in the filename...
              print('    Top DIE with tag=%s' % top_DIE.tag)
  
              # We're interested in the filename...
-            print('    name=%s' % top_DIE.get_full_path())
+            print('    name=%s' % path_to_posix(top_DIE.get_full_path()))
  
              # Display DIEs recursively starting with top_DIE
              die_info_rec(top_DIE)
  
              # Display DIEs recursively starting with top_DIE
              die_info_rec(top_DIE)
diff --git a/examples/dwarf_lineprogram_filenames.py b/examples/dwarf_lineprogram_filenames.py

index 2dd0e70496cda3e231c9f117c63573ffce3faadf..6098f45ee45fc973478a30675297b1eeb427025e 100644 (file)
--- a/examples/dwarf_lineprogram_filenames.py
+++ b/examples/dwarf_lineprogram_filenames.py
@@ -12,6 +12,7 @@ from __future__ import print_function
  from collections import defaultdict
  import os
  import sys
  from collections import defaultdict
  import os
  import sys
+import posixpath
  
  # If pyelftools is not installed, the example can also run from the root or
  # examples/ dir of the source distribution.
  
  # If pyelftools is not installed, the example can also run from the root or
  # examples/ dir of the source distribution.
@@ -86,7 +87,7 @@ def lpe_filename(line_program, file_index):
          return file_entry.name.decode()
  
      directory = lp_header["include_directory"][dir_index - 1]
          return file_entry.name.decode()
  
      directory = lp_header["include_directory"][dir_index - 1]
-    return os.path.join(directory, file_entry.name).decode()
+    return posixpath.join(directory, file_entry.name).decode()
  
  
  if __name__ == '__main__':
  
  
  if __name__ == '__main__':
diff --git a/examples/examine_dwarf_info.py b/examples/examine_dwarf_info.py

index bb43bcc2f760d02aacfbd63ac45673a7f59ab42e..fdf10dabb34838255b9a39f3b49616b443a92e3f 100644 (file)
--- a/examples/examine_dwarf_info.py
+++ b/examples/examine_dwarf_info.py
@@ -14,6 +14,7 @@ import sys
  sys.path[0:0] = ['.', '..']
  
  from elftools.elf.elffile import ELFFile
  sys.path[0:0] = ['.', '..']
  
  from elftools.elf.elffile import ELFFile
+from elftools.common.py3compat import path_to_posix
  
  
  def process_file(filename):
  
  
  def process_file(filename):
@@ -43,7 +44,7 @@ def process_file(filename):
              print('    Top DIE with tag=%s' % top_DIE.tag)
  
              # We're interested in the filename...
              print('    Top DIE with tag=%s' % top_DIE.tag)
  
              # We're interested in the filename...
-            print('    name=%s' % top_DIE.get_full_path())
+            print('    name=%s' % path_to_posix(top_DIE.get_full_path()))
  
  if __name__ == '__main__':
      if sys.argv[1] == '--test':
  
  if __name__ == '__main__':
      if sys.argv[1] == '--test':
diff --git a/scripts/readelf.py b/scripts/readelf.py

index 9b98d0237a0c329b375121e712632c9d9429cd06..a45ec3019556d8533bb5515cfa0e359d5db3e689 100755 (executable)
--- a/scripts/readelf.py
+++ b/scripts/readelf.py
@@ -65,6 +65,7 @@ from elftools.dwarf.constants import (
  from elftools.dwarf.locationlists import LocationParser, LocationEntry
  from elftools.dwarf.callframe import CIE, FDE, ZERO
  from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry
  from elftools.dwarf.locationlists import LocationParser, LocationEntry
  from elftools.dwarf.callframe import CIE, FDE, ZERO
  from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry
+from elftools.dwarf.enums import ENUM_DW_UT
  
  
  class ReadElf(object):
  
  
  class ReadElf(object):
@@ -1061,7 +1062,10 @@ class ReadElf(object):
              self._emitline('   Length:        %s (%s)' % (
                  self._format_hex(cu['unit_length']),
                  '%s-bit' % cu.dwarf_format()))
              self._emitline('   Length:        %s (%s)' % (
                  self._format_hex(cu['unit_length']),
                  '%s-bit' % cu.dwarf_format()))
-            self._emitline('   Version:       %s' % cu['version']),
+            self._emitline('   Version:       %s' % cu['version'])
+            if cu.header.get("unit_type", False):
+                ut = next((key for key, value in ENUM_DW_UT.items() if value == cu.header.unit_type), '?')
+                self._emitline('   Unit Type:     %s (%d)' % (ut, cu.header.unit_type))
              self._emitline('   Abbrev Offset: %s' % (
                  self._format_hex(cu['debug_abbrev_offset']))),
              self._emitline('   Pointer Size:  %s' % cu['address_size'])
              self._emitline('   Abbrev Offset: %s' % (
                  self._format_hex(cu['debug_abbrev_offset']))),
              self._emitline('   Pointer Size:  %s' % cu['address_size'])
@@ -1121,6 +1125,7 @@ class ReadElf(object):
  
          for cu in self._dwarfinfo.iter_CUs():
              lineprogram = self._dwarfinfo.line_program_for_CU(cu)
  
          for cu in self._dwarfinfo.iter_CUs():
              lineprogram = self._dwarfinfo.line_program_for_CU(cu)
+            ver5 = lineprogram.header.version >= 5
  
              cu_filename = bytes2str(lineprogram['file_entry'][0].name)
              if len(lineprogram['include_directory']) > 0:
  
              cu_filename = bytes2str(lineprogram['file_entry'][0].name)
              if len(lineprogram['include_directory']) > 0:
@@ -1132,7 +1137,9 @@ class ReadElf(object):
                  cu_filename = '%s/%s' % (bytes2str(dir), cu_filename)
  
              self._emitline('CU: %s:' % cu_filename)
                  cu_filename = '%s/%s' % (bytes2str(dir), cu_filename)
  
              self._emitline('CU: %s:' % cu_filename)
-            self._emitline('File name                            Line number    Starting address    Stmt')
+            self._emitline('File name                            Line number    Starting address    View    Stmt' if ver5
+                else 'File name                            Line number    Starting address    Stmt')
+            # What goes into View on V5? To be seen...
  
              # Print each state's file, line and address information. For some
              # instructions other output is needed to be compatible with
  
              # Print each state's file, line and address information. For some
              # instructions other output is needed to be compatible with
@@ -1161,11 +1168,14 @@ class ReadElf(object):
                          '0' if state.address == 0 else self._format_hex(state.address),
                          'x' if state.is_stmt and not state.end_sequence else ''))
                  else:
                          '0' if state.address == 0 else self._format_hex(state.address),
                          'x' if state.is_stmt and not state.end_sequence else ''))
                  else:
-                    self._emitline('%-35s  %11d  %18s[%d] %s' % (
+                    # What's the deal with op_index after address on DWARF 5? Is omitting it
+                    # a function of DWARF version, or ISA, or what?
+                    # Used to be unconditional, even on non-VLIW machines.
+                    self._emitline('%-35s  %s  %18s%s %s' % (
                          bytes2str(lineprogram['file_entry'][state.file - 1].name),
                          bytes2str(lineprogram['file_entry'][state.file - 1].name),
-                        state.line if not state.end_sequence else '-',
+                        "%11d" % (state.line,) if not state.end_sequence else '-',
                          '0' if state.address == 0 else self._format_hex(state.address),
                          '0' if state.address == 0 else self._format_hex(state.address),
-                        state.op_index,
+                        '' if ver5 else '[%d]' % (state.op_index,),
                          'x' if state.is_stmt and not state.end_sequence else ''))
                  if entry.command == DW_LNS_copy:
                      # Another readelf oddity...
                          'x' if state.is_stmt and not state.end_sequence else ''))
                  if entry.command == DW_LNS_copy:
                      # Another readelf oddity...
diff --git a/test/run_readelf_tests.py b/test/run_readelf_tests.py

index 59a039c5e214d25fb5b98cd5e1819b26d4cfd426..ceca7ec06c7809f4d4a3eb3a7a7cd679c22006df 100755 (executable)
--- a/test/run_readelf_tests.py
+++ b/test/run_readelf_tests.py
@@ -80,6 +80,12 @@ def run_test_on_file(filename, verbose=False, opt=None):
                  testlog.info('.......................SKIPPED')
              continue
  
                  testlog.info('.......................SKIPPED')
              continue
  
+        # sevaa says: there is another shorted-out test; in dwarf_lineprogramv5.elf, the two bytes at 0x2072 were
+        # patched from 0x07 0x10 to 0x00 0x00.
+        # Those represented the second instruction in the first FDE in .eh_frame. This changed the instruction
+        # from "DW_CFA_undefined 16" to two NOPs.
+        # GNU readelf had a bug here, had to work around. See PR #411.
+
          # stdouts will be a 2-element list: output of readelf and output
          # of scripts/readelf.py
          stdouts = []
          # stdouts will be a 2-element list: output of readelf and output
          # of scripts/readelf.py
          stdouts = []
diff --git a/test/testfiles_for_readelf/dwarf_lineprogramv5.elf b/test/testfiles_for_readelf/dwarf_lineprogramv5.elf

new file mode 100644 (file)

index 0000000..33f051b

Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_lineprogramv5.elf differ
author	Seva Alekseyev <sevaa@yarxi.ru>
	Mon, 13 Jun 2022 12:44:44 +0000 (08:44 -0400)
committer	GitHub <noreply@github.com>
	Mon, 13 Jun 2022 12:44:44 +0000 (05:44 -0700)
elftools/common/py3compat.py		patch \| blob \| history
elftools/dwarf/descriptions.py		patch \| blob \| history
elftools/dwarf/dwarfinfo.py		patch \| blob \| history
elftools/dwarf/enums.py		patch \| blob \| history
elftools/dwarf/structs.py		patch \| blob \| history
elftools/elf/descriptions.py		patch \| blob \| history
elftools/elf/enums.py		patch \| blob \| history
examples/dwarf_die_tree.py		patch \| blob \| history
examples/dwarf_lineprogram_filenames.py		patch \| blob \| history
examples/examine_dwarf_info.py		patch \| blob \| history
scripts/readelf.py		patch \| blob \| history
test/run_readelf_tests.py		patch \| blob \| history
test/testfiles_for_readelf/dwarf_lineprogramv5.elf	[new file with mode: 0644]	patch \| blob