Version 5 lineprogram header (#411)
author Seva Alekseyev <sevaa@yarxi.ru>
Mon, 13 Jun 2022 12:44:44 +0000 (08:44 -0400)
committer GitHub <noreply@github.com>
Mon, 13 Jun 2022 12:44:44 +0000 (05:44 -0700)
* Version 5 lineprogram header, take 1

* Readelf/decodedline formatting fix

* DWARF 5 fields None, not missing

* Comment

* Sample binary

* Dump unit type in readelf info

* More languages described

* Describing form_line_strp

* Basic support for GNU_PROPERTY_X86_ISA_1

* Readelf decodedline format fixes to match with DWARF5

* Readelf test shorted out for the file/test where a bug in GNU readelf manifests, see PR #411.

* Newline :)

* Readelf's language names matched against binutils

* Comment about lineprogram files and directories

* GNU binutils bug worked around in a slightly less disturbing way - patched the binary, left a comment in the test script.

* Examples autotest no longer fails on Windows over expected path format

* Autotest fix

* Typo

* Windows compatibility, take 2

* No pathlib on Python 2

Co-authored-by: Seva Alekseyev <sevaa@nih.gov>
13 files changed:
elftools/common/py3compat.py
elftools/dwarf/descriptions.py
elftools/dwarf/dwarfinfo.py
elftools/dwarf/enums.py
elftools/dwarf/structs.py
elftools/elf/descriptions.py
elftools/elf/enums.py
examples/dwarf_die_tree.py
examples/dwarf_lineprogram_filenames.py
examples/examine_dwarf_info.py
scripts/readelf.py
test/run_readelf_tests.py
test/testfiles_for_readelf/dwarf_lineprogramv5.elf [new file with mode: 0644]

index 259926440a4340b81812f21f845d830567685f7f..c701cc0d4a19c3a6663a8b6a53dafda228a17e9a 100644 (file)
@@ -12,6 +12,7 @@ PY3 = sys.version_info[0] == 3
 
 if PY3:
     import io
 
 if PY3:
     import io
+    from pathlib import Path
     StringIO = io.StringIO
     BytesIO = io.BytesIO
 
     StringIO = io.StringIO
     BytesIO = io.BytesIO
 
@@ -42,8 +43,15 @@ if PY3:
     ifilter = filter
 
     maxint = sys.maxsize
     ifilter = filter
 
     maxint = sys.maxsize
+
+    def path_to_posix(s):
+        return Path(s).as_posix()
+
 else:
     import cStringIO
 else:
     import cStringIO
+    import os
+    import posixpath
+
     StringIO = BytesIO = cStringIO.StringIO
 
     def bytes2hex(b, sep=''):
     StringIO = BytesIO = cStringIO.StringIO
 
     def bytes2hex(b, sep=''):
@@ -63,6 +71,9 @@ else:
 
     maxint = sys.maxint
 
 
     maxint = sys.maxint
 
+    def path_to_posix(s):
+        return posixpath.join(*os.path.split(s))
+
 
 def iterkeys(d):
     """Return an iterator over the keys of a dictionary."""
 
 def iterkeys(d):
     """Return an iterator over the keys of a dictionary."""
index ef6ac19fb32b51bde5fb64fa2fb9a762f624e661..f51f69b02ae3b5930f312be6e2e05477b4faa82a 100644 (file)
@@ -206,6 +206,10 @@ def _describe_attr_strp(attr, die, section_offset):
     return '(indirect string, offset: 0x%x): %s' % (
         attr.raw_value, bytes2str(attr.value))
 
     return '(indirect string, offset: 0x%x): %s' % (
         attr.raw_value, bytes2str(attr.value))
 
+def _describe_attr_line_strp(attr, die, section_offset):
+    return '(indirect line string, offset: 0x%x): %s' % (
+        attr.raw_value, bytes2str(attr.value))        
+
 def _describe_attr_string(attr, die, section_offset):
     return bytes2str(attr.value)
 
 def _describe_attr_string(attr, die, section_offset):
     return bytes2str(attr.value)
 
@@ -247,6 +251,7 @@ _ATTR_DESCRIPTION_MAP = defaultdict(
     DW_FORM_udata=_describe_attr_value_passthrough,
     DW_FORM_string=_describe_attr_string,
     DW_FORM_strp=_describe_attr_strp,
     DW_FORM_udata=_describe_attr_value_passthrough,
     DW_FORM_string=_describe_attr_string,
     DW_FORM_strp=_describe_attr_strp,
+    DW_FORM_line_strp=_describe_attr_line_strp,
     DW_FORM_block1=_describe_attr_block,
     DW_FORM_block2=_describe_attr_block,
     DW_FORM_block4=_describe_attr_block,
     DW_FORM_block1=_describe_attr_block,
     DW_FORM_block2=_describe_attr_block,
     DW_FORM_block4=_describe_attr_block,
@@ -312,12 +317,29 @@ _DESCR_DW_LANG = {
     DW_LANG_UPC: '(Unified Parallel C)',
     DW_LANG_D: '(D)',
     DW_LANG_Python: '(Python)',
     DW_LANG_UPC: '(Unified Parallel C)',
     DW_LANG_D: '(D)',
     DW_LANG_Python: '(Python)',
+    DW_LANG_OpenCL: '(OpenCL)',
+    DW_LANG_Go: '(Go)',
+    DW_LANG_Modula3: '(Modula 3)',
+    DW_LANG_Haskell: '(Haskell)',
+    DW_LANG_C_plus_plus_03: '(C++03)',
+    DW_LANG_C_plus_plus_11: '(C++11)',
+    DW_LANG_OCaml: '(OCaml)',
+    DW_LANG_Rust: '(Rust)',
+    DW_LANG_C11: '(C11)',
+    DW_LANG_Swift: '(Swift)',
+    DW_LANG_Julia: '(Julia)',
+    DW_LANG_Dylan: '(Dylan)',
+    DW_LANG_C_plus_plus_14: '(C++14)',
+    DW_LANG_Fortran03: '(Fortran 03)',
+    DW_LANG_Fortran08: '(Fortran 08)',
+    DW_LANG_RenderScript: '(RenderScript)',
+    DW_LANG_BLISS: '(Bliss)', # Not in binutils
     DW_LANG_Mips_Assembler: '(MIPS assembler)',
     DW_LANG_HP_Bliss: '(HP Bliss)',
     DW_LANG_HP_Basic91: '(HP Basic 91)',
     DW_LANG_HP_Pascal91: '(HP Pascal 91)',
     DW_LANG_HP_IMacro: '(HP IMacro)',
     DW_LANG_Mips_Assembler: '(MIPS assembler)',
     DW_LANG_HP_Bliss: '(HP Bliss)',
     DW_LANG_HP_Basic91: '(HP Basic 91)',
     DW_LANG_HP_Pascal91: '(HP Pascal 91)',
     DW_LANG_HP_IMacro: '(HP IMacro)',
-    DW_LANG_HP_Assembler: '(HP assembler)',
+    DW_LANG_HP_Assembler: '(HP assembler)'
 }
 
 _DESCR_DW_ATE = {
 }
 
 _DESCR_DW_ATE = {
index a300e7b9a07476c6096e9c50240536ce2fce2355..69d7b91b32888967129351989e055e39dc05d431 100644 (file)
@@ -243,6 +243,14 @@ class DWARFInfo(object):
         """ Given a CU object, fetch the line program it points to from the
             .debug_line section.
             If the CU doesn't point to a line program, return None.
         """ Given a CU object, fetch the line program it points to from the
             .debug_line section.
             If the CU doesn't point to a line program, return None.
+            
+            Note about directory and file names. They are returned as two collections
+            in the lineprogram object's header - include_directory and file_entry.
+
+            In DWARFv5, they have introduced a different, extensible format for those
+            collections. So in a lineprogram v5+, there are two more collections in
+            the header - directories and file_names. Those might contain extra DWARFv5
+            information that is not exposed in include_directory and file_entry.
         """
         # The line program is pointed to by the DW_AT_stmt_list attribute of
         # the top DIE of a CU.
         """
         # The line program is pointed to by the DW_AT_stmt_list attribute of
         # the top DIE of a CU.
@@ -456,6 +464,34 @@ class DWARFInfo(object):
             self.debug_line_sec.stream,
             debug_line_offset)
 
             self.debug_line_sec.stream,
             debug_line_offset)
 
+        # DWARF5: resolve names
+        def resolve_strings(self, lineprog_header, format_field, data_field):
+            if lineprog_header.get(format_field, False):
+                data = lineprog_header[data_field]
+                for field in lineprog_header[format_field]:
+                    def replace_value(data, content_type, replacer):
+                        for entry in data:
+                            entry[content_type] = replacer(entry[content_type])
+
+                    if field.form == 'DW_FORM_line_strp':
+                        replace_value(data, field.content_type, self.get_string_from_linetable)
+                    elif field.form == 'DW_FORM_strp':
+                        replace_value(data, field.content_type, self.get_string_from_table)
+                    elif field.form in ('DW_FORM_strp_sup', 'DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4'):
+                        raise NotImplementedError()
+
+        resolve_strings(self, lineprog_header, 'directory_entry_format', 'directories')
+        resolve_strings(self, lineprog_header, 'file_name_entry_format', 'file_names')
+
+        # DWARF5: provide compatible file/directory name arrays for legacy lineprogram consumers
+        if lineprog_header.get('directories', False):
+            lineprog_header.include_directory = tuple(d.DW_LNCT_path for d in lineprog_header.directories)
+        if lineprog_header.get('file_names', False):
+            translate = namedtuple("file_entry", "name dir_index mtime length")
+            lineprog_header.file_entry = tuple(
+                translate(e.get('DW_LNCT_path'), e.get('DW_LNCT_directory_index'), e.get('DW_LNCT_timestamp'), e.get('DW_LNCT_size'))
+                for e in lineprog_header.file_names)
+        
         # Calculate the offset to the next line program (see DWARF 6.2.4)
         end_offset = (  debug_line_offset + lineprog_header['unit_length'] +
                         structs.initial_length_field_size())
         # Calculate the offset to the next line program (see DWARF 6.2.4)
         end_offset = (  debug_line_offset + lineprog_header['unit_length'] +
                         structs.initial_length_field_size())
index bfeca5907f84889cafaf7f7ac16700f3d6dbee2c..a52e8034413350d07aa402145ac1aa668dcd3f9f 100644 (file)
@@ -394,3 +394,24 @@ DW_EH_encoding_flags = dict(
 
     DW_EH_PE_omit     = 0xff,
 )
 
     DW_EH_PE_omit     = 0xff,
 )
+
+ENUM_DW_LNCT = dict(
+    DW_LNCT_path             = 0x1,
+    DW_LNCT_directory_index  = 0x2,
+    DW_LNCT_timestamp        = 0x3,
+    DW_LNCT_size             = 0x4,
+    DW_LNCT_MD5              = 0x5,
+    DW_LNCT_lo_user          = 0x2000,
+    DW_LNCT_hi_user          = 0x3fff
+)
+
+ENUM_DW_UT = dict(
+    DW_UT_compile       = 0x01,
+    DW_UT_type          = 0x02,
+    DW_UT_partial       = 0x03,
+    DW_UT_skeleton      = 0x04,
+    DW_UT_split_compile = 0x05,
+    DW_UT_split_type    = 0x06,
+    DW_UT_lo_user       = 0x80,
+    DW_UT_hi_user       = 0xff
+)
index 79e0d8f230ac10648b6d2161eae3bd0bb31cce8e..46cd81777d15c19951246bdc674c3dd6d9a9dae9 100644 (file)
@@ -7,11 +7,13 @@
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
+from elftools.construct.core import Subconstruct
+from elftools.construct.macros import Embedded
 from ..construct import (
     UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
     SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
     Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
 from ..construct import (
     UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
     SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
     Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
-    CString, Embed, StaticField, IfThenElse
+    CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence
     )
 from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
 from .enums import *
     )
 from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
 from .enums import *
@@ -282,27 +284,80 @@ class DWARFStructs(object):
                     self.Dwarf_uleb128('mtime'),
                     self.Dwarf_uleb128('length')))))
 
                     self.Dwarf_uleb128('mtime'),
                     self.Dwarf_uleb128('length')))))
 
+        class FormattedEntry(Construct):
+            # Generates a parser based on a previously parsed piece,
+            # similar to the deprecated Dynamic.
+            # Strings are resolved later, since resolving them potentially
+            # requires looking at another section.
+            def __init__(self, name, structs, format_field):
+                Construct.__init__(self, name)
+                self.structs = structs
+                self.format_field = format_field
+
+            def _parse(self, stream, context):
+                # Somewhat tricky technique here, explicitly writing back to the context
+                if self.format_field + "_parser" in context:
+                    parser = context[self.format_field + "_parser"]
+                else:
+                    fields = tuple(
+                        Rename(f.content_type, self.structs.Dwarf_dw_form[f.form])
+                        for f in context[self.format_field])
+                    parser = Struct('formatted_entry', *fields)
+                    context[self.format_field + "_parser"] = parser
+                return parser._parse(stream, context)
+        ver5 = lambda ctx: ctx.version >= 5
+        
         self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
             self.Dwarf_initial_length('unit_length'),
             self.Dwarf_uint16('version'),
         self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
             self.Dwarf_initial_length('unit_length'),
             self.Dwarf_uint16('version'),
+            If(ver5,
+                self.Dwarf_uint8("address_size"),
+                None),
+            If(ver5,
+                self.Dwarf_uint8("segment_selector_size"),
+                None),
             self.Dwarf_offset('header_length'),
             self.Dwarf_uint8('minimum_instruction_length'),
             self.Dwarf_offset('header_length'),
             self.Dwarf_uint8('minimum_instruction_length'),
-            If(lambda ctx: ctx['version'] >= 4,
+            If(lambda ctx: ctx.version >= 4,
                 self.Dwarf_uint8("maximum_operations_per_instruction"),
                 1),
             self.Dwarf_uint8('default_is_stmt'),
             self.Dwarf_int8('line_base'),
             self.Dwarf_uint8('line_range'),
             self.Dwarf_uint8('opcode_base'),
                 self.Dwarf_uint8("maximum_operations_per_instruction"),
                 1),
             self.Dwarf_uint8('default_is_stmt'),
             self.Dwarf_int8('line_base'),
             self.Dwarf_uint8('line_range'),
             self.Dwarf_uint8('opcode_base'),
-            Array(lambda ctx: ctx['opcode_base'] - 1,
+            Array(lambda ctx: ctx.opcode_base - 1,
                   self.Dwarf_uint8('standard_opcode_lengths')),
                   self.Dwarf_uint8('standard_opcode_lengths')),
-            RepeatUntilExcluding(
-                lambda obj, ctx: obj == b'',
-                CString('include_directory')),
-            RepeatUntilExcluding(
-                lambda obj, ctx: len(obj.name) == 0,
-                self.Dwarf_lineprog_file_entry),
-            )
+            If(ver5,
+                PrefixedArray(
+                    Struct('directory_entry_format',
+                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
+                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
+                    self.Dwarf_uint8("directory_entry_format_count"))),
+            If(ver5, # Name deliberately doesn't match the legacy object, since the format can't be made compatible
+                PrefixedArray(
+                    FormattedEntry('directories', self, "directory_entry_format"),
+                    self.Dwarf_uleb128('directories_count'))),
+            If(ver5,
+                PrefixedArray(
+                    Struct('file_name_entry_format',
+                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
+                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
+                    self.Dwarf_uint8("file_name_entry_format_count"))),
+            If(ver5,
+                PrefixedArray(
+                    FormattedEntry('file_names', self, "file_name_entry_format"),
+                    self.Dwarf_uleb128('file_names_count'))),
+            # Legacy  directories/files - DWARF < 5 only
+            If(lambda ctx: ctx.version < 5,
+                RepeatUntilExcluding(
+                    lambda obj, ctx: obj == b'',
+                    CString('include_directory'))),
+            If(lambda ctx: ctx.version < 5, 
+                RepeatUntilExcluding(
+                    lambda obj, ctx: len(obj.name) == 0,
+                    self.Dwarf_lineprog_file_entry)) # array name is file_entry 
+        )
 
     def _create_callframe_entry_headers(self):
         self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
 
     def _create_callframe_entry_headers(self):
         self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
index 0ccc9a19e6472ef708619b360aaed2fb856e000e..9314a62093b09f1daa93455566e5bae509fc2451 100644 (file)
@@ -259,6 +259,13 @@ def describe_note_gnu_property_x86_feature_1(value):
             descs.append(desc)
     return 'x86 feature: ' + ', '.join(descs)
 
             descs.append(desc)
     return 'x86 feature: ' + ', '.join(descs)
 
+def describe_note_gnu_property_x86_isa_1(value):
+    descs = []
+    for mask, desc in _DESCR_NOTE_GNU_PROPERTY_X86_ISA_1_FLAGS:
+        if value & mask:
+            descs.append(desc)
+    return 'x86 ISA needed: ' + ', '.join(descs)    
+
 def describe_note_gnu_properties(properties):
     descriptions = []
     for prop in properties:
 def describe_note_gnu_properties(properties):
     descriptions = []
     for prop in properties:
@@ -278,6 +285,11 @@ def describe_note_gnu_properties(properties):
                 prop_desc = ' <corrupt length: 0x%x>' % sz
             else:
                 prop_desc = describe_note_gnu_property_x86_feature_1(d)
                 prop_desc = ' <corrupt length: 0x%x>' % sz
             else:
                 prop_desc = describe_note_gnu_property_x86_feature_1(d)
+        elif t == 'GNU_PROPERTY_X86_ISA_1_NEEDED':
+            if sz != 4:
+                prop_desc = ' <corrupt length: 0x%x>' % sz
+            else:
+                prop_desc = describe_note_gnu_property_x86_isa_1(d)
         elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC:
             prop_desc = '<processor-specific type 0x%x data: %s >' % (t, bytes2hex(d, sep=' '))
         elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER:
         elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOPROC <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIPROC:
             prop_desc = '<processor-specific type 0x%x data: %s >' % (t, bytes2hex(d, sep=' '))
         elif _DESCR_NOTE_GNU_PROPERTY_TYPE_LOUSER <= t <= _DESCR_NOTE_GNU_PROPERTY_TYPE_HIUSER:
@@ -603,6 +615,12 @@ _DESCR_NOTE_GNU_PROPERTY_X86_FEATURE_1_FLAGS = (
     (8, 'LAM_U57'),
 )
 
     (8, 'LAM_U57'),
 )
 
+# Same for GNU_PROPERTY_X86_SET_1_xxx
+_DESCR_NOTE_GNU_PROPERTY_X86_ISA_1_FLAGS = (
+    (1, 'x86-64-baseline'),
+    # TODO: there is a long list
+)
+
 
 def _reverse_dict(d, low_priority=()):
     """
 
 def _reverse_dict(d, low_priority=()):
     """
index 8519f4e63e2a546c0dfdef850f40fe7f9d31c9ab..a5855c2677e7c8eef23148b68400ba4f61488c72 100644 (file)
@@ -877,6 +877,7 @@ ENUM_NOTE_GNU_PROPERTY_TYPE = dict(
     GNU_PROPERTY_STACK_SIZE=1,
     GNU_PROPERTY_NO_COPY_ON_PROTECTED=2,
     GNU_PROPERTY_X86_FEATURE_1_AND=0xc0000002,
     GNU_PROPERTY_STACK_SIZE=1,
     GNU_PROPERTY_NO_COPY_ON_PROTECTED=2,
     GNU_PROPERTY_X86_FEATURE_1_AND=0xc0000002,
+    GNU_PROPERTY_X86_ISA_1_NEEDED=0xc0008002,
     _default_=Pass,
 )
 
     _default_=Pass,
 )
 
index 0e46d9302771b7bfb38169a26de49fcfc9aa7104..afda68cca7456da2eacc9be074a8f3cc33edc5be 100644 (file)
@@ -15,6 +15,7 @@ import sys
 sys.path[0:0] = ['.', '..']
 
 from elftools.elf.elffile import ELFFile
 sys.path[0:0] = ['.', '..']
 
 from elftools.elf.elffile import ELFFile
+from elftools.common.py3compat import path_to_posix
 
 
 def process_file(filename):
 
 
 def process_file(filename):
@@ -44,7 +45,7 @@ def process_file(filename):
             print('    Top DIE with tag=%s' % top_DIE.tag)
 
             # We're interested in the filename...
             print('    Top DIE with tag=%s' % top_DIE.tag)
 
             # We're interested in the filename...
-            print('    name=%s' % top_DIE.get_full_path())
+            print('    name=%s' % path_to_posix(top_DIE.get_full_path()))
 
             # Display DIEs recursively starting with top_DIE
             die_info_rec(top_DIE)
 
             # Display DIEs recursively starting with top_DIE
             die_info_rec(top_DIE)
index 2dd0e70496cda3e231c9f117c63573ffce3faadf..6098f45ee45fc973478a30675297b1eeb427025e 100644 (file)
@@ -12,6 +12,7 @@ from __future__ import print_function
 from collections import defaultdict
 import os
 import sys
 from collections import defaultdict
 import os
 import sys
+import posixpath
 
 # If pyelftools is not installed, the example can also run from the root or
 # examples/ dir of the source distribution.
 
 # If pyelftools is not installed, the example can also run from the root or
 # examples/ dir of the source distribution.
@@ -86,7 +87,7 @@ def lpe_filename(line_program, file_index):
         return file_entry.name.decode()
 
     directory = lp_header["include_directory"][dir_index - 1]
         return file_entry.name.decode()
 
     directory = lp_header["include_directory"][dir_index - 1]
-    return os.path.join(directory, file_entry.name).decode()
+    return posixpath.join(directory, file_entry.name).decode()
 
 
 if __name__ == '__main__':
 
 
 if __name__ == '__main__':
index bb43bcc2f760d02aacfbd63ac45673a7f59ab42e..fdf10dabb34838255b9a39f3b49616b443a92e3f 100644 (file)
@@ -14,6 +14,7 @@ import sys
 sys.path[0:0] = ['.', '..']
 
 from elftools.elf.elffile import ELFFile
 sys.path[0:0] = ['.', '..']
 
 from elftools.elf.elffile import ELFFile
+from elftools.common.py3compat import path_to_posix
 
 
 def process_file(filename):
 
 
 def process_file(filename):
@@ -43,7 +44,7 @@ def process_file(filename):
             print('    Top DIE with tag=%s' % top_DIE.tag)
 
             # We're interested in the filename...
             print('    Top DIE with tag=%s' % top_DIE.tag)
 
             # We're interested in the filename...
-            print('    name=%s' % top_DIE.get_full_path())
+            print('    name=%s' % path_to_posix(top_DIE.get_full_path()))
 
 if __name__ == '__main__':
     if sys.argv[1] == '--test':
 
 if __name__ == '__main__':
     if sys.argv[1] == '--test':
index 9b98d0237a0c329b375121e712632c9d9429cd06..a45ec3019556d8533bb5515cfa0e359d5db3e689 100755 (executable)
@@ -65,6 +65,7 @@ from elftools.dwarf.constants import (
 from elftools.dwarf.locationlists import LocationParser, LocationEntry
 from elftools.dwarf.callframe import CIE, FDE, ZERO
 from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry
 from elftools.dwarf.locationlists import LocationParser, LocationEntry
 from elftools.dwarf.callframe import CIE, FDE, ZERO
 from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry
+from elftools.dwarf.enums import ENUM_DW_UT
 
 
 class ReadElf(object):
 
 
 class ReadElf(object):
@@ -1061,7 +1062,10 @@ class ReadElf(object):
             self._emitline('   Length:        %s (%s)' % (
                 self._format_hex(cu['unit_length']),
                 '%s-bit' % cu.dwarf_format()))
             self._emitline('   Length:        %s (%s)' % (
                 self._format_hex(cu['unit_length']),
                 '%s-bit' % cu.dwarf_format()))
-            self._emitline('   Version:       %s' % cu['version']),
+            self._emitline('   Version:       %s' % cu['version'])
+            if cu.header.get("unit_type", False):
+                ut = next((key for key, value in ENUM_DW_UT.items() if value == cu.header.unit_type), '?')
+                self._emitline('   Unit Type:     %s (%d)' % (ut, cu.header.unit_type))
             self._emitline('   Abbrev Offset: %s' % (
                 self._format_hex(cu['debug_abbrev_offset']))),
             self._emitline('   Pointer Size:  %s' % cu['address_size'])
             self._emitline('   Abbrev Offset: %s' % (
                 self._format_hex(cu['debug_abbrev_offset']))),
             self._emitline('   Pointer Size:  %s' % cu['address_size'])
@@ -1121,6 +1125,7 @@ class ReadElf(object):
 
         for cu in self._dwarfinfo.iter_CUs():
             lineprogram = self._dwarfinfo.line_program_for_CU(cu)
 
         for cu in self._dwarfinfo.iter_CUs():
             lineprogram = self._dwarfinfo.line_program_for_CU(cu)
+            ver5 = lineprogram.header.version >= 5
 
             cu_filename = bytes2str(lineprogram['file_entry'][0].name)
             if len(lineprogram['include_directory']) > 0:
 
             cu_filename = bytes2str(lineprogram['file_entry'][0].name)
             if len(lineprogram['include_directory']) > 0:
@@ -1132,7 +1137,9 @@ class ReadElf(object):
                 cu_filename = '%s/%s' % (bytes2str(dir), cu_filename)
 
             self._emitline('CU: %s:' % cu_filename)
                 cu_filename = '%s/%s' % (bytes2str(dir), cu_filename)
 
             self._emitline('CU: %s:' % cu_filename)
-            self._emitline('File name                            Line number    Starting address    Stmt')
+            self._emitline('File name                            Line number    Starting address    View    Stmt' if ver5
+                else 'File name                            Line number    Starting address    Stmt')
+            # What goes into View on V5? To be seen...
 
             # Print each state's file, line and address information. For some
             # instructions other output is needed to be compatible with
 
             # Print each state's file, line and address information. For some
             # instructions other output is needed to be compatible with
@@ -1161,11 +1168,14 @@ class ReadElf(object):
                         '0' if state.address == 0 else self._format_hex(state.address),
                         'x' if state.is_stmt and not state.end_sequence else ''))
                 else:
                         '0' if state.address == 0 else self._format_hex(state.address),
                         'x' if state.is_stmt and not state.end_sequence else ''))
                 else:
-                    self._emitline('%-35s  %11d  %18s[%d] %s' % (
+                    # What's the deal with op_index after address on DWARF 5? Is omitting it
+                    # a function of DWARF version, or ISA, or what?
+                    # Used to be unconditional, even on non-VLIW machines.
+                    self._emitline('%-35s  %s  %18s%s %s' % (
                         bytes2str(lineprogram['file_entry'][state.file - 1].name),
                         bytes2str(lineprogram['file_entry'][state.file - 1].name),
-                        state.line if not state.end_sequence else '-',
+                        "%11d" % (state.line,) if not state.end_sequence else '-',
                         '0' if state.address == 0 else self._format_hex(state.address),
                         '0' if state.address == 0 else self._format_hex(state.address),
-                        state.op_index,
+                        '' if ver5 else '[%d]' % (state.op_index,),
                         'x' if state.is_stmt and not state.end_sequence else ''))
                 if entry.command == DW_LNS_copy:
                     # Another readelf oddity...
                         'x' if state.is_stmt and not state.end_sequence else ''))
                 if entry.command == DW_LNS_copy:
                     # Another readelf oddity...
index 59a039c5e214d25fb5b98cd5e1819b26d4cfd426..ceca7ec06c7809f4d4a3eb3a7a7cd679c22006df 100755 (executable)
@@ -80,6 +80,12 @@ def run_test_on_file(filename, verbose=False, opt=None):
                 testlog.info('.......................SKIPPED')
             continue
 
                 testlog.info('.......................SKIPPED')
             continue
 
+        # sevaa says: there is another shorted out test; in dwarf_lineprogramv5.elf, the two bytes at 0x2072 were
+        # patched from 0x07 0x10 to 0x00 0x00.
+        # Those represented the second instruction in the first FDE in .eh_frame. This changed the instruction
+        # from "DW_CFA_undefined 16" to two NOPs.
+        # GNU readelf had a bug here, had to work around. See PR #411.
+
         # stdouts will be a 2-element list: output of readelf and output
         # of scripts/readelf.py
         stdouts = []
         # stdouts will be a 2-element list: output of readelf and output
         # of scripts/readelf.py
         stdouts = []
diff --git a/test/testfiles_for_readelf/dwarf_lineprogramv5.elf b/test/testfiles_for_readelf/dwarf_lineprogramv5.elf
new file mode 100644 (file)
index 0000000..33f051b
Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_lineprogramv5.elf differ