DWARF 5 operations and DWARF5 location lists (#418)
authorSeva Alekseyev <sevaa@yarxi.ru>
Thu, 16 Jun 2022 12:19:30 +0000 (08:19 -0400)
committerGitHub <noreply@github.com>
Thu, 16 Jun 2022 12:19:30 +0000 (05:19 -0700)
* Test binary for DWARFv5 operations

* DWARFv5 ops, part 1: entry_value, const_type, deref_type

* DWARFv5 ops, part 2: regval_type, implicit_pointer, convert

* DWARFv5 loclists section parsing, take 1

* Formatting fix

* Test fixes

* Lineprogram header file_entries with DWARFv5 now are indexable by string

* Excising the View column, if present, from GNU readelf..decodedline output

* Readelf test fixes

* Typo

* Formatting and comments

* More style fixes

14 files changed:
elftools/common/construct_utils.py
elftools/common/utils.py
elftools/dwarf/descriptions.py
elftools/dwarf/dwarf_expr.py
elftools/dwarf/dwarfinfo.py
elftools/dwarf/enums.py
elftools/dwarf/locationlists.py
elftools/dwarf/structs.py
elftools/elf/elffile.py
examples/reference_output/dwarf_location_info.out
scripts/readelf.py
test/run_readelf_tests.py
test/test_refaddr_bitness.py
test/testfiles_for_readelf/dwarf_v5ops.so.elf [new file with mode: 0644]

index 4b4a39205e9f96712cb66893d647fb0085e3c8de..64f1f9e7501c726a5526d79b77eb6b2c633de50d 100644 (file)
@@ -8,7 +8,7 @@
 #-------------------------------------------------------------------------------
 from ..construct import (
     Subconstruct, ConstructError, ArrayError, Adapter, Field, RepeatUntil,
 #-------------------------------------------------------------------------------
 from ..construct import (
     Subconstruct, ConstructError, ArrayError, Adapter, Field, RepeatUntil,
-    Rename, SizeofError
+    Rename, SizeofError, Construct
     )
 
 
     )
 
 
@@ -89,3 +89,24 @@ def SLEB128(name):
     """ A construct creator for SLEB128 encoding.
     """
     return Rename(name, _SLEB128Adapter(_LEB128_reader()))
     """ A construct creator for SLEB128 encoding.
     """
     return Rename(name, _SLEB128Adapter(_LEB128_reader()))
+
+class StreamOffset(Construct):
+    """
+    Captures the current stream offset 
+
+    Parameters:
+    * name - the name of the value
+
+    Example:
+    StreamOffset("item_offset")
+    """
+    __slots__ = []
+    def __init__(self, name):
+        Construct.__init__(self, name)
+        self._set_flag(self.FLAG_DYNAMIC)
+    def _parse(self, stream, context):
+        return stream.tell()
+    def _build(self, obj, stream, context):
+        context[self.name] = stream.tell()
+    def _sizeof(self, context):
+        return 0     
index d1fde2cacbba144b95c83a7b4b99942c4f1311b4..0ea417ce06979f803d0818378564d037abb93234 100644 (file)
@@ -10,6 +10,7 @@ from contextlib import contextmanager
 from .exceptions import ELFParseError, ELFError, DWARFError
 from .py3compat import int2byte
 from ..construct import ConstructError, ULInt8
 from .exceptions import ELFParseError, ELFError, DWARFError
 from .py3compat import int2byte
 from ..construct import ConstructError, ULInt8
+import os
 
 
 def merge_dicts(*dicts):
 
 
 def merge_dicts(*dicts):
@@ -107,6 +108,19 @@ def read_blob(stream, length):
     """
     return [struct_parse(ULInt8(''), stream) for i in range(length)]
 
     """
     return [struct_parse(ULInt8(''), stream) for i in range(length)]
 
+def save_dwarf_section(section, filename):
+    """Debug helper: dump section contents into a file
+    Section is expected to be one of the debug_xxx_sec elements of DWARFInfo
+    """
+    stream = section.stream
+    pos = stream.tell()
+    stream.seek(0, os.SEEK_SET)
+    section.stream.seek(0)
+    with open(filename, 'wb') as file:
+        data = stream.read(section.size)
+        file.write(data)
+    stream.seek(pos, os.SEEK_SET)    
+
 #------------------------- PRIVATE -------------------------
 
 def _assert_with_exception(cond, msg, exception_type):
 #------------------------- PRIVATE -------------------------
 
 def _assert_with_exception(cond, msg, exception_type):
index 059c22c812375e0b536b5459951137921a9e5ae8..1934a2eeabd5ba5ca306774be83da06519d5527a 100644 (file)
@@ -523,6 +523,7 @@ _EXTRA_INFO_DESCRIPTION_MAP = defaultdict(
     DW_AT_associated=_location_list_extra,
     DW_AT_data_location=_location_list_extra,
     DW_AT_stride=_location_list_extra,
     DW_AT_associated=_location_list_extra,
     DW_AT_data_location=_location_list_extra,
     DW_AT_stride=_location_list_extra,
+    DW_AT_call_value=_location_list_extra,
     DW_AT_import=_import_extra,
     DW_AT_GNU_call_site_value=_location_list_extra,
     DW_AT_GNU_call_site_data_value=_location_list_extra,
     DW_AT_import=_import_extra,
     DW_AT_GNU_call_site_value=_location_list_extra,
     DW_AT_GNU_call_site_data_value=_location_list_extra,
@@ -651,21 +652,21 @@ class ExprDumper(object):
             return '%s: %x' % (opcode_name, args[0])
         elif opcode_name in self._ops_with_two_decimal_args:
             return '%s: %s %s' % (opcode_name, args[0], args[1])
             return '%s: %x' % (opcode_name, args[0])
         elif opcode_name in self._ops_with_two_decimal_args:
             return '%s: %s %s' % (opcode_name, args[0], args[1])
-        elif opcode_name == 'DW_OP_GNU_entry_value':
-            return '%s: (%s)' % (opcode_name, ','.join([self._dump_to_string(deo.op, deo.op_name, deo.args) for deo in args[0]]))
+        elif opcode_name in ('DW_OP_GNU_entry_value', 'DW_OP_entry_value'):
+            return '%s: (%s)' % (opcode_name, ','.join([self._dump_to_string(deo.op, deo.op_name, deo.args, cu_offset) for deo in args[0]]))
         elif opcode_name == 'DW_OP_implicit_value':
             return "%s %s byte block: %s" % (opcode_name, len(args[0]), ''.join(["%x " % b for b in args[0]]))
         elif opcode_name == 'DW_OP_GNU_parameter_ref':
             return "%s: <0x%x>" % (opcode_name, args[0] + cu_offset)
         elif opcode_name == 'DW_OP_implicit_value':
             return "%s %s byte block: %s" % (opcode_name, len(args[0]), ''.join(["%x " % b for b in args[0]]))
         elif opcode_name == 'DW_OP_GNU_parameter_ref':
             return "%s: <0x%x>" % (opcode_name, args[0] + cu_offset)
-        elif opcode_name == 'DW_OP_GNU_implicit_pointer':
+        elif opcode_name in ('DW_OP_GNU_implicit_pointer', 'DW_OP_implicit_pointer'):
             return "%s: <0x%x> %d" % (opcode_name, args[0], args[1])
             return "%s: <0x%x> %d" % (opcode_name, args[0], args[1])
-        elif opcode_name == 'DW_OP_GNU_convert':
+        elif opcode_name in ('DW_OP_GNU_convert', 'DW_OP_convert'):
             return "%s <0x%x>" % (opcode_name, args[0] + cu_offset)
             return "%s <0x%x>" % (opcode_name, args[0] + cu_offset)
-        elif opcode_name == 'DW_OP_GNU_deref_type':
+        elif opcode_name in ('DW_OP_GNU_deref_type', 'DW_OP_deref_type'):
             return "%s: %d <0x%x>" % (opcode_name, args[0], args[1] + cu_offset)
             return "%s: %d <0x%x>" % (opcode_name, args[0], args[1] + cu_offset)
-        elif opcode_name == 'DW_OP_GNU_const_type':
+        elif opcode_name in ('DW_OP_GNU_const_type', 'DW_OP_const_type'):
             return "%s: <0x%x>  %d byte block: %s " % (opcode_name, args[0] + cu_offset, len(args[1]), ' '.join("%x" % b for b in args[1]))
             return "%s: <0x%x>  %d byte block: %s " % (opcode_name, args[0] + cu_offset, len(args[1]), ' '.join("%x" % b for b in args[1]))
-        elif opcode_name == 'DW_OP_GNU_regval_type':
+        elif opcode_name in ('DW_OP_GNU_regval_type', 'DW_OP_regval_type'):
             return "%s: %d (%s) <0x%x>" % (opcode_name, args[0], describe_reg_name(args[0], _MACHINE_ARCH), args[1] + cu_offset)
         else:
             return '<unknown %s>' % opcode_name
             return "%s: %d (%s) <0x%x>" % (opcode_name, args[0], describe_reg_name(args[0], _MACHINE_ARCH), args[1] + cu_offset)
         else:
             return '<unknown %s>' % opcode_name
index 39ceee71c154c75f2ad1601b20382d9b1f2a04b6..1e4f658c5d4445305d6a8cbfe16335ccade9b1a5 100644 (file)
@@ -244,6 +244,15 @@ def _init_dispatch_table(structs):
     add('DW_OP_call4', parse_arg_struct(structs.Dwarf_uint32('')))
     add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset('')))
     add('DW_OP_implicit_value', parse_blob())
     add('DW_OP_call4', parse_arg_struct(structs.Dwarf_uint32('')))
     add('DW_OP_call_ref', parse_arg_struct(structs.Dwarf_offset('')))
     add('DW_OP_implicit_value', parse_blob())
+    add('DW_OP_entry_value', parse_nestedexpr())
+    add('DW_OP_const_type', parse_typedblob())
+    add('DW_OP_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''),
+                                                   structs.Dwarf_uleb128('')))    
+    add('DW_OP_deref_type', parse_arg_struct2(structs.Dwarf_uint8(''),
+                                              structs.Dwarf_uleb128('')))   
+    add('DW_OP_implicit_pointer', parse_arg_struct2(structs.Dwarf_offset(''),
+                                                        structs.Dwarf_sleb128(''))) 
+    add('DW_OP_convert', parse_arg_struct(structs.Dwarf_uleb128('')))                                              
     add('DW_OP_GNU_entry_value', parse_nestedexpr())
     add('DW_OP_GNU_const_type', parse_typedblob())
     add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''),
     add('DW_OP_GNU_entry_value', parse_nestedexpr())
     add('DW_OP_GNU_const_type', parse_typedblob())
     add('DW_OP_GNU_regval_type', parse_arg_struct2(structs.Dwarf_uleb128(''),
index 9642cc88cfbf4a70f3605a39cdd503f5efcdaf1a..8dc7028f160e16731ba82d9d787c538d2a04e801 100644 (file)
@@ -6,9 +6,11 @@
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
+import os
 from collections import namedtuple
 from bisect import bisect_right
 
 from collections import namedtuple
 from bisect import bisect_right
 
+from ..construct.lib.container import Container
 from ..common.exceptions import DWARFError
 from ..common.utils import (struct_parse, dwarf_assert,
                             parse_cstring_from_stream)
 from ..common.exceptions import DWARFError
 from ..common.utils import (struct_parse, dwarf_assert,
                             parse_cstring_from_stream)
@@ -74,7 +76,9 @@ class DWARFInfo(object):
             debug_pubnames_sec,
             debug_addr_sec,
             debug_str_offsets_sec,
             debug_pubnames_sec,
             debug_addr_sec,
             debug_str_offsets_sec,
-            debug_line_str_sec):
+            debug_line_str_sec,
+            debug_loclists_sec,
+            debug_rnglists_sec): # Not parsed for now
         """ config:
                 A DwarfConfig object
 
         """ config:
                 A DwarfConfig object
 
@@ -93,9 +97,12 @@ class DWARFInfo(object):
         self.debug_loc_sec = debug_loc_sec
         self.debug_ranges_sec = debug_ranges_sec
         self.debug_line_sec = debug_line_sec
         self.debug_loc_sec = debug_loc_sec
         self.debug_ranges_sec = debug_ranges_sec
         self.debug_line_sec = debug_line_sec
+        self.debug_addr_sec = debug_addr_sec
         self.debug_line_str_sec = debug_line_str_sec
         self.debug_pubtypes_sec = debug_pubtypes_sec
         self.debug_pubnames_sec = debug_pubnames_sec
         self.debug_line_str_sec = debug_line_str_sec
         self.debug_pubtypes_sec = debug_pubtypes_sec
         self.debug_pubnames_sec = debug_pubnames_sec
+        self.debug_loclists_sec = debug_loclists_sec
+        self.debug_rnglists_sec = debug_rnglists_sec # Ignored for now
 
         # This is the DWARFStructs the context uses, so it doesn't depend on
         # DWARF format and address_size (these are determined per CU) - set them
 
         # This is the DWARFStructs the context uses, so it doesn't depend on
         # DWARF format and address_size (these are determined per CU) - set them
@@ -339,7 +346,10 @@ class DWARFInfo(object):
         """ Get a LocationLists object representing the .debug_loc section of
             the DWARF data, or None if this section doesn't exist.
         """
         """ Get a LocationLists object representing the .debug_loc section of
             the DWARF data, or None if this section doesn't exist.
         """
-        if self.debug_loc_sec:
+        if self.debug_loclists_sec:
+            assert(self.debug_loc_sec is None) # Are there ever files with both kinds of location sections?
+            return LocationLists(self.debug_loclists_sec.stream, self.structs, 5, self)
+        elif self.debug_loc_sec:
             return LocationLists(self.debug_loc_sec.stream, self.structs)
         else:
             return None
             return LocationLists(self.debug_loc_sec.stream, self.structs)
         else:
             return None
@@ -487,9 +497,12 @@ class DWARFInfo(object):
         if lineprog_header.get('directories', False):
             lineprog_header.include_directory = tuple(d.DW_LNCT_path for d in lineprog_header.directories)
         if lineprog_header.get('file_names', False):
         if lineprog_header.get('directories', False):
             lineprog_header.include_directory = tuple(d.DW_LNCT_path for d in lineprog_header.directories)
         if lineprog_header.get('file_names', False):
-            translate = namedtuple("file_entry", "name dir_index mtime length")
             lineprog_header.file_entry = tuple(
             lineprog_header.file_entry = tuple(
-                translate(e.get('DW_LNCT_path'), e.get('DW_LNCT_directory_index'), e.get('DW_LNCT_timestamp'), e.get('DW_LNCT_size'))
+                Container(**{
+                    'name':e.get('DW_LNCT_path'),
+                    'dir_index': e.get('DW_LNCT_directory_index'),
+                    'mtime': e.get('DW_LNCT_timestamp'),
+                    'length': e.get('DW_LNCT_size')})
                 for e in lineprog_header.file_names)
 
         # Calculate the offset to the next line program (see DWARF 6.2.4)
                 for e in lineprog_header.file_names)
 
         # Calculate the offset to the next line program (see DWARF 6.2.4)
@@ -502,3 +515,4 @@ class DWARFInfo(object):
             structs=structs,
             program_start_offset=self.debug_line_sec.stream.tell(),
             program_end_offset=end_offset)
             structs=structs,
             program_start_offset=self.debug_line_sec.stream.tell(),
             program_end_offset=end_offset)
+
index a52e8034413350d07aa402145ac1aa668dcd3f9f..c38ebe01ec26f2af8a33cabcd10ea4fa072eb737 100644 (file)
@@ -290,6 +290,8 @@ ENUM_DW_AT = dict(
     DW_AT_GNU_pubnames                      = 0x2134,
     DW_AT_GNU_pubtypes                      = 0x2135,
     DW_AT_GNU_discriminator                 = 0x2136,
     DW_AT_GNU_pubnames                      = 0x2134,
     DW_AT_GNU_pubtypes                      = 0x2135,
     DW_AT_GNU_discriminator                 = 0x2136,
+    DW_AT_GNU_locviews                      = 0x2137,
+    DW_AT_GNU_entry_view                    = 0x2138,
 
     DW_AT_LLVM_include_path  = 0x3e00,
     DW_AT_LLVM_config_macros = 0x3e01,
 
     DW_AT_LLVM_include_path  = 0x3e00,
     DW_AT_LLVM_config_macros = 0x3e01,
@@ -415,3 +417,15 @@ ENUM_DW_UT = dict(
     DW_UT_lo_user       = 0x80,
     DW_UT_hi_user       = 0xff
 )
     DW_UT_lo_user       = 0x80,
     DW_UT_hi_user       = 0xff
 )
+
+ENUM_DW_LLE = dict(
+    DW_LLE_end_of_list      = 0x00,
+    DW_LLE_base_addressx    = 0x01,
+    DW_LLE_startx_endx      = 0x02,
+    DW_LLE_startx_length    = 0x03,
+    DW_LLE_offset_pair      = 0x04,
+    DW_LLE_default_location = 0x05,
+    DW_LLE_base_address     = 0x06,
+    DW_LLE_start_end        = 0x07,
+    DW_LLE_start_length     = 0x08    
+)
index e6c735f585e3a94f63d4cb9f148786d553f5ea29..e674d671c3206a9348f56c1e4eb7d848f3361952 100644 (file)
 #-------------------------------------------------------------------------------
 import os
 from collections import namedtuple
 #-------------------------------------------------------------------------------
 import os
 from collections import namedtuple
-
+from ..common.exceptions import DWARFError
 from ..common.utils import struct_parse
 
 LocationExpr = namedtuple('LocationExpr', 'loc_expr')
 from ..common.utils import struct_parse
 
 LocationExpr = namedtuple('LocationExpr', 'loc_expr')
-LocationEntry = namedtuple('LocationEntry', 'entry_offset begin_offset end_offset loc_expr')
-BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset base_address')
+LocationEntry = namedtuple('LocationEntry', 'entry_offset entry_length begin_offset end_offset loc_expr is_absolute')
+BaseAddressEntry = namedtuple('BaseAddressEntry', 'entry_offset entry_length base_address')
+LocationViewPair = namedtuple('LocationViewPair', 'entry_offset begin end')
 
 class LocationLists(object):
     """ A single location list is a Python list consisting of LocationEntry or
         BaseAddressEntry objects.
 
 class LocationLists(object):
     """ A single location list is a Python list consisting of LocationEntry or
         BaseAddressEntry objects.
+
+        Starting with DWARF5, it may also contain LocationViewPair, but only
+        if scanning the section, never when requested for a DIE attribute.
+
+        The default location entries are returned as LocationEntry with 
+        begin_offset == end_offset == -1
+
+        Version determines whether the executable contains a debug_loc
+        section, or a DWARFv5 style debug_loclists one. Only the 4/5
+        distinction matters.
+
+        Dwarfinfo is only needed for DWARFv5 location entry encodings
+        that contain references to other sections (e. g. DW_LLE_startx_endx),
+        and only for location list enumeration.
     """
     """
-    def __init__(self, stream, structs):
+    def __init__(self, stream, structs, version=4, dwarfinfo=None):
         self.stream = stream
         self.structs = structs
         self.stream = stream
         self.structs = structs
+        self.dwarfinfo = dwarfinfo
+        self.version = version
         self._max_addr = 2 ** (self.structs.address_size * 8) - 1
 
         self._max_addr = 2 ** (self.structs.address_size * 8) - 1
 
-    def get_location_list_at_offset(self, offset):
+    def get_location_list_at_offset(self, offset, die=None):
         """ Get a location list at the given offset in the section.
         """ Get a location list at the given offset in the section.
+        Passing the die is only necessary in DWARF5+, for decoding
+        location entry encodings that contain references to other sections.
         """
         self.stream.seek(offset, os.SEEK_SET)
         """
         self.stream.seek(offset, os.SEEK_SET)
-        return self._parse_location_list_from_stream()
+        return self._parse_location_list_from_stream_v5(die) if self.version >= 5 else self._parse_location_list_from_stream()
 
     def iter_location_lists(self):
 
     def iter_location_lists(self):
-        """ Yield all location lists found in the section.
+        """ Iterates through location lists and view pairs. Returns lists of
+        LocationEntry, BaseAddressEntry, and LocationViewPair objects.
         """
         """
-        # Just call _parse_location_list_from_stream until the stream ends
-        self.stream.seek(0, os.SEEK_END)
-        endpos = self.stream.tell()
+        # The location lists section was never meant for sequential access.
+        # Location lists are referenced by DIE attributes by offset or by index.
+        
+        # As of DWARFv5, it may contain, in addition to proper location lists,
+        # location list view pairs, which are referenced by the nonstandard DW_AT_GNU_locviews
+        # attribute. A set of locview pairs (which is a couple of ULEB128 values) may precede
+        # a location list; the former is referenced by the DW_AT_GNU_locviews attribute, the 
+        # latter - by DW_AT_location (in the same DIE). Binutils' readelf dumps those.
+        # There is a view pair for each location-type entry in the list.
+        #
+        # Also, the section may contain gaps.
+        #
+        # Taking a cue from binutils, we would have to scan this section while looking at
+        # what's in DIEs.
+        stream = self.stream
+        stream.seek(0, os.SEEK_END)
+        endpos = stream.tell()
+
+        stream.seek(0, os.SEEK_SET)        
+
+        if self.version >= 5:
+            # Need to provide support for DW_AT_GNU_locviews. They are interspersed in
+            # the locations section, no way to tell where short of checking all DIEs
+            all_offsets = set() # Set of offsets where either a locview pair set can be found, or a view-less loclist
+            locviews = dict() # Map of locview offset to the respective loclist offset
+            cu_map = dict() # Map of loclist offsets to CUs
+            for cu in self.dwarfinfo.iter_CUs():
+                cu_ver = cu['version']
+                for die in cu.iter_DIEs():
+                    # A combination of location and locviews means there is a location list
+                    # preceded by several locview pairs
+                    if 'DW_AT_GNU_locviews' in die.attributes:
+                        assert('DW_AT_location' in die.attributes and
+                            LocationParser._attribute_has_loc_list(die.attributes['DW_AT_location'], cu_ver))
+                        views_offset = die.attributes['DW_AT_GNU_locviews'].value
+                        list_offset = die.attributes['DW_AT_location'].value
+                        locviews[views_offset] = list_offset
+                        cu_map[list_offset] = cu
+                        all_offsets.add(views_offset)
+
+                    # Scan other attributes for location lists
+                    for key in die.attributes:
+                        attr = die.attributes[key]
+                        if (key != 'DW_AT_location' and
+                            LocationParser.attribute_has_location(attr, cu_ver) and
+                            LocationParser._attribute_has_loc_list(attr, cu_ver)):
+                            list_offset = attr.value
+                            all_offsets.add(list_offset)
+                            cu_map[list_offset] = cu
+            all_offsets = list(all_offsets)
+            all_offsets.sort()
 
 
-        self.stream.seek(0, os.SEEK_SET)
-        while self.stream.tell() < endpos:
-            yield self._parse_location_list_from_stream()
+            # Loclists section is organized as an array of CUs, each length prefixed.
+            # We don't assume that the CUs go in the same order as the ones in info.
+            offset_index = 0
+            while stream.tell() < endpos:
+                # We are at the start of the CU block in the loclists now
+                unit_length = struct_parse(self.structs.Dwarf_initial_length(''), stream)
+                offset_past_len = stream.tell()
+                cu_header = struct_parse(self.structs.Dwarf_loclists_CU_header, stream)
+                assert(cu_header.version == 5)
+
+                # GNU binutils supports two traversal modes: by offsets in CU header, and sequential.
+                # We don't have a binary for the former yet. On the off chance that we one day might,
+                # let's parse the header anyway.
+
+                cu_end_offset = offset_past_len + unit_length
+                # Unit_length includes the header but doesn't include the length
+                
+                while stream.tell() < cu_end_offset:
+                    # Skip the gap to the next object
+                    next_offset = all_offsets[offset_index]
+                    if next_offset == stream.tell(): # At an object, either a loc list or a loc view pair
+                        locview_pairs = self._parse_locview_pairs(locviews)
+                        entries = self._parse_location_list_from_stream_v5()
+                        yield locview_pairs + entries
+                        offset_index += 1
+                    else: # We are at a gap - skip the gap to the next object or to the next CU
+                        if next_offset > cu_end_offset: # Gap at the CU end - the next object is in the next CU
+                            next_offset = cu_end_offset # And implicitly quit the loop within the CU
+                        stream.seek(next_offset, os.SEEK_SET)
+        else:
+            # Just call _parse_location_list_from_stream until the stream ends
+            while stream.tell() < endpos:
+                yield self._parse_location_list_from_stream()
 
     #------ PRIVATE ------#
 
 
     #------ PRIVATE ------#
 
@@ -56,7 +154,8 @@ class LocationLists(object):
                 break
             elif begin_offset == self._max_addr:
                 # Base address selection entry
                 break
             elif begin_offset == self._max_addr:
                 # Base address selection entry
-                lst.append(BaseAddressEntry(entry_offset=entry_offset, base_address=end_offset))
+                entry_length = self.stream.tell() - entry_offset
+                lst.append(BaseAddressEntry(entry_offset=entry_offset, entry_length=entry_length, base_address=end_offset))
             else:
                 # Location list entry
                 expr_len = struct_parse(
             else:
                 # Location list entry
                 expr_len = struct_parse(
@@ -64,13 +163,60 @@ class LocationLists(object):
                 loc_expr = [struct_parse(self.structs.Dwarf_uint8(''),
                                          self.stream)
                                 for i in range(expr_len)]
                 loc_expr = [struct_parse(self.structs.Dwarf_uint8(''),
                                          self.stream)
                                 for i in range(expr_len)]
+                entry_length = self.stream.tell() - entry_offset
                 lst.append(LocationEntry(
                     entry_offset=entry_offset,
                 lst.append(LocationEntry(
                     entry_offset=entry_offset,
+                    entry_length=entry_length,
                     begin_offset=begin_offset,
                     end_offset=end_offset,
                     begin_offset=begin_offset,
                     end_offset=end_offset,
-                    loc_expr=loc_expr))
+                    loc_expr=loc_expr,
+                    is_absolute = False))
+        return lst
+
+    # Also returns an array with BaseAddressEntry and LocationEntry
+    # Can't possibly support indexed values, since parsing those requires
+    # knowing the DIE context it came from
+    def _parse_location_list_from_stream_v5(self, die = None):
+        # This won't contain the terminator entry
+        lst = [self._translate_entry_v5(entry, die)
+            for entry
+            in struct_parse(self.structs.Dwarf_loclists_entries, self.stream)]
         return lst
 
         return lst
 
+    # From V5 style entries to a LocationEntry/BaseAddressEntry
+    def _translate_entry_v5(self, entry, die):
+        off = entry.entry_offset
+        len = entry.entry_end_offset - off
+        type = entry.entry_type
+        if type == 'DW_LLE_base_address':
+            return BaseAddressEntry(off, len, entry.address)
+        elif type == 'DW_LLE_offset_pair':
+            return LocationEntry(off, len, entry.start_offset, entry.end_offset, entry.loc_expr, False)
+        elif type == 'DW_LLE_start_length':
+            return LocationEntry(off, len, entry.start_address, entry.start_address + entry.length, entry.loc_expr, True)
+        elif type == 'DW_LLE_start_end': # No test for this yet, but the format seems straightforward
+            return LocationEntry(off, len, entry.start_address, entry.end_address, entry.loc_expr, True)
+        elif type == 'DW_LLE_default_location': # No test for this either, and this is new in the API
+            return LocationEntry(off, len, -1, -1, entry.loc_expr, True)
+        elif type in ('DW_LLE_base_addressx', 'DW_LLE_startx_endx', 'DW_LLE_startx_length'):           
+            # We don't have sample binaries for those LLEs. Their proper parsing would
+            # require knowing the CU context (so that indices can be resolved to code offsets)
+            raise NotImplementedError("Location list entry type %s is not supported yet" % (type,))
+        else:
+            raise DWARFError(False, "Unknown DW_LLE code: %s" % (type,))
+
+    # Locviews is the dict, mapping locview offsets to corresponding loclist offsets
+    def _parse_locview_pairs(self, locviews):
+        stream = self.stream
+        list_offset = locviews.get(stream.tell(), None)
+        pairs = []
+        if list_offset is not None:
+            while stream.tell() < list_offset:
+                pair = struct_parse(self.structs.Dwarf_locview_pair, stream)
+                pairs.append(LocationViewPair(pair.entry_offset, pair.begin, pair.end))
+            assert(stream.tell() == list_offset)
+        return pairs
+
 class LocationParser(object):
     """ A parser for location information in DIEs.
         Handles both location information contained within the attribute
 class LocationParser(object):
     """ A parser for location information in DIEs.
         Handles both location information contained within the attribute
@@ -89,7 +235,7 @@ class LocationParser(object):
                 (LocationParser._attribute_has_loc_expr(attr, dwarf_version) or
                  LocationParser._attribute_has_loc_list(attr, dwarf_version)))
 
                 (LocationParser._attribute_has_loc_expr(attr, dwarf_version) or
                  LocationParser._attribute_has_loc_list(attr, dwarf_version)))
 
-    def parse_from_attribute(self, attr, dwarf_version):
+    def parse_from_attribute(self, attr, dwarf_version, die = None):
         """ Parses a DIE attribute and returns either a LocationExpr or
             a list.
         """
         """ Parses a DIE attribute and returns either a LocationExpr or
             a list.
         """
@@ -98,7 +244,11 @@ class LocationParser(object):
                 return LocationExpr(attr.value)
             elif self._attribute_has_loc_list(attr, dwarf_version):
                 return self.location_lists.get_location_list_at_offset(
                 return LocationExpr(attr.value)
             elif self._attribute_has_loc_list(attr, dwarf_version):
                 return self.location_lists.get_location_list_at_offset(
-                    attr.value)
+                    attr.value, die)
+                # We don't yet know if the DIE context will be needed.
+                # We might get it without a full tree traversal using 
+                # attr.offset as a key, but we assume a good DWARF5
+                # aware consumer would pass a DIE along.
         else:
             raise ValueError("Attribute does not have location information")
 
         else:
             raise ValueError("Attribute does not have location information")
 
index fb9b0b7d4fa6814309af437f4bf771b8e7d261e1..a1a286b41840ee994f0204aa7cfb9cd9278dc637 100644 (file)
@@ -7,15 +7,16 @@
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
 # Eli Bendersky (eliben@gmail.com)
 # This code is in the public domain
 #-------------------------------------------------------------------------------
-from elftools.construct.core import Subconstruct
-from elftools.construct.macros import Embedded
+from logging.config import valid_ident
 from ..construct import (
     UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
     SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
     Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
 from ..construct import (
     UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
     SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
     Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
-    CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence
+    CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence,
+    Switch
     )
     )
-from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
+from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128,
+    StreamOffset)
 from .enums import *
 
 
 from .enums import *
 
 
@@ -142,6 +143,7 @@ class DWARFStructs(object):
         self._create_nameLUT_header()
         self._create_string_offsets_table_header()
         self._create_address_table_header()
         self._create_nameLUT_header()
         self._create_string_offsets_table_header()
         self._create_address_table_header()
+        self._create_loclists_parsers()
 
     def _create_initial_length(self):
         def _InitialLength(name):
 
     def _create_initial_length(self):
         def _InitialLength(name):
@@ -396,6 +398,41 @@ class DWARFStructs(object):
                     subcon=self.Dwarf_uint8('elem'),
                     length_field=length_field(''))
 
                     subcon=self.Dwarf_uint8('elem'),
                     length_field=length_field(''))
 
+    def _create_loclists_parsers(self):
+        """ Create structs for the debug_loclists CU header, list entries and location view pairs; DWARFv5, section 7.29
+        """
+        self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
+            # Unit_length parsed separately
+            self.Dwarf_uint16('version'),
+            self.Dwarf_uint8('address_size'),
+            self.Dwarf_uint8('segment_selector_size'),
+            PrefixedArray(
+                self.Dwarf_offset('offsets'),
+                self.Dwarf_uint32('')))
+
+        cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128(''))
+
+        self.Dwarf_loclists_entries = RepeatUntilExcluding(
+            lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list',
+            Struct('entry',
+                StreamOffset('entry_offset'),
+                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE),
+                Embed(Switch('', lambda ctx: ctx.entry_type,
+                {
+                    'DW_LLE_end_of_list'      : Struct('end_of_list'),
+                    'DW_LLE_base_addressx'    : Struct('base_addressx', self.Dwarf_uleb128('index')),
+                    'DW_LLE_startx_endx'      : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index'), cld),
+                    'DW_LLE_startx_length'    : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length'), cld),
+                    'DW_LLE_offset_pair'      : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset'), cld),
+                    'DW_LLE_default_location' : Struct('default_location', cld),
+                    'DW_LLE_base_address'     : Struct('base_address', self.Dwarf_target_addr('address')),
+                    'DW_LLE_start_end'        : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address'), cld),
+                    'DW_LLE_start_length'     : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'), cld),
+                })),
+                StreamOffset('entry_end_offset')))
+
+        self.Dwarf_locview_pair = Struct('locview_pair',
+            StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end'))
 
 class _InitialLengthAdapter(Adapter):
     """ A standard Construct adapter that expects a sub-construct
 
 class _InitialLengthAdapter(Adapter):
     """ A standard Construct adapter that expects a sub-construct
index 10367ad75a7ab8dbb57651a80456fc55924a5b5f..bdda624d7a43cdedd9ddb4a766f276ad8b4b397a 100644 (file)
@@ -218,7 +218,8 @@ class ELFFile(object):
                          '.debug_str', '.debug_line', '.debug_frame',
                          '.debug_loc', '.debug_ranges', '.debug_pubtypes',
                          '.debug_pubnames', '.debug_addr',
                          '.debug_str', '.debug_line', '.debug_frame',
                          '.debug_loc', '.debug_ranges', '.debug_pubtypes',
                          '.debug_pubnames', '.debug_addr',
-                         '.debug_str_offsets', '.debug_line_str')
+                         '.debug_str_offsets', '.debug_line_str',
+                         '.debug_loclists', '.debug_rnglists')
 
 
         compressed = bool(self.get_section_by_name('.zdebug_info'))
 
 
         compressed = bool(self.get_section_by_name('.zdebug_info'))
@@ -232,7 +233,8 @@ class ELFFile(object):
          debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name,
          debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name,
          debug_pubnames_name, debug_addr_name, debug_str_offsets_name,
          debug_str_sec_name, debug_line_sec_name, debug_frame_sec_name,
          debug_loc_sec_name, debug_ranges_sec_name, debug_pubtypes_name,
          debug_pubnames_name, debug_addr_name, debug_str_offsets_name,
-         debug_line_str_name, eh_frame_sec_name) = section_names
+         debug_line_str_name, debug_loclists_sec_name, debug_rnglists_sec_name,
+         eh_frame_sec_name) = section_names
 
         debug_sections = {}
         for secname in section_names:
 
         debug_sections = {}
         for secname in section_names:
@@ -265,7 +267,9 @@ class ELFFile(object):
                 debug_pubnames_sec=debug_sections[debug_pubnames_name],
                 debug_addr_sec=debug_sections[debug_addr_name],
                 debug_str_offsets_sec=debug_sections[debug_str_offsets_name],
                 debug_pubnames_sec=debug_sections[debug_pubnames_name],
                 debug_addr_sec=debug_sections[debug_addr_name],
                 debug_str_offsets_sec=debug_sections[debug_str_offsets_name],
-                debug_line_str_sec=debug_sections[debug_line_str_name]
+                debug_line_str_sec=debug_sections[debug_line_str_name],
+                debug_loclists_sec=debug_sections[debug_loclists_sec_name],
+                debug_rnglists_sec=debug_sections[debug_rnglists_sec_name]
                 )
 
     def has_ehabi_info(self):
                 )
 
     def has_ehabi_info(self):
index 01c8933b9768d3aed610879ea9911368cf1cdf20..57912444abcca45ef4d4784b681e54386ea5a850 100644 (file)
@@ -5,9 +5,9 @@ Processing file: ./examples/sample_exe64.elf
       (DW_OP_addr: 400608)
   Found a compile unit at offset 258, length 156
    DIE DW_TAG_subprogram. attr DW_AT_frame_base.
       (DW_OP_addr: 400608)
   Found a compile unit at offset 258, length 156
    DIE DW_TAG_subprogram. attr DW_AT_frame_base.
-      LocationEntry(entry_offset=0, begin_offset=0, end_offset=1, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>>
-      LocationEntry(entry_offset=20, begin_offset=1, end_offset=4, loc_expr=[119, 16]) <<(DW_OP_breg7 (rsp): 16)>>
-      LocationEntry(entry_offset=40, begin_offset=4, end_offset=43, loc_expr=[118, 16]) <<(DW_OP_breg6 (rbp): 16)>>
+      LocationEntry(entry_offset=0, entry_length=20, begin_offset=0, end_offset=1, loc_expr=[119, 8], is_absolute=False) <<(DW_OP_breg7 (rsp): 8)>>
+      LocationEntry(entry_offset=20, entry_length=20, begin_offset=1, end_offset=4, loc_expr=[119, 16], is_absolute=False) <<(DW_OP_breg7 (rsp): 16)>>
+      LocationEntry(entry_offset=40, entry_length=20, begin_offset=4, end_offset=43, loc_expr=[118, 16], is_absolute=False) <<(DW_OP_breg6 (rbp): 16)>>
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
       (DW_OP_fbreg: -20)
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
       (DW_OP_fbreg: -20)
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
@@ -18,16 +18,16 @@ Processing file: ./examples/sample_exe64.elf
    DIE DW_TAG_subprogram. attr DW_AT_frame_base.
       (DW_OP_breg7 (rsp): 8)
    DIE DW_TAG_subprogram. attr DW_AT_frame_base.
    DIE DW_TAG_subprogram. attr DW_AT_frame_base.
       (DW_OP_breg7 (rsp): 8)
    DIE DW_TAG_subprogram. attr DW_AT_frame_base.
-      LocationEntry(entry_offset=76, begin_offset=16, end_offset=64, loc_expr=[119, 8]) <<(DW_OP_breg7 (rsp): 8)>>
-      LocationEntry(entry_offset=96, begin_offset=64, end_offset=153, loc_expr=[119, 192, 0]) <<(DW_OP_breg7 (rsp): 64)>>
+      LocationEntry(entry_offset=76, entry_length=20, begin_offset=16, end_offset=64, loc_expr=[119, 8], is_absolute=False) <<(DW_OP_breg7 (rsp): 8)>>
+      LocationEntry(entry_offset=96, entry_length=21, begin_offset=64, end_offset=153, loc_expr=[119, 192, 0], is_absolute=False) <<(DW_OP_breg7 (rsp): 64)>>
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
-      LocationEntry(entry_offset=133, begin_offset=16, end_offset=85, loc_expr=[85]) <<(DW_OP_reg5 (rdi))>>
-      LocationEntry(entry_offset=152, begin_offset=85, end_offset=143, loc_expr=[94]) <<(DW_OP_reg14 (r14))>>
+      LocationEntry(entry_offset=133, entry_length=19, begin_offset=16, end_offset=85, loc_expr=[85], is_absolute=False) <<(DW_OP_reg5 (rdi))>>
+      LocationEntry(entry_offset=152, entry_length=19, begin_offset=85, end_offset=143, loc_expr=[94], is_absolute=False) <<(DW_OP_reg14 (r14))>>
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
-      LocationEntry(entry_offset=187, begin_offset=16, end_offset=85, loc_expr=[84]) <<(DW_OP_reg4 (rsi))>>
-      LocationEntry(entry_offset=206, begin_offset=85, end_offset=138, loc_expr=[93]) <<(DW_OP_reg13 (r13))>>
+      LocationEntry(entry_offset=187, entry_length=19, begin_offset=16, end_offset=85, loc_expr=[84], is_absolute=False) <<(DW_OP_reg4 (rsi))>>
+      LocationEntry(entry_offset=206, entry_length=19, begin_offset=85, end_offset=138, loc_expr=[93], is_absolute=False) <<(DW_OP_reg13 (r13))>>
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
    DIE DW_TAG_formal_parameter. attr DW_AT_location.
-      LocationEntry(entry_offset=241, begin_offset=16, end_offset=85, loc_expr=[81]) <<(DW_OP_reg1 (rdx))>>
-      LocationEntry(entry_offset=260, begin_offset=85, end_offset=133, loc_expr=[92]) <<(DW_OP_reg12 (r12))>>
+      LocationEntry(entry_offset=241, entry_length=19, begin_offset=16, end_offset=85, loc_expr=[81], is_absolute=False) <<(DW_OP_reg1 (rdx))>>
+      LocationEntry(entry_offset=260, entry_length=19, begin_offset=85, end_offset=133, loc_expr=[92], is_absolute=False) <<(DW_OP_reg12 (r12))>>
    DIE DW_TAG_variable. attr DW_AT_location.
    DIE DW_TAG_variable. attr DW_AT_location.
-      LocationEntry(entry_offset=295, begin_offset=92, end_offset=123, loc_expr=[83]) <<(DW_OP_reg3 (rbx))>>
+      LocationEntry(entry_offset=295, entry_length=19, begin_offset=92, end_offset=123, loc_expr=[83], is_absolute=False) <<(DW_OP_reg3 (rbx))>>
index a45ec3019556d8533bb5515cfa0e359d5db3e689..6bd776badd2ed93c3befeefec53b04ff1adf329f 100755 (executable)
@@ -62,7 +62,7 @@ from elftools.dwarf.descriptions import (
     )
 from elftools.dwarf.constants import (
     DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file)
     )
 from elftools.dwarf.constants import (
     DW_LNS_copy, DW_LNS_set_file, DW_LNE_define_file)
-from elftools.dwarf.locationlists import LocationParser, LocationEntry
+from elftools.dwarf.locationlists import LocationParser, LocationEntry, LocationViewPair, BaseAddressEntry
 from elftools.dwarf.callframe import CIE, FDE, ZERO
 from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry
 from elftools.dwarf.enums import ENUM_DW_UT
 from elftools.dwarf.callframe import CIE, FDE, ZERO
 from elftools.ehabi.ehabiinfo import CorruptEHABIEntry, CannotUnwindEHABIEntry, GenericEHABIEntry
 from elftools.dwarf.enums import ENUM_DW_UT
@@ -1137,9 +1137,9 @@ class ReadElf(object):
                 cu_filename = '%s/%s' % (bytes2str(dir), cu_filename)
 
             self._emitline('CU: %s:' % cu_filename)
                 cu_filename = '%s/%s' % (bytes2str(dir), cu_filename)
 
             self._emitline('CU: %s:' % cu_filename)
-            self._emitline('File name                            Line number    Starting address    View    Stmt' if ver5
-                else 'File name                            Line number    Starting address    Stmt')
-            # What goes into View on V5? To be seen...
+            self._emitline('File name                            Line number    Starting address    Stmt')
+            # GNU readelf has a View column that we don't try to replicate
+            # The autotest has logic in place to ignore that
 
             # Print each state's file, line and address information. For some
             # instructions other output is needed to be compatible with
 
             # Print each state's file, line and address information. For some
             # instructions other output is needed to be compatible with
@@ -1427,7 +1427,7 @@ class ReadElf(object):
                     self._dwarfinfo.CFI_entries())
 
     def _dump_debug_locations(self):
                     self._dwarfinfo.CFI_entries())
 
     def _dump_debug_locations(self):
-        """ Dump the location lists from .debug_location section
+        """ Dump the location lists from .debug_loc/.debug_loclists section
         """
         def _get_cu_base(cu):
             top_die = cu.get_top_DIE()
         """
         def _get_cu_base(cu):
             top_die = cu.get_top_DIE()
@@ -1447,48 +1447,88 @@ class ReadElf(object):
         loc_lists = list(loc_lists.iter_location_lists())
         if len(loc_lists) == 0:
             # Present but empty locations section - readelf outputs a message
         loc_lists = list(loc_lists.iter_location_lists())
         if len(loc_lists) == 0:
             # Present but empty locations section - readelf outputs a message
-            self._emitline("\nSection '%s' has no debugging data." % di.debug_loc_sec.name)
+            self._emitline("\nSection '%s' has no debugging data." % (di.debug_loclists_sec or di.debug_loc_sec).name)
             return
 
         # To dump a location list, one needs to know the CU.
             return
 
         # To dump a location list, one needs to know the CU.
-        # Scroll through DIEs once, list the known location list offsets
+        # Scroll through DIEs once, list the known location list offsets.
+        # Don't need this CU/DIE scan if all entries are absolute or prefixed by base,
+        # but let's not optimize for that yet.
         cu_map = dict() # Loc list offset => CU
         for cu in di.iter_CUs():
             for die in cu.iter_DIEs():
                 for key in die.attributes:
                     attr = die.attributes[key]
                     if (LocationParser.attribute_has_location(attr, cu['version']) and
         cu_map = dict() # Loc list offset => CU
         for cu in di.iter_CUs():
             for die in cu.iter_DIEs():
                 for key in die.attributes:
                     attr = die.attributes[key]
                     if (LocationParser.attribute_has_location(attr, cu['version']) and
-                        not LocationParser._attribute_has_loc_expr(attr, cu['version'])):
+                        LocationParser._attribute_has_loc_list(attr, cu['version'])):
                         cu_map[attr.value] = cu
 
         addr_size = di.config.default_address_size # In bytes, 4 or 8
         addr_width = addr_size * 2 # In hex digits, 8 or 16
         line_template = "    %%08x %%0%dx %%0%dx %%s%%s" % (addr_width, addr_width)
 
                         cu_map[attr.value] = cu
 
         addr_size = di.config.default_address_size # In bytes, 4 or 8
         addr_width = addr_size * 2 # In hex digits, 8 or 16
         line_template = "    %%08x %%0%dx %%0%dx %%s%%s" % (addr_width, addr_width)
 
-        self._emitline('Contents of the %s section:\n' % di.debug_loc_sec.name)
+        self._emitline('Contents of the %s section:\n' % (di.debug_loclists_sec or di.debug_loc_sec).name)
         self._emitline('    Offset   Begin            End              Expression')
         for loc_list in loc_lists:
         self._emitline('    Offset   Begin            End              Expression')
         for loc_list in loc_lists:
-            cu = cu_map.get(loc_list[0].entry_offset, False)
-            if not cu:
-                raise ValueError("Location list can't be tracked to a CU")
-            base_ip = _get_cu_base(cu)
+            in_views = False
+            has_views = False
+            base_ip = None
+            loc_entry_count = 0
+            cu = None
             for entry in loc_list:
             for entry in loc_list:
-                # TODO: support BaseAddressEntry lines
-                expr = describe_DWARF_expr(entry.loc_expr, cu.structs, cu.cu_offset)
-                postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else ''
-                self._emitline(line_template % (
-                    entry.entry_offset,
-                    base_ip + entry.begin_offset,
-                    base_ip + entry.end_offset,
-                    expr,
-                    postfix))
+                if isinstance(entry, LocationViewPair):
+                    has_views = in_views = True
+                    # The "v" before address is conditional in binutils, haven't figured out how
+                    self._emitline("    %08x v%015x v%015x location view pair" % (entry.entry_offset, entry.begin, entry.end))
+                else:
+                    if in_views:
+                        in_views = False             
+                        self._emitline("")
+                    # Need the CU for this loclist, but the map is keyed by the offset
+                    # of the first entry in the loclist. Got to skip the views first.
+                    if cu is None:
+                        cu = cu_map.get(entry.entry_offset, False)
+                        if not cu:
+                            raise ValueError("Location list can't be tracked to a CU")                        
+
+                    if isinstance(entry, LocationEntry):
+                        if base_ip is None and not entry.is_absolute:
+                            base_ip = _get_cu_base(cu)                                
+
+                        begin_offset = (0 if entry.is_absolute else base_ip) + entry.begin_offset
+                        end_offset = (0 if entry.is_absolute else base_ip) + entry.end_offset
+                        expr = describe_DWARF_expr(entry.loc_expr, cu.structs, cu.cu_offset)
+                        if has_views:
+                            view = loc_list[loc_entry_count]
+                            postfix = ' (start == end)' if entry.begin_offset == entry.end_offset and view.begin == view.end else ''
+                            self._emitline('    %08x v%015x v%015x views at %08x for:' %(
+                                entry.entry_offset,
+                                view.begin,
+                                view.end,
+                                view.entry_offset))
+                            self._emitline('             %016x %016x %s%s' %(
+                                begin_offset,
+                                end_offset,
+                                expr,
+                                postfix))
+                            loc_entry_count += 1
+                        else:
+                            postfix = ' (start == end)' if entry.begin_offset == entry.end_offset else ''
+                            self._emitline(line_template % (
+                                entry.entry_offset,
+                                begin_offset,
+                                end_offset,
+                                expr,
+                                postfix))
+                    elif isinstance(entry, BaseAddressEntry):
+                        base_ip = entry.base_address
+                        self._emitline("    %08x %016x (base address)" % (entry.entry_offset, entry.base_address))
+
             # Pyelftools doesn't store the terminating entry,
             # but readelf emits its offset, so this should too.
             last = loc_list[-1]
             # Pyelftools doesn't store the terminating entry,
             # but readelf emits its offset, so this should too.
             last = loc_list[-1]
-            last_len = 2*addr_size
-            if isinstance(last, LocationEntry):
-                last_len += 2 + len(last.loc_expr)
-            self._emitline("    %08x <End of list>" % (last.entry_offset + last_len))
+            self._emitline("    %08x <End of list>" % (last.entry_offset + last.entry_length))
 
     def _display_arch_specific_arm(self):
         """ Display the ARM architecture-specific info contained in the file.
 
     def _display_arch_specific_arm(self):
         """ Display the ARM architecture-specific info contained in the file.
index ceca7ec06c7809f4d4a3eb3a7a7cd679c22006df..56767c5ffd81c6298d60f52a9664e2670ac2289c 100755 (executable)
@@ -33,6 +33,9 @@ testlog.addHandler(logging.StreamHandler(sys.stdout))
 # same minor release and keeping track is a headache.
 if platform.system() == "Darwin": # MacOS
     READELF_PATH = 'greadelf'
 # same minor release and keeping track is a headache.
 if platform.system() == "Darwin": # MacOS
     READELF_PATH = 'greadelf'
+elif platform.system() == "Windows":
+    # Point the environment variable READELF at Cygwin's readelf.exe, or some other Windows build
+    READELF_PATH = os.environ.get('READELF', "readelf.exe")
 else:
     READELF_PATH = 'test/external_tools/readelf'
     if not os.path.exists(READELF_PATH):
 else:
     READELF_PATH = 'test/external_tools/readelf'
     if not os.path.exists(READELF_PATH):
@@ -142,6 +145,9 @@ def compare_output(s1, s2):
         return False, 'Number of lines different: %s vs %s' % (
                 len(lines1), len(lines2))
 
         return False, 'Number of lines different: %s vs %s' % (
                 len(lines1), len(lines2))
 
+    # Position of the View column in the output file, if parsing readelf..decodedline
+    # output, and the GNU readelf output contains the View column. Otherwise stays -1.
+    view_col_position = -1 
     for i in range(len(lines1)):
         if lines1[i].endswith('debug_line section:'):
             # .debug_line or .zdebug_line
     for i in range(len(lines1)):
         if lines1[i].endswith('debug_line section:'):
             # .debug_line or .zdebug_line
@@ -149,6 +155,23 @@ def compare_output(s1, s2):
 
         # readelf spelling error for GNU property notes
         lines1[i] = lines1[i].replace('procesor-specific type', 'processor-specific type')
 
         # readelf spelling error for GNU property notes
         lines1[i] = lines1[i].replace('procesor-specific type', 'processor-specific type')
+        
+        # The view column position may change from CU to CU:
+        if view_col_position >= 0 and lines1[i].startswith('cu:'):
+            view_col_position = -1    
+    
+        # Check if readelf..decodedline output line contains the view column
+        if flag_in_debug_line_section and lines1[i].startswith('file name') and view_col_position < 0:
+            view_col_position = lines1[i].find("view")
+            stmt_col_position = lines1[i].find("stmt")
+
+        # Excise the View column from the table, if any.
+        # view_col_position is only set to a non-negative number if one of the
+        # previous lines was a table header line with a "view" in it.
+        # We assume careful formatting on GNU readelf's part - View column values
+        # are not out of line with the View header.
+        if view_col_position >= 0 and not lines1[i].endswith(':'):
+            lines1[i] = lines1[i][:view_col_position] + lines1[i][stmt_col_position:]
 
         # Compare ignoring whitespace
         lines1_parts = lines1[i].split()
 
         # Compare ignoring whitespace
         lines1_parts = lines1[i].split()
@@ -169,16 +192,7 @@ def compare_output(s1, s2):
             sm = SequenceMatcher()
             sm.set_seqs(lines1[i], lines2[i])
             changes = sm.get_opcodes()
             sm = SequenceMatcher()
             sm.set_seqs(lines1[i], lines2[i])
             changes = sm.get_opcodes()
-            if flag_in_debug_line_section:
-                # readelf outputs an additional "View" column: ignore it
-                if len(lines1_parts) >= 2 and lines1_parts[-2] == 'view':
-                    ok = True
-                else:
-                    # Fast check special-cased for the only ELF we have which
-                    # has this information (dwarf_gnuops4.so.elf)
-                    ok = (    lines1_parts[-2:] == ['1', 'x']
-                          and lines2_parts[-1] == 'x')
-            elif '[...]' in lines1[i]:
+            if '[...]' in lines1[i]:
                 # Special case truncations with ellipsis like these:
                 #     .note.gnu.bu[...]        redelf
                 #     .note.gnu.build-i        pyelftools
                 # Special case truncations with ellipsis like these:
                 #     .note.gnu.bu[...]        redelf
                 #     .note.gnu.build-i        pyelftools
index ea01db64da0fd2939a182bee3835833b136de7d0..c92e442f2a02d4724fb4e93c5d2a643785506705 100644 (file)
@@ -47,6 +47,8 @@ class TestRefAddrOnDWARFv2With64BitTarget(unittest.TestCase):
             debug_addr_sec=None,
             debug_str_offsets_sec=None,
             debug_line_str_sec=None,
             debug_addr_sec=None,
             debug_str_offsets_sec=None,
             debug_line_str_sec=None,
+            debug_loclists_sec = None,
+            debug_rnglists_sec = None
         )
 
         CUs = [cu for cu in di.iter_CUs()]
         )
 
         CUs = [cu for cu in di.iter_CUs()]
diff --git a/test/testfiles_for_readelf/dwarf_v5ops.so.elf b/test/testfiles_for_readelf/dwarf_v5ops.so.elf
new file mode 100644 (file)
index 0000000..9da7825
Binary files /dev/null and b/test/testfiles_for_readelf/dwarf_v5ops.so.elf differ