Clean up whitespace
[pyelftools.git] / elftools / dwarf / locationlists.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/locationlists.py
3 #
4 # DWARF location lists section decoding (.debug_loc)
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 import os
10 from collections import namedtuple
11 from ..common.exceptions import DWARFError
12 from ..common.utils import struct_parse
13
# Result records produced by the location list decoders in this module.

# A location expression stored inline in a DIE attribute.
LocationExpr = namedtuple('LocationExpr', ['loc_expr'])

# One address range of a location list, together with its expression.
LocationEntry = namedtuple(
    'LocationEntry',
    ['entry_offset', 'entry_length', 'begin_offset', 'end_offset',
     'loc_expr', 'is_absolute'])

# A base address selection entry inside a location list.
BaseAddressEntry = namedtuple(
    'BaseAddressEntry',
    ['entry_offset', 'entry_length', 'base_address'])

# A (begin, end) location view pair found in front of a location list.
LocationViewPair = namedtuple(
    'LocationViewPair',
    ['entry_offset', 'begin', 'end'])
18
class LocationLists(object):
    """ A single location list is a Python list consisting of LocationEntry or
        BaseAddressEntry objects.

        Starting with DWARF5, it may also contain LocationViewPair, but only
        if scanning the section, never when requested for a DIE attribute.

        The default location entries are returned as LocationEntry with
        begin_offset == end_offset == -1

        Version determines whether the executable contains a debug_loc
        section, or a DWARFv5 style debug_loclists one. Only the 4/5
        distinction matters.

        Dwarfinfo is only needed for DWARFv5 location entry encodings
        that contain references to other sections (e. g. DW_LLE_startx_endx),
        and only for location list enumeration.
    """
    def __init__(self, stream, structs, version=4, dwarfinfo=None):
        self.stream = stream
        self.structs = structs
        self.dwarfinfo = dwarfinfo
        self.version = version
        # Largest value representable in a target address. In the pre-v5
        # format, a begin offset equal to it marks a base address
        # selection entry.
        self._max_addr = 2 ** (self.structs.address_size * 8) - 1

    def get_location_list_at_offset(self, offset, die=None):
        """ Get a location list at the given offset in the section.
            Passing the die is only necessary in DWARF5+, for decoding
            location entry encodings that contain references to other
            sections.
        """
        self.stream.seek(offset, os.SEEK_SET)
        if self.version >= 5:
            return self._parse_location_list_from_stream_v5(die)
        return self._parse_location_list_from_stream()

    def iter_location_lists(self):
        """ Iterates through location lists and view pairs. Returns lists of
            LocationEntry, BaseAddressEntry, and LocationViewPair objects.
        """
        # The location lists section was never meant for sequential access.
        # Location lists are referenced by DIE attributes by offset or by
        # index.
        #
        # As of DWARFv5, it may contain, in addition to proper location
        # lists, location list view pairs, which are referenced by the
        # nonstandard DW_AT_GNU_locviews attribute. A set of locview pairs
        # (which is a couple of ULEB128 values) may precede a location list;
        # the former is referenced by the DW_AT_GNU_locviews attribute, the
        # latter - by DW_AT_location (in the same DIE). Binutils' readelf
        # dumps those. There is a view pair for each location-type entry in
        # the list.
        #
        # Also, the section may contain gaps.
        #
        # Taking a cue from binutils, we would have to scan this section
        # while looking at what's in DIEs.
        stream = self.stream
        stream.seek(0, os.SEEK_END)
        endpos = stream.tell()

        stream.seek(0, os.SEEK_SET)

        if self.version >= 5:
            all_offsets, locviews = self._collect_v5_list_offsets()

            # Loclists section is organized as an array of CUs, each length
            # prefixed. We don't assume that the CUs go in the same order as
            # the ones in info.
            offset_index = 0
            while stream.tell() < endpos:
                # We are at the start of the CU block in the loclists now
                unit_length = struct_parse(
                    self.structs.Dwarf_initial_length(''), stream)
                offset_past_len = stream.tell()
                cu_header = struct_parse(
                    self.structs.Dwarf_loclists_CU_header, stream)
                assert(cu_header.version == 5)

                # GNU binutils supports two traversal modes: by offsets in
                # CU header, and sequential. We don't have a binary for the
                # former yet. On an off chance that we one day might, let's
                # parse the header anyway.

                # Unit_length includes the header but doesn't include the
                # length field itself.
                cu_end_offset = offset_past_len + unit_length

                while stream.tell() < cu_end_offset:
                    if offset_index < len(all_offsets):
                        next_offset = all_offsets[offset_index]
                    else:
                        # No known objects are left, so whatever remains of
                        # this unit is a trailing gap. Indexing all_offsets
                        # here would raise IndexError.
                        next_offset = cu_end_offset

                    if next_offset == stream.tell():
                        # At an object, either a loc list or a loc view pair
                        locview_pairs = self._parse_locview_pairs(locviews)
                        entries = self._parse_location_list_from_stream_v5()
                        yield locview_pairs + entries
                        offset_index += 1
                    else:
                        # We are at a gap - skip the gap to the next object
                        # or to the next CU
                        if next_offset > cu_end_offset:
                            # Gap at the CU end - the next object is in the
                            # next CU. This implicitly quits the loop within
                            # the CU.
                            next_offset = cu_end_offset
                        stream.seek(next_offset, os.SEEK_SET)
        else:
            # Just call _parse_location_list_from_stream until the stream
            # ends
            while stream.tell() < endpos:
                yield self._parse_location_list_from_stream()

    #------ PRIVATE ------#

    def _collect_v5_list_offsets(self):
        """ Walk all DIEs of all CUs and collect where objects live in the
            loclists section.

            Returns a tuple (all_offsets, locviews): all_offsets is a sorted
            list of offsets where either a set of locview pairs or a
            view-less location list starts; locviews maps the offset of a
            set of locview pairs to the offset of the location list that
            follows it.
        """
        # Need to provide support for DW_AT_GNU_locviews. They are
        # interspersed in the locations section, no way to tell where short
        # of checking all DIEs
        all_offsets = set()
        locviews = dict()
        for cu in self.dwarfinfo.iter_CUs():
            cu_ver = cu['version']
            for die in cu.iter_DIEs():
                # A combination of location and locviews means there is a
                # location list preceded by several locview pairs
                has_locviews = 'DW_AT_GNU_locviews' in die.attributes
                if has_locviews:
                    assert('DW_AT_location' in die.attributes and
                        LocationParser._attribute_has_loc_list(
                            die.attributes['DW_AT_location'], cu_ver))
                    views_offset = die.attributes['DW_AT_GNU_locviews'].value
                    locviews[views_offset] = die.attributes['DW_AT_location'].value
                    all_offsets.add(views_offset)

                # Scan the attributes for location lists. DW_AT_location is
                # left out only when it has already been registered through
                # its locview pairs above; a view-less DW_AT_location list
                # has to be registered here or it would be skipped as a gap.
                for key in die.attributes:
                    if key == 'DW_AT_location' and has_locviews:
                        continue
                    attr = die.attributes[key]
                    if (LocationParser.attribute_has_location(attr, cu_ver) and
                        LocationParser._attribute_has_loc_list(attr, cu_ver)):
                        all_offsets.add(attr.value)
        return sorted(all_offsets), locviews

    def _parse_location_list_from_stream(self):
        """ Parse a pre-DWARFv5 location list at the current stream position.
            Returns a list of LocationEntry and BaseAddressEntry objects; the
            terminating entry is consumed but not included.
        """
        lst = []
        while True:
            entry_offset = self.stream.tell()
            begin_offset = struct_parse(
                self.structs.Dwarf_target_addr(''), self.stream)
            end_offset = struct_parse(
                self.structs.Dwarf_target_addr(''), self.stream)
            if begin_offset == 0 and end_offset == 0:
                # End of list - we're done.
                break
            elif begin_offset == self._max_addr:
                # Base address selection entry
                entry_length = self.stream.tell() - entry_offset
                lst.append(BaseAddressEntry(
                    entry_offset=entry_offset,
                    entry_length=entry_length,
                    base_address=end_offset))
            else:
                # Location list entry: a 16-bit expression length followed
                # by that many expression bytes
                expr_len = struct_parse(
                    self.structs.Dwarf_uint16(''), self.stream)
                loc_expr = [struct_parse(self.structs.Dwarf_uint8(''),
                                         self.stream)
                            for _ in range(expr_len)]
                entry_length = self.stream.tell() - entry_offset
                lst.append(LocationEntry(
                    entry_offset=entry_offset,
                    entry_length=entry_length,
                    begin_offset=begin_offset,
                    end_offset=end_offset,
                    loc_expr=loc_expr,
                    is_absolute=False))
        return lst

    # Also returns an array with BaseAddressEntry and LocationEntry
    # Can't possibly support indexed values, since parsing those requires
    # knowing the DIE context it came from
    def _parse_location_list_from_stream_v5(self, die=None):
        """ Parse a DWARFv5 location list at the current stream position and
            translate its entries to LocationEntry/BaseAddressEntry objects.
        """
        # This won't contain the terminator entry
        raw_entries = struct_parse(
            self.structs.Dwarf_loclists_entries, self.stream)
        return [self._translate_entry_v5(entry, die) for entry in raw_entries]

    # From V5 style entries to a LocationEntry/BaseAddressEntry
    def _translate_entry_v5(self, entry, die):
        """ Convert one parsed DWARFv5 location list entry.

            Raises NotImplementedError for the index based entry types and
            DWARFError for an unrecognized entry type.
        """
        off = entry.entry_offset
        length = entry.entry_end_offset - off
        kind = entry.entry_type
        if kind == 'DW_LLE_base_address':
            return BaseAddressEntry(off, length, entry.address)
        elif kind == 'DW_LLE_offset_pair':
            return LocationEntry(off, length, entry.start_offset,
                                 entry.end_offset, entry.loc_expr, False)
        elif kind == 'DW_LLE_start_length':
            return LocationEntry(off, length, entry.start_address,
                                 entry.start_address + entry.length,
                                 entry.loc_expr, True)
        elif kind == 'DW_LLE_start_end':
            # No test for this yet, but the format seems straightforward
            return LocationEntry(off, length, entry.start_address,
                                 entry.end_address, entry.loc_expr, True)
        elif kind == 'DW_LLE_default_location':
            # No test for this either, and this is new in the API
            return LocationEntry(off, length, -1, -1, entry.loc_expr, True)
        elif kind in ('DW_LLE_base_addressx', 'DW_LLE_startx_endx',
                      'DW_LLE_startx_length'):
            # We don't have sample binaries for those LLEs. Their proper
            # parsing would require knowing the CU context (so that indices
            # can be resolved to code offsets)
            raise NotImplementedError(
                "Location list entry type %s is not supported yet" % (kind,))
        else:
            raise DWARFError("Unknown DW_LLE code: %s" % (kind,))

    # Locviews is the dict, mapping locview offsets to corresponding loclist
    # offsets
    def _parse_locview_pairs(self, locviews):
        """ Parse the locview pairs at the current stream position, if any.

            When the current position is a key of locviews, pairs are read
            until the offset of the matching location list is reached.
            Otherwise an empty list is returned and the stream is not moved.
        """
        stream = self.stream
        list_offset = locviews.get(stream.tell(), None)
        pairs = []
        if list_offset is not None:
            while stream.tell() < list_offset:
                pair = struct_parse(self.structs.Dwarf_locview_pair, stream)
                pairs.append(LocationViewPair(
                    pair.entry_offset, pair.begin, pair.end))
            assert(stream.tell() == list_offset)
        return pairs
219
class LocationParser(object):
    """ Extracts location information from DIE attributes.

        An attribute may carry its location inline, which is reported as a
        LocationExpr object, or it may point into the location lists
        section, in which case the referenced location list (a Python list)
        is reported.
    """
    # Names of the attributes that are allowed to carry a location
    _LOCLISTPTR_ATTRIBUTES = (
        'DW_AT_location', 'DW_AT_string_length',
        'DW_AT_const_value', 'DW_AT_return_addr',
        'DW_AT_data_member_location',
        'DW_AT_frame_base', 'DW_AT_segment',
        'DW_AT_static_link', 'DW_AT_use_location',
        'DW_AT_vtable_elem_location',
        'DW_AT_GNU_call_site_value',
        'DW_AT_GNU_call_site_target',
        'DW_AT_GNU_call_site_data_value')

    def __init__(self, location_lists):
        self.location_lists = location_lists

    @staticmethod
    def attribute_has_location(attr, dwarf_version):
        """ Tell whether a DIE attribute holds location information, either
            as an inline expression or as a reference to a location list.
        """
        if not LocationParser._attribute_is_loclistptr_class(attr):
            return False
        if LocationParser._attribute_has_loc_expr(attr, dwarf_version):
            return True
        return LocationParser._attribute_has_loc_list(attr, dwarf_version)

    def parse_from_attribute(self, attr, dwarf_version, die = None):
        """ Turn a DIE attribute into a LocationExpr (inline expression) or
            into a list (location list looked up by the attribute value).
            Raises ValueError for an attribute without location information.
        """
        if not self.attribute_has_location(attr, dwarf_version):
            raise ValueError("Attribute does not have location information")

        if self._attribute_has_loc_expr(attr, dwarf_version):
            return LocationExpr(attr.value)

        if self._attribute_has_loc_list(attr, dwarf_version):
            # We don't yet know if the DIE context will be needed.
            # We might get it without a full tree traversal using
            # attr.offset as a key, but we assume a good DWARF5
            # aware consumer would pass a DIE along.
            return self.location_lists.get_location_list_at_offset(
                attr.value, die)

    #------ PRIVATE ------#

    @staticmethod
    def _attribute_has_loc_expr(attr, dwarf_version):
        # Before DWARF version 4, expressions were stored in block forms;
        # DW_AT_const_value blocks are not expressions
        if dwarf_version < 4:
            is_block = attr.form.startswith('DW_FORM_block')
            if is_block and attr.name != 'DW_AT_const_value':
                return True
        return attr.form == 'DW_FORM_exprloc'

    @staticmethod
    def _attribute_has_loc_list(attr, dwarf_version):
        # Before DWARF version 4, list references were stored in plain data
        # forms; DW_AT_const_value data is not a reference
        if dwarf_version < 4:
            is_data = attr.form in ('DW_FORM_data4', 'DW_FORM_data8')
            if is_data and attr.name != 'DW_AT_const_value':
                return True
        return attr.form == 'DW_FORM_sec_offset'

    @staticmethod
    def _attribute_is_loclistptr_class(attr):
        return attr.name in LocationParser._LOCLISTPTR_ATTRIBUTES