9a349779baa02eecf260f846392a2edcd50aa65b
[pyelftools.git] / elftools / dwarf / locationlists.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/locationlists.py
3 #
4 # DWARF location lists section decoding (.debug_loc)
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 import os
10 from collections import namedtuple
11 from ..common.exceptions import DWARFError
12 from ..common.utils import struct_parse
13 from .dwarf_util import _iter_CUs_in_section
14
# Result tuples handed out by this module.
#   LocationExpr     - wraps the value of an attribute that holds a location
#                      expression inline
#   LocationEntry    - one location list entry: where it sits in the section,
#                      the range it covers and its location expression
#   BaseAddressEntry - a base address selection entry of a location list
#   LocationViewPair - one pair of location view numbers found while
#                      scanning the section
LocationExpr = namedtuple('LocationExpr', ['loc_expr'])
LocationEntry = namedtuple(
    'LocationEntry',
    ['entry_offset', 'entry_length', 'begin_offset', 'end_offset',
     'loc_expr', 'is_absolute'])
BaseAddressEntry = namedtuple(
    'BaseAddressEntry',
    ['entry_offset', 'entry_length', 'base_address'])
LocationViewPair = namedtuple(
    'LocationViewPair',
    ['entry_offset', 'begin', 'end'])
19
def _translate_startx_length(e, cu):
    """ Convert a parsed DW_LLE_startx_length entry into a LocationEntry.

        The start address is looked up once through
        cu.dwarfinfo.get_addr() using the entry's start_index; the end
        is that address plus the entry's length. The resulting entry is
        flagged as absolute.
    """
    begin = cu.dwarfinfo.get_addr(cu, e.start_index)
    return LocationEntry(
        entry_offset=e.entry_offset,
        entry_length=e.entry_length,
        begin_offset=begin,
        end_offset=begin + e.length,
        loc_expr=e.loc_expr,
        is_absolute=True)
23
# Dispatch table: DW_LLE_* entry type name -> callable(entry, cu) that builds
# one of the tuples above from a parsed entry. LocationViewPair is mapped
# elsewhere. Entries built from explicit addresses or address indices are
# flagged is_absolute=True; DW_LLE_offset_pair is the only one flagged False.
# A default location is reported with begin_offset == end_offset == -1.
entry_translate = {
    'DW_LLE_base_address': lambda e, cu: BaseAddressEntry(
        entry_offset=e.entry_offset,
        entry_length=e.entry_length,
        base_address=e.address),
    'DW_LLE_offset_pair': lambda e, cu: LocationEntry(
        entry_offset=e.entry_offset,
        entry_length=e.entry_length,
        begin_offset=e.start_offset,
        end_offset=e.end_offset,
        loc_expr=e.loc_expr,
        is_absolute=False),
    'DW_LLE_start_length': lambda e, cu: LocationEntry(
        entry_offset=e.entry_offset,
        entry_length=e.entry_length,
        begin_offset=e.start_address,
        end_offset=e.start_address + e.length,
        loc_expr=e.loc_expr,
        is_absolute=True),
    'DW_LLE_start_end': lambda e, cu: LocationEntry(
        entry_offset=e.entry_offset,
        entry_length=e.entry_length,
        begin_offset=e.start_address,
        end_offset=e.end_address,
        loc_expr=e.loc_expr,
        is_absolute=True),
    'DW_LLE_default_location': lambda e, cu: LocationEntry(
        entry_offset=e.entry_offset,
        entry_length=e.entry_length,
        begin_offset=-1,
        end_offset=-1,
        loc_expr=e.loc_expr,
        is_absolute=True),
    'DW_LLE_base_addressx': lambda e, cu: BaseAddressEntry(
        entry_offset=e.entry_offset,
        entry_length=e.entry_length,
        base_address=cu.dwarfinfo.get_addr(cu, e.index)),
    'DW_LLE_startx_endx': lambda e, cu: LocationEntry(
        entry_offset=e.entry_offset,
        entry_length=e.entry_length,
        begin_offset=cu.dwarfinfo.get_addr(cu, e.start_index),
        end_offset=cu.dwarfinfo.get_addr(cu, e.end_index),
        loc_expr=e.loc_expr,
        is_absolute=True),
    # Needs the start address twice, hence a named helper instead of a lambda
    'DW_LLE_startx_length': _translate_startx_length,
}
35
class LocationListsPair(object):
    """ Wrapper for binaries that carry both a debug_loc and a
        debug_loclists section. Keeps one LocationLists object per
        section and routes lookups to the one matching the DWARF version
        of the requesting DIE's CU.
    """
    def __init__(self, streamv4, streamv5, structs, dwarfinfo=None):
        # One decoder per section flavor: version 4 for the first stream,
        # version 5 for the second
        self._loc = LocationLists(streamv4, structs, 4, dwarfinfo)
        self._loclists = LocationLists(streamv5, structs, 5, dwarfinfo)

    def get_location_list_at_offset(self, offset, die=None):
        """ See LocationLists.get_location_list_at_offset().

            Here the die is mandatory: the version in its CU header is
            what selects the section to read from. Raises DWARFError
            if it is missing.
        """
        if die is None:
            raise DWARFError("For this binary, \"die\" needs to be provided")
        if die.cu.header.version >= 5:
            target = self._loclists
        else:
            target = self._loc
        return target.get_location_list_at_offset(offset, die)

    def iter_location_lists(self):
        """ Not supported - always raises DWARFError.

            The structure of loc and loclists is not identical, so a
            realistic readelf implementation needs to be aware of both.
        """
        raise DWARFError("Iterating through two sections is not supported")

    def iter_CUs(self):
        """ See LocationLists.iter_CUs().

            Not supported - always raises DWARFError. There are no CUs
            in DWARFv4 sections.
        """
        raise DWARFError("Iterating through two sections is not supported")
64
class LocationLists(object):
    """ Decoder for a location lists section.

        A single location list is a Python list consisting of LocationEntry or
        BaseAddressEntry objects.

        Starting with DWARF5, it may also contain LocationViewPair, but only
        if scanning the section, never when requested for a DIE attribute.

        The default location entries are returned as LocationEntry with
        begin_offset == end_offset == -1

        Version determines whether the executable contains a debug_loc
        section, or a DWARFv5 style debug_loclists one. Only the 4/5
        distinction matters.

        Dwarfinfo is only needed for DWARFv5 location entry encodings
        that contain references to other sections (e. g. DW_LLE_startx_endx),
        and only for location list enumeration.
    """
    def __init__(self, stream, structs, version=4, dwarfinfo=None):
        self.stream = stream
        self.structs = structs
        self.dwarfinfo = dwarfinfo
        self.version = version
        # Largest value that fits into a target address. In the pre-v5
        # format a begin offset equal to it marks a base address entry.
        self._max_addr = 2 ** (self.structs.address_size * 8) - 1

    def get_location_list_at_offset(self, offset, die=None):
        """ Get a location list at the given offset in the section.

            Passing the die is only necessary in DWARF5+, for decoding
            location entry encodings that contain references to other
            sections. Raises DWARFError if it is missing there.
        """
        if self.version >= 5 and die is None:
            raise DWARFError("For this binary, \"die\" needs to be provided")
        self.stream.seek(offset, os.SEEK_SET)
        if self.version >= 5:
            return self._parse_location_list_from_stream_v5(die.cu)
        return self._parse_location_list_from_stream()

    def iter_location_lists(self):
        """ Iterates through location lists and view pairs. Yields lists of
            LocationEntry, BaseAddressEntry, and LocationViewPair objects.

            Requires dwarfinfo, since the objects in the section can only
            be found by looking at the DIEs that reference them. Raises
            DWARFError if dwarfinfo was not provided or if the section
            contents contradict the DIEs.
        """
        # The location lists section was never meant for sequential access.
        # Location lists are referenced by DIE attributes by offset or by index.

        # As of DWARFv5, it may contain, in addition to proper location lists,
        # location list view pairs, which are referenced by the nonstandard
        # DW_AT_GNU_locviews attribute. A set of locview pairs (which is a
        # couple of ULEB128 values) may precede a location list; the former is
        # referenced by the DW_AT_GNU_locviews attribute, the latter - by
        # DW_AT_location (in the same DIE). Binutils' readelf dumps those.
        # There is a view pair for each location-type entry in the list.
        #
        # Also, the section may contain gaps.
        #
        # Taking a cue from binutils, we have to scan this section while
        # looking at what's in DIEs.
        if self.dwarfinfo is None:
            raise DWARFError("Iterating through location lists requires dwarfinfo")

        ver5 = self.version >= 5
        stream = self.stream
        stream.seek(0, os.SEEK_END)
        endpos = stream.tell()
        stream.seek(0, os.SEEK_SET)

        all_offsets, locviews, cu_map = self._collect_list_offsets(ver5)

        if ver5:
            # Loclists section is organized as an array of CUs, each length prefixed.
            # We don't assume that the CUs go in the same order as the ones in info.
            offset_index = 0
            while stream.tell() < endpos:
                # We are at the start of the CU block in the loclists now
                cu_header = struct_parse(self.structs.Dwarf_loclists_CU_header, stream)
                if cu_header.version != 5:
                    raise DWARFError(
                        "Unexpected version %s in a loclists CU header" % (cu_header.version,))

                # GNU binutils supports two traversal modes: by offsets in CU
                # header, and sequential. We don't have a binary for the former
                # yet. On an off chance that we one day might, let's parse the
                # header anyway.

                # Unit_length includes the header but doesn't include the length
                cu_end_offset = cu_header.offset_after_length + cu_header.unit_length

                while stream.tell() < cu_end_offset:
                    if offset_index < len(all_offsets):
                        next_offset = all_offsets[offset_index]
                    else:
                        # No referenced objects left - whatever remains
                        # of this CU block is a gap
                        next_offset = cu_end_offset

                    if next_offset == stream.tell(): # At an object, either a loc list or a loc view pair
                        locview_pairs = self._parse_locview_pairs(locviews)
                        entries = self._parse_location_list_from_stream_v5(cu_map[stream.tell()])
                        yield locview_pairs + entries
                        offset_index += 1
                    else: # We are at a gap - skip the gap to the next object or to the next CU
                        if next_offset > cu_end_offset: # Gap at the CU end - the next object is in the next CU
                            next_offset = cu_end_offset # And implicitly quit the loop within the CU
                        stream.seek(next_offset, os.SEEK_SET)
        else:
            for offset in all_offsets:
                # A locview offset stands for the list that follows the views
                list_offset = locviews.get(offset, offset)
                if cu_map[list_offset].header.version < 5:
                    stream.seek(offset, os.SEEK_SET)
                    locview_pairs = self._parse_locview_pairs(locviews)
                    entries = self._parse_location_list_from_stream()
                    yield locview_pairs + entries

    def iter_CUs(self):
        """ For DWARF5, iterates over the CU blocks of the section, as
            produced by _iter_CUs_in_section() with the loclists CU header
            struct. Raises DWARFError for DWARF<5.
        """
        if self.version < 5:
            raise DWARFError("CU iteration in loclists is not supported with DWARF<5")

        structs = next(self.dwarfinfo.iter_CUs()).structs # Just pick one
        return _iter_CUs_in_section(self.stream, structs, structs.Dwarf_loclists_CU_header)

    #------ PRIVATE ------#

    def _collect_list_offsets(self, ver5):
        """ Walk the DIEs of all CUs whose version matches this section
            (v5+ if ver5 is set, older otherwise) and find out where the
            objects in the section are.

            Returns a tuple of:
             * sorted list of offsets where either a set of locview pairs
               or a view-less location list can be found
             * dict, mapping locview offsets to the respective loclist offsets
             * dict, mapping loclist offsets to CUs
        """
        # Need to provide support for DW_AT_GNU_locviews. They are interspersed
        # in the locations section, no way to tell where short of checking all DIEs
        all_offsets = set()
        locviews = dict()
        cu_map = dict()
        for cu in self.dwarfinfo.iter_CUs():
            cu_ver = cu['version']
            if (cu_ver >= 5) != ver5:
                continue
            for die in cu.iter_DIEs():
                attributes = die.attributes
                has_locviews = 'DW_AT_GNU_locviews' in attributes
                # A combination of location and locviews means there is a
                # location list preceded by several locview pairs
                if has_locviews:
                    if not ('DW_AT_location' in attributes and
                            LocationParser._attribute_has_loc_list(attributes['DW_AT_location'], cu_ver)):
                        raise DWARFError(
                            "DW_AT_GNU_locviews without a location list in DW_AT_location")
                    views_offset = attributes['DW_AT_GNU_locviews'].value
                    list_offset = attributes['DW_AT_location'].value
                    locviews[views_offset] = list_offset
                    cu_map[list_offset] = cu
                    all_offsets.add(views_offset)

                # Scan other attributes for location lists. A DW_AT_location
                # that comes with locviews was already accounted for above.
                for key in attributes:
                    attr = attributes[key]
                    if ((key != 'DW_AT_location' or not has_locviews) and
                        LocationParser.attribute_has_location(attr, cu_ver) and
                        LocationParser._attribute_has_loc_list(attr, cu_ver)):
                        list_offset = attr.value
                        all_offsets.add(list_offset)
                        cu_map[list_offset] = cu
        return sorted(all_offsets), locviews, cu_map

    def _parse_location_list_from_stream(self):
        """ Parses a pre-v5 location list at the current stream position.
            Returns a list of BaseAddressEntry and LocationEntry, without
            the terminator. All LocationEntry objects have is_absolute False.
        """
        lst = []
        while True:
            entry_offset = self.stream.tell()
            begin_offset = struct_parse(
                self.structs.Dwarf_target_addr(''), self.stream)
            end_offset = struct_parse(
                self.structs.Dwarf_target_addr(''), self.stream)
            if begin_offset == 0 and end_offset == 0:
                # End of list - we're done.
                break
            elif begin_offset == self._max_addr:
                # Base address selection entry
                entry_length = self.stream.tell() - entry_offset
                lst.append(BaseAddressEntry(
                    entry_offset=entry_offset,
                    entry_length=entry_length,
                    base_address=end_offset))
            else:
                # Location list entry: a 16-bit length followed by that
                # many bytes of the location expression
                expr_len = struct_parse(
                    self.structs.Dwarf_uint16(''), self.stream)
                loc_expr = [struct_parse(self.structs.Dwarf_uint8(''),
                                         self.stream)
                            for i in range(expr_len)]
                entry_length = self.stream.tell() - entry_offset
                lst.append(LocationEntry(
                    entry_offset=entry_offset,
                    entry_length=entry_length,
                    begin_offset=begin_offset,
                    end_offset=end_offset,
                    loc_expr=loc_expr,
                    is_absolute=False))
        return lst

    def _parse_location_list_from_stream_v5(self, cu=None):
        """ Returns an array with BaseAddressEntry and LocationEntry.
            No terminator entries.

            The cu argument is necessary if the section is a
            DWARFv5 debug_loclists one, and the target loclist
            contains indirect encodings.
        """
        return [entry_translate[entry.entry_type](entry, cu)
                for entry
                in struct_parse(self.structs.Dwarf_loclists_entries, self.stream)]

    def _translate_entry_v5(self, entry, die):
        """ From V5 style entries to a LocationEntry/BaseAddressEntry.

            The die argument is not used. Raises NotImplementedError for
            the indexed encodings (DW_LLE_base_addressx, DW_LLE_startx_endx,
            DW_LLE_startx_length) and DWARFError for unknown entry types.
        """
        off = entry.entry_offset
        entry_len = entry.entry_end_offset - off
        entry_type = entry.entry_type
        if entry_type == 'DW_LLE_base_address':
            return BaseAddressEntry(off, entry_len, entry.address)
        elif entry_type == 'DW_LLE_offset_pair':
            return LocationEntry(off, entry_len, entry.start_offset, entry.end_offset, entry.loc_expr, False)
        elif entry_type == 'DW_LLE_start_length':
            return LocationEntry(off, entry_len, entry.start_address, entry.start_address + entry.length, entry.loc_expr, True)
        elif entry_type == 'DW_LLE_start_end': # No test for this yet, but the format seems straightforward
            return LocationEntry(off, entry_len, entry.start_address, entry.end_address, entry.loc_expr, True)
        elif entry_type == 'DW_LLE_default_location': # No test for this either, and this is new in the API
            return LocationEntry(off, entry_len, -1, -1, entry.loc_expr, True)
        elif entry_type in ('DW_LLE_base_addressx', 'DW_LLE_startx_endx', 'DW_LLE_startx_length'):
            # We don't have sample binaries for those LLEs. Their proper parsing would
            # require knowing the CU context (so that indices can be resolved to code offsets)
            raise NotImplementedError("Location list entry type %s is not supported yet" % (entry_type,))
        else:
            raise DWARFError("Unknown DW_LLE code: %s" % (entry_type,))

    def _parse_locview_pairs(self, locviews):
        """ Parses the set of locview pairs at the current stream position,
            if there is one.

            Locviews is the dict, mapping locview offsets to corresponding
            loclist offsets. If the current position is not in it, nothing
            is read and an empty list is returned. Otherwise pairs are read
            up to the loclist offset; raises DWARFError if they do not end
            exactly there.
        """
        stream = self.stream
        views_offset = stream.tell()
        list_offset = locviews.get(views_offset, None)
        pairs = []
        if list_offset is not None:
            while stream.tell() < list_offset:
                pair = struct_parse(self.structs.Dwarf_locview_pair, stream)
                pairs.append(LocationViewPair(pair.entry_offset, pair.begin, pair.end))
            if stream.tell() != list_offset:
                raise DWARFError(
                    "Location view pairs at offset %s do not end at the location list at offset %s" % (
                        views_offset, list_offset))
        return pairs
282
class LocationParser(object):
    """ A parser for location information in DIEs.

        Handles both location information contained within the attribute
        itself (represented as a LocationExpr object) and references to
        location lists in the location lists section (represented as a
        list).
    """
    def __init__(self, location_lists):
        self.location_lists = location_lists

    @staticmethod
    def attribute_has_location(attr, dwarf_version):
        """ Checks if a DIE attribute contains location information:
            its name has to be one of the location-bearing ones, and its
            form has to denote either a location expression or a
            location list.
        """
        if not LocationParser._attribute_is_loclistptr_class(attr):
            return False
        return (LocationParser._attribute_has_loc_expr(attr, dwarf_version) or
                LocationParser._attribute_has_loc_list(attr, dwarf_version))

    def parse_from_attribute(self, attr, dwarf_version, die = None):
        """ Parses a DIE attribute and returns either a LocationExpr or
            a list. Raises ValueError if the attribute has no location
            information.

            The die, if given, is passed along to the location lists
            object together with the attribute value.
        """
        if not self.attribute_has_location(attr, dwarf_version):
            raise ValueError("Attribute does not have location information")

        if self._attribute_has_loc_expr(attr, dwarf_version):
            return LocationExpr(attr.value)
        if self._attribute_has_loc_list(attr, dwarf_version):
            # We don't yet know if the DIE context will be needed.
            # We might get it without a full tree traversal using
            # attr.offset as a key, but we assume a good DWARF5
            # aware consumer would pass a DIE along.
            return self.location_lists.get_location_list_at_offset(
                attr.value, die)

    #------ PRIVATE ------#

    @staticmethod
    def _attribute_has_loc_expr(attr, dwarf_version):
        # Before DWARF4, block forms hold location expressions, except on
        # DW_AT_const_value. DW_FORM_exprloc qualifies in any version.
        legacy_block = (dwarf_version < 4 and
                        attr.form.startswith('DW_FORM_block') and
                        attr.name != 'DW_AT_const_value')
        if legacy_block:
            return True
        return attr.form == 'DW_FORM_exprloc'

    @staticmethod
    def _attribute_has_loc_list(attr, dwarf_version):
        # Before DWARF4, fixed size data forms refer to location lists, except
        # on DW_AT_const_value. The two dedicated forms qualify in any version.
        legacy_data = (dwarf_version < 4 and
                       attr.form in ('DW_FORM_data1', 'DW_FORM_data2',
                                     'DW_FORM_data4', 'DW_FORM_data8') and
                       attr.name != 'DW_AT_const_value')
        if legacy_data:
            return True
        return attr.form in ('DW_FORM_sec_offset', 'DW_FORM_loclistx')

    @staticmethod
    def _attribute_is_loclistptr_class(attr):
        # Names of the attributes that may carry location information
        location_attr_names = (
            'DW_AT_location',
            'DW_AT_string_length',
            'DW_AT_const_value',
            'DW_AT_return_addr',
            'DW_AT_data_member_location',
            'DW_AT_frame_base',
            'DW_AT_segment',
            'DW_AT_static_link',
            'DW_AT_use_location',
            'DW_AT_vtable_elem_location',
            'DW_AT_call_value',
            'DW_AT_GNU_call_site_value',
            'DW_AT_GNU_call_site_target',
            'DW_AT_GNU_call_site_data_value',
        )
        return attr.name in location_attr_names