ef6d90834c4bbec77d5f8b11498c69a8b5532855
[pyelftools.git] / elftools / dwarf / die.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/die.py
3 #
4 # DWARF Debugging Information Entry
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from collections import namedtuple, OrderedDict
10 import os
11
12 from ..common.exceptions import DWARFError
13 from ..common.utils import bytes2str, struct_parse, preserve_stream_pos
14 from .enums import DW_FORM_raw2name
15 from .dwarf_util import _resolve_via_offset_table, _get_base_offset
16
17
18 # AttributeValue - describes an attribute value in the DIE:
19 #
20 # name:
21 # The name (DW_AT_*) of this attribute
22 #
23 # form:
24 # The DW_FORM_* name of this attribute
25 #
26 # value:
27 # The value parsed from the section and translated according to the form
28 # (e.g. for a DW_FORM_strp it's the actual string taken from the string table)
29 #
30 # raw_value:
31 # Raw value as parsed from the section - used for debugging and presentation
32 # (e.g. for a DW_FORM_strp it's the raw string offset into the table)
33 #
34 # offset:
35 # Offset of this attribute's value in the stream (absolute offset, relative
36 # to the beginning of the whole stream)
37 #
# Immutable record holding one parsed DIE attribute; the fields are, in order:
# name, form, value, raw_value, offset.
AttributeValue = namedtuple(
    'AttributeValue',
    ['name', 'form', 'value', 'raw_value', 'offset'])
40
41
class DIE(object):
    """ A DWARF debugging information entry. On creation, parses itself from
        the stream. Each DIE is held by a CU.

        Accessible attributes:

            tag:
                The DIE tag (None for a null entry)

            size:
                The size this DIE occupies in the section

            offset:
                The offset of this DIE in the stream

            attributes:
                An ordered dictionary mapping attribute names to values. It's
                ordered to preserve the order of attributes in the section

            has_children:
                Specifies whether this DIE has children

            abbrev_code:
                The abbreviation code pointing to an abbreviation entry (note
                that this is for informational purposes only - this object
                interacts with its abbreviation table transparently).

        See also the public methods.
    """
    # Reference forms whose raw value is added to the CU offset to obtain the
    # address of the referenced DIE.
    _CU_RELATIVE_REF_FORMS = (
        'DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4',
        'DW_FORM_ref8', 'DW_FORM_ref', 'DW_FORM_ref_udata')

    # Reference forms that are resolved through the supplementary DWARF info.
    _SUPPLEMENTARY_REF_FORMS = (
        'DW_FORM_ref_sup4', 'DW_FORM_ref_sup8', 'DW_FORM_GNU_ref_alt')

    # Indexed forms: their translation needs a DW_AT_xxx_base attribute of
    # the top DIE, so they may have to be translated lazily.
    _ADDRX_FORMS = (
        'DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2',
        'DW_FORM_addrx3', 'DW_FORM_addrx4')
    _STRX_FORMS = (
        'DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2',
        'DW_FORM_strx3', 'DW_FORM_strx4')
    _INDEXED_FORMS = _STRX_FORMS + _ADDRX_FORMS + (
        'DW_FORM_loclistx', 'DW_FORM_rnglistx')

    def __init__(self, cu, stream, offset):
        """ cu:
                CompileUnit object this DIE belongs to. Used to obtain context
                information (structs, abbrev table, etc.)

            stream, offset:
                The stream and offset into it where this DIE's data is located
        """
        self.cu = cu
        self.dwarfinfo = self.cu.dwarfinfo # get DWARFInfo context
        self.stream = stream
        self.offset = offset

        self.attributes = OrderedDict()
        self.tag = None
        self.has_children = None
        self.abbrev_code = None
        self.size = 0
        # Null DIE terminator. It can be used to obtain offset range occupied
        # by this DIE including its whole subtree.
        self._terminator = None
        self._parent = None

        self._parse_DIE()

    def is_null(self):
        """ Is this a null entry?
        """
        return self.tag is None

    def get_DIE_from_attribute(self, name):
        """ Return the DIE referenced by the named attribute of this DIE.
            The attribute must be in the reference attribute class.

            name:
                The name of the attribute in the reference class.

            Raises KeyError if this DIE has no attribute called `name`,
            NotImplementedError for reference forms that are not supported
            yet, and DWARFError if the attribute's form is not a reference
            form at all.
        """
        attr = self.attributes[name]
        form = attr.form
        if form in self._CU_RELATIVE_REF_FORMS:
            refaddr = self.cu.cu_offset + attr.raw_value
            return self.cu.get_DIE_from_refaddr(refaddr)
        # Note: the two comparisons below must be equality tests. Writing
        # `form in ('DW_FORM_ref_addr')` tests against a plain string (the
        # parentheses do not make a tuple) and degrades to substring matching.
        elif form == 'DW_FORM_ref_addr':
            return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
        elif form == 'DW_FORM_ref_sig8':
            # Implement search type units for matching signature
            raise NotImplementedError('%s (type unit by signature)' % form)
        elif form in self._SUPPLEMENTARY_REF_FORMS:
            supplementary = self.dwarfinfo.supplementary_dwarfinfo
            if supplementary:
                return supplementary.get_DIE_from_refaddr(attr.raw_value)
            # FIXME: how to distinguish supplementary files from dwo ?
            raise NotImplementedError('%s to dwo' % form)
        else:
            # attr is a namedtuple, i.e. a tuple: it has to be wrapped in a
            # 1-tuple, otherwise the % operator unpacks its five fields and
            # raises TypeError instead of the intended DWARFError.
            raise DWARFError(
                '%s is not a reference class form attribute' % (attr,))

    def get_parent(self):
        """ Return the parent DIE of this DIE, or None if the DIE has no
            parent (i.e. is a top-level DIE).
        """
        if self._parent is None:
            self._search_ancestor_offspring()
        return self._parent

    def get_full_path(self):
        """ Return the full path filename for the DIE.

            The filename is the join of 'DW_AT_comp_dir' and 'DW_AT_name',
            either of which may be missing in practice. Note that its value is
            usually a string taken from the .debug_str section and the
            returned value will be a string.
        """
        comp_dir_attr = self.attributes.get('DW_AT_comp_dir', None)
        comp_dir = bytes2str(comp_dir_attr.value) if comp_dir_attr else ''
        fname_attr = self.attributes.get('DW_AT_name', None)
        fname = bytes2str(fname_attr.value) if fname_attr else ''
        return os.path.join(comp_dir, fname)

    def iter_children(self):
        """ Iterates all children of this DIE
        """
        return self.cu.iter_DIE_children(self)

    def iter_siblings(self):
        """ Yield all siblings of this DIE. Yields nothing if this DIE has
            no parent.
        """
        parent = self.get_parent()
        if not parent:
            # A plain return ends the generator. Raising StopIteration inside
            # a generator body is turned into RuntimeError by PEP 479.
            return
        for sibling in parent.iter_children():
            if sibling is not self:
                yield sibling

    # The following methods are used while creating the DIE and should not be
    # interesting to consumers
    #

    def set_parent(self, die):
        """ Record `die` as the parent of this DIE.
        """
        self._parent = die

    #------ PRIVATE ------#

    def _search_ancestor_offspring(self):
        """ Search our ancestors identifying their offspring to find our parent.

            DIEs are stored as a flattened tree.  The top DIE is the ancestor
            of all DIEs in the unit.  Each parent is guaranteed to be at
            an offset less than their children.  In each generation of children
            the sibling with the closest offset not greater than our offset is
            our ancestor.

            Raises ValueError if this DIE's offset can't be located in the
            DIE tree of its CU.
        """
        # This code is called when get_parent notices that the _parent has
        # not been identified.  To avoid execution for each sibling record all
        # the children of any parent iterated.  Assuming get_parent will also be
        # called for siblings, it is more efficient if siblings references are
        # provided and no worse than a single walk if they are missing, while
        # stopping iteration early could result in O(n^2) walks.
        search = self.cu.get_top_DIE()
        while search.offset < self.offset:
            prev = search
            for child in search.iter_children():
                child.set_parent(search)
                if child.offset <= self.offset:
                    prev = child

            # We also need to check the offset of the terminator DIE
            if search.has_children and search._terminator.offset <= self.offset:
                prev = search._terminator

            # If we didn't find a closer parent, give up, don't loop.
            # Either we mis-parsed an ancestor or someone created a DIE
            # by an offset that was not actually the start of a DIE.
            if prev is search:
                raise ValueError("offset %s not in CU %s DIE tree" %
                    (self.offset, self.cu.cu_offset))

            search = prev

    def __repr__(self):
        """ Multi-line dump: a header line followed by one line per attribute.
        """
        lines = ['DIE %s, size=%s, has_children=%s\n' % (
            self.tag, self.size, self.has_children)]
        lines.extend(
            ' |%-18s: %s\n' % (attrname, attrval)
            for attrname, attrval in self.attributes.items())
        return ''.join(lines)

    def __str__(self):
        return self.__repr__()

    def _parse_DIE(self):
        """ Parses the DIE info from the section, based on the abbreviation
            table of the CU
        """
        structs = self.cu.structs

        # A DIE begins with the abbreviation code. Read it and use it to
        # obtain the abbrev declaration for this DIE.
        self.abbrev_code = struct_parse(
            structs.Dwarf_uleb128(''), self.stream, self.offset)

        # This may be a null entry: tag and has_children stay None and no
        # attributes are parsed.
        if self.abbrev_code == 0:
            self.size = self.stream.tell() - self.offset
            return

        abbrev_decl = self.cu.get_abbrev_table().get_abbrev(self.abbrev_code)
        self.tag = abbrev_decl['tag']
        self.has_children = abbrev_decl.has_children()

        # Guided by the attributes listed in the abbreviation declaration, parse
        # values from the stream.
        for spec in abbrev_decl['attr_spec']:
            form = spec.form
            name = spec.name
            attr_offset = self.stream.tell()
            # Special case here: the attribute value is stored in the attribute
            # definition in the abbreviation spec, not in the DIE itself.
            if form == 'DW_FORM_implicit_const':
                value = spec.value
                raw_value = value
            else:
                raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream)
                value = self._translate_attr_value(form, raw_value)
            self.attributes[name] = AttributeValue(
                name=name,
                form=form,
                value=value,
                raw_value=raw_value,
                offset=attr_offset)

        self.size = self.stream.tell() - self.offset

    def _translate_attr_value(self, form, raw_value):
        """ Translate a raw attr value according to the form

            Forms that need no translation (or can't be translated yet, see
            below) yield raw_value unchanged. Raises DWARFError for a
            DW_FORM_indirect whose raw value is not a known form code.
        """
        # Indirect forms can only be parsed if the top DIE of this CU has already been parsed
        # and listed in the CU, since the top DIE would have to contain the DW_AT_xxx_base attributes.
        # This breaks if there is an indirect encoding in the top DIE itself before the
        # corresponding _base, and it was seen in the wild.
        # There is a hook in get_top_DIE() to resolve those lazily.
        translate_indirect = self.cu.has_top_DIE() or self.offset != self.cu.cu_die_offset
        value = raw_value
        if form == 'DW_FORM_strp':
            # Note: here and elsewhere, preserve_stream_pos is used on
            # operations that manipulate the stream by reading data from it.
            with preserve_stream_pos(self.stream):
                value = self.dwarfinfo.get_string_from_table(raw_value)
        elif form == 'DW_FORM_line_strp':
            with preserve_stream_pos(self.stream):
                value = self.dwarfinfo.get_string_from_linetable(raw_value)
        elif form in ('DW_FORM_GNU_strp_alt', 'DW_FORM_strp_sup'):
            # Without supplementary DWARF info the raw offset is kept as is.
            supplementary = self.dwarfinfo.supplementary_dwarfinfo
            if supplementary:
                value = supplementary.get_string_from_table(raw_value)
        elif form == 'DW_FORM_flag':
            value = raw_value != 0
        elif form == 'DW_FORM_flag_present':
            value = True
        elif form == 'DW_FORM_indirect':
            # The raw value is the code of the actual form; the real value
            # follows in the stream at the current position.
            try:
                form = DW_FORM_raw2name[raw_value]
            except KeyError:
                raise DWARFError(
                        'Found DW_FORM_indirect with unknown raw_value=' +
                        str(raw_value))

            raw_value = struct_parse(
                self.cu.structs.Dwarf_dw_form[form], self.stream)
            # Let's hope this doesn't get too deep :-)
            return self._translate_attr_value(form, raw_value)
        elif form in self._ADDRX_FORMS and translate_indirect:
            value = self.cu.dwarfinfo.get_addr(self.cu, raw_value)
        elif form in self._STRX_FORMS and translate_indirect:
            stream = self.dwarfinfo.debug_str_offsets_sec.stream
            base_offset = _get_base_offset(self.cu, 'DW_AT_str_offsets_base')
            offset_size = 4 if self.cu.structs.dwarf_format == 32 else 8
            # raw_value is an index into the table of string offsets that
            # starts at base_offset; each entry is offset_size bytes wide.
            with preserve_stream_pos(stream):
                str_offset = struct_parse(
                    self.cu.structs.Dwarf_offset(''), stream,
                    base_offset + raw_value*offset_size)
            value = self.dwarfinfo.get_string_from_table(str_offset)
        elif form == 'DW_FORM_loclistx' and translate_indirect:
            value = _resolve_via_offset_table(
                self.dwarfinfo.debug_loclists_sec.stream, self.cu,
                raw_value, 'DW_AT_loclists_base')
        elif form == 'DW_FORM_rnglistx' and translate_indirect:
            value = _resolve_via_offset_table(
                self.dwarfinfo.debug_rnglists_sec.stream, self.cu,
                raw_value, 'DW_AT_rnglists_base')
        return value

    def _translate_indirect_attributes(self):
        """ This is a hook to translate the DW_FORM_...x values in the top DIE
            once the top DIE is parsed to the end. They can't be translated
            while the top DIE is being parsed, because they implicitly make a
            reference to the DW_AT_xxx_base attribute in the same DIE that may
            not have been parsed yet.
        """
        # Only existing keys are reassigned, so the dictionary doesn't change
        # size (or order) while it is being iterated.
        for key in self.attributes:
            attr = self.attributes[key]
            if attr.form in self._INDEXED_FORMS:
                # Can't change value in place, got to replace the whole attribute record
                self.attributes[key] = AttributeValue(
                    name=attr.name,
                    form=attr.form,
                    value=self._translate_attr_value(attr.form, attr.raw_value),
                    raw_value=attr.raw_value,
                    offset=attr.offset)