b26f8a1d206f4919d92a70cfc1d0641f950847bd
[pyelftools.git] / elftools / dwarf / die.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/die.py
3 #
4 # DWARF Debugging Information Entry
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from collections import namedtuple, OrderedDict
10 import os
11
12 from ..common.exceptions import DWARFError
13 from ..common.py3compat import bytes2str, iteritems
14 from ..common.utils import struct_parse, preserve_stream_pos
15 from .enums import DW_FORM_raw2name
16 from .dwarf_util import _resolve_via_offset_table, _get_base_offset
17
18
# AttributeValue - a single parsed attribute of a DIE. Fields:
#
# name:
#   The name (DW_AT_*) of this attribute
#
# form:
#   The DW_FORM_* name of this attribute
#
# value:
#   The value parsed from the section and translated according to the form
#   (e.g. for a DW_FORM_strp it's the actual string taken from the string
#   table)
#
# raw_value:
#   Raw value as parsed from the section - used for debugging and
#   presentation (e.g. for a DW_FORM_strp it's the raw string offset into
#   the table)
#
# offset:
#   Offset of this attribute's value in the stream (absolute offset,
#   relative to the beginning of the whole stream)
#
AttributeValue = namedtuple(
    'AttributeValue',
    ['name', 'form', 'value', 'raw_value', 'offset'])
41
42
class DIE(object):
    """ A DWARF debugging information entry. On creation, parses itself from
        the stream. Each DIE is held by a CU.

        Accessible attributes:

            tag:
                The DIE tag

            size:
                The size this DIE occupies in the section

            offset:
                The offset of this DIE in the stream

            attributes:
                An ordered dictionary mapping attribute names to values. It's
                ordered to preserve the order of attributes in the section

            has_children:
                Specifies whether this DIE has children

            abbrev_code:
                The abbreviation code pointing to an abbreviation entry (note
                that this is for informational purposes only - this object
                interacts with its abbreviation table transparently).

        See also the public methods.
    """
    # Reference forms whose raw value is an offset relative to the start of
    # the containing CU.
    _CU_RELATIVE_REF_FORMS = (
        'DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4',
        'DW_FORM_ref8', 'DW_FORM_ref', 'DW_FORM_ref_udata')

    # Reference forms that are resolved through the supplementary DWARF info.
    _SUPPLEMENTARY_REF_FORMS = (
        'DW_FORM_ref_sup4', 'DW_FORM_ref_sup8', 'DW_FORM_GNU_ref_alt')

    # Index forms that can only be translated once the DW_AT_xxx_base
    # attributes of the top DIE are available.
    _STRX_FORMS = (
        'DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2',
        'DW_FORM_strx3', 'DW_FORM_strx4')
    _ADDRX_FORMS = (
        'DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2',
        'DW_FORM_addrx3', 'DW_FORM_addrx4')
    _DEFERRED_INDEX_FORMS = (
        _STRX_FORMS + _ADDRX_FORMS +
        ('DW_FORM_loclistx', 'DW_FORM_rnglistx'))

    def __init__(self, cu, stream, offset):
        """ cu:
                CompileUnit object this DIE belongs to. Used to obtain context
                information (structs, abbrev table, etc.)

            stream, offset:
                The stream and offset into it where this DIE's data is located
        """
        self.cu = cu
        self.dwarfinfo = self.cu.dwarfinfo  # get DWARFInfo context
        self.stream = stream
        self.offset = offset

        self.attributes = OrderedDict()
        self.tag = None
        self.has_children = None
        self.abbrev_code = None
        self.size = 0
        # Null DIE terminator. It can be used to obtain offset range occupied
        # by this DIE including its whole subtree.
        self._terminator = None
        self._parent = None

        self._parse_DIE()

    def is_null(self):
        """ Is this a null entry?
        """
        return self.tag is None

    def get_DIE_from_attribute(self, name):
        """ Return the DIE referenced by the named attribute of this DIE.
            The attribute must be in the reference attribute class.

            name:
                The name of the attribute in the reference class.

            Raises KeyError if this DIE has no attribute called name,
            DWARFError if the attribute's form is not a reference form, and
            NotImplementedError for reference forms that are recognized but
            cannot be resolved (type-unit signatures, and supplementary
            references when no supplementary DWARF info is loaded).
        """
        attr = self.attributes[name]
        form = attr.form

        if form in self._CU_RELATIVE_REF_FORMS:
            # The raw value is relative to the CU; make it absolute.
            refaddr = self.cu.cu_offset + attr.raw_value
            return self.cu.get_DIE_from_refaddr(refaddr)

        # Note: the two comparisons below must be equality tests. Writing
        # `form in ('DW_FORM_ref_addr')` would be a substring test, because
        # the parentheses do not create a tuple.
        if form == 'DW_FORM_ref_addr':
            return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value)

        if form == 'DW_FORM_ref_sig8':
            # Implement search type units for matching signature
            raise NotImplementedError('%s (type unit by signature)' % form)

        if form in self._SUPPLEMENTARY_REF_FORMS:
            supplementary = self.dwarfinfo.supplementary_dwarfinfo
            if supplementary:
                return supplementary.get_DIE_from_refaddr(attr.raw_value)
            # FIXME: how to distinguish supplementary files from dwo ?
            raise NotImplementedError('%s to dwo' % form)

        # attr is a namedtuple, i.e. a tuple: it has to be wrapped in a
        # one-element tuple, otherwise the % operator would unpack its fields
        # as separate format arguments and fail with a TypeError.
        raise DWARFError(
            '%s is not a reference class form attribute' % (attr,))

    def get_parent(self):
        """ Return the parent DIE of this DIE, or None if the DIE has no
            parent (i.e. is a top-level DIE).
        """
        if self._parent is None:
            self._search_ancestor_offspring()
        return self._parent

    def get_full_path(self):
        """ Return the full path filename for the DIE.

            The filename is the join of 'DW_AT_comp_dir' and 'DW_AT_name',
            either of which may be missing in practice, in which case an
            empty string is used in its place. Both values are passed
            through bytes2str, so the returned value is a string.
        """
        comp_dir_attr = self.attributes.get('DW_AT_comp_dir', None)
        comp_dir = bytes2str(comp_dir_attr.value) if comp_dir_attr else ''
        fname_attr = self.attributes.get('DW_AT_name', None)
        fname = bytes2str(fname_attr.value) if fname_attr else ''
        return os.path.join(comp_dir, fname)

    def iter_children(self):
        """ Iterates all children of this DIE
        """
        return self.cu.iter_DIE_children(self)

    def iter_siblings(self):
        """ Yield all siblings of this DIE, i.e. all the children of this
            DIE's parent except this DIE itself. Yields nothing if this DIE
            has no parent.
        """
        parent = self.get_parent()
        if not parent:
            # A plain return ends the generator. Raising StopIteration here
            # would be turned into a RuntimeError by PEP 479.
            return
        for sibling in parent.iter_children():
            if sibling is not self:
                yield sibling

    # The following methods are used while creating the DIE and should not be
    # interesting to consumers
    #

    def set_parent(self, die):
        """ Record die as the parent of this DIE.
        """
        self._parent = die

    #------ PRIVATE ------#

    def _search_ancestor_offspring(self):
        """ Search our ancestors identifying their offspring to find our parent.

            DIEs are stored as a flattened tree. The top DIE is the ancestor
            of all DIEs in the unit. Each parent is guaranteed to be at
            an offset less than their children. In each generation of children
            the sibling with the closest offset not greater than our offset is
            our ancestor.

            Raises ValueError if a generation is reached in which no DIE is
            closer to our offset than its parent.
        """
        # This code is called when get_parent notices that the _parent has
        # not been identified. To avoid execution for each sibling record all
        # the children of any parent iterated. Assuming get_parent will also be
        # called for siblings, it is more efficient if siblings references are
        # provided and no worse than a single walk if they are missing, while
        # stopping iteration early could result in O(n^2) walks.
        search = self.cu.get_top_DIE()
        while search.offset < self.offset:
            closest = search
            for child in search.iter_children():
                child.set_parent(search)
                if child.offset <= self.offset:
                    closest = child

            # We also need to check the offset of the terminator DIE
            if search.has_children and search._terminator.offset <= self.offset:
                closest = search._terminator

            # If we didn't find a closer parent, give up, don't loop.
            # Either we mis-parsed an ancestor or someone created a DIE
            # by an offset that was not actually the start of a DIE.
            if closest is search:
                raise ValueError("offset %s not in CU %s DIE tree" %
                    (self.offset, self.cu.cu_offset))

            search = closest

    def __repr__(self):
        """ Multi-line description: a header line with tag, size and
            has_children, followed by one line per attribute.
        """
        parts = ['DIE %s, size=%s, has_children=%s\n' % (
            self.tag, self.size, self.has_children)]
        for attrname, attrval in iteritems(self.attributes):
            parts.append(' |%-18s: %s\n' % (attrname, attrval))
        return ''.join(parts)

    def __str__(self):
        return self.__repr__()

    def _parse_DIE(self):
        """ Parses the DIE info from the section, based on the abbreviation
            table of the CU
        """
        structs = self.cu.structs

        # A DIE begins with the abbreviation code. Read it and use it to
        # obtain the abbrev declaration for this DIE.
        # Note: here and elsewhere, preserve_stream_pos is used on operations
        # that manipulate the stream by reading data from it.
        self.abbrev_code = struct_parse(
            structs.Dwarf_uleb128(''), self.stream, self.offset)

        # This may be a null entry
        if self.abbrev_code == 0:
            self.size = self.stream.tell() - self.offset
            return

        abbrev_decl = self.cu.get_abbrev_table().get_abbrev(self.abbrev_code)
        self.tag = abbrev_decl['tag']
        self.has_children = abbrev_decl.has_children()

        # Guided by the attributes listed in the abbreviation declaration, parse
        # values from the stream.
        for spec in abbrev_decl['attr_spec']:
            form = spec.form
            name = spec.name
            attr_offset = self.stream.tell()
            # Special case here: the attribute value is stored in the attribute
            # definition in the abbreviation spec, not in the DIE itself.
            if form == 'DW_FORM_implicit_const':
                value = spec.value
                raw_value = value
            else:
                raw_value = struct_parse(
                    structs.Dwarf_dw_form[form], self.stream)
                value = self._translate_attr_value(form, raw_value)
            self.attributes[name] = AttributeValue(
                name=name,
                form=form,
                value=value,
                raw_value=raw_value,
                offset=attr_offset)

        # The stream is now positioned right after the last attribute value.
        self.size = self.stream.tell() - self.offset

    def _translate_attr_value(self, form, raw_value):
        """ Translate a raw attr value according to the form

            form:
                The DW_FORM_* name the raw value was parsed with.

            raw_value:
                The value as parsed from the stream.

            Returns the translated value, or raw_value unchanged for forms
            that need no translation (or cannot be translated yet, see
            below). Raises DWARFError for a DW_FORM_indirect whose raw value
            is not a known form code.
        """
        # Indirect forms can only be parsed if the top DIE of this CU has
        # already been parsed and listed in the CU, since the top DIE would
        # have to contain the DW_AT_xxx_base attributes.
        # This breaks if there is an indirect encoding in the top DIE itself
        # before the corresponding _base, and it was seen in the wild.
        # There is a hook in get_top_DIE() to resolve those lazily.
        translate_indirect = (
            self.cu.has_top_DIE() or self.offset != self.cu.cu_die_offset)

        if form == 'DW_FORM_strp':
            with preserve_stream_pos(self.stream):
                return self.dwarfinfo.get_string_from_table(raw_value)

        if form == 'DW_FORM_line_strp':
            with preserve_stream_pos(self.stream):
                return self.dwarfinfo.get_string_from_linetable(raw_value)

        if form in ('DW_FORM_GNU_strp_alt', 'DW_FORM_strp_sup'):
            supplementary = self.dwarfinfo.supplementary_dwarfinfo
            if supplementary:
                return supplementary.get_string_from_table(raw_value)
            # Without supplementary info the offset is all we have.
            return raw_value

        if form == 'DW_FORM_flag':
            return raw_value != 0

        if form == 'DW_FORM_flag_present':
            return True

        if form == 'DW_FORM_indirect':
            # The raw value is the code of the actual form; the value encoded
            # with that form follows in the stream.
            try:
                actual_form = DW_FORM_raw2name[raw_value]
            except KeyError:
                raise DWARFError(
                    'Found DW_FORM_indirect with unknown raw_value=' +
                    str(raw_value))

            actual_raw_value = struct_parse(
                self.cu.structs.Dwarf_dw_form[actual_form], self.stream)
            # Let's hope this doesn't get too deep :-)
            return self._translate_attr_value(actual_form, actual_raw_value)

        if translate_indirect:
            if form in self._ADDRX_FORMS:
                return self.cu.dwarfinfo.get_addr(self.cu, raw_value)

            if form in self._STRX_FORMS:
                # raw_value is an index into the string offsets table, whose
                # entries are 4 or 8 bytes wide depending on the DWARF format.
                stream = self.dwarfinfo.debug_str_offsets_sec.stream
                base_offset = _get_base_offset(
                    self.cu, 'DW_AT_str_offsets_base')
                offset_size = 4 if self.cu.structs.dwarf_format == 32 else 8
                with preserve_stream_pos(stream):
                    str_offset = struct_parse(
                        self.cu.structs.Dwarf_offset(''), stream,
                        base_offset + raw_value * offset_size)
                return self.dwarfinfo.get_string_from_table(str_offset)

            if form == 'DW_FORM_loclistx':
                return _resolve_via_offset_table(
                    self.dwarfinfo.debug_loclists_sec.stream,
                    self.cu, raw_value, 'DW_AT_loclists_base')

            if form == 'DW_FORM_rnglistx':
                return _resolve_via_offset_table(
                    self.dwarfinfo.debug_rnglists_sec.stream,
                    self.cu, raw_value, 'DW_AT_rnglists_base')

        return raw_value

    def _translate_indirect_attributes(self):
        """ This is a hook to translate the DW_FORM_...x values in the top DIE
            once the top DIE is parsed to the end. They can't be translated
            while the top DIE is being parsed, because they implicitly make a
            reference to the DW_AT_xxx_base attribute in the same DIE that may
            not have been parsed yet.
        """
        for key in self.attributes:
            attr = self.attributes[key]
            if attr.form in self._DEFERRED_INDEX_FORMS:
                # Can't change value in place, got to replace the whole
                # attribute record. Only existing keys are reassigned, so the
                # dictionary does not change size during the iteration.
                self.attributes[key] = AttributeValue(
                    name=attr.name,
                    form=attr.form,
                    value=self._translate_attr_value(
                        attr.form, attr.raw_value),
                    raw_value=attr.raw_value,
                    offset=attr.offset)