Improve DWARF 5 compatibility. (#400)
[pyelftools.git] / elftools / dwarf / die.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/die.py
3 #
4 # DWARF Debugging Information Entry
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from collections import namedtuple, OrderedDict
10 import os
11
12 from ..common.exceptions import DWARFError
13 from ..common.py3compat import bytes2str, iteritems
14 from ..common.utils import struct_parse, preserve_stream_pos
15 from .enums import DW_FORM_raw2name
16
17
# AttributeValue - a single attribute value held by a DIE. Fields:
#
#   name:
#       The name (DW_AT_*) of this attribute
#
#   form:
#       The DW_FORM_* name of this attribute
#
#   value:
#       The value parsed from the section and translated according to the
#       form (e.g. for a DW_FORM_strp it's the actual string taken from the
#       string table)
#
#   raw_value:
#       The value exactly as parsed from the section, before translation -
#       used for debugging and presentation (e.g. for a DW_FORM_strp it's
#       the raw string offset into the table)
#
#   offset:
#       Offset of this attribute's value in the stream (absolute offset,
#       relative to the beginning of the whole stream)
#
AttributeValue = namedtuple(
    'AttributeValue',
    ['name', 'form', 'value', 'raw_value', 'offset'])
40
41
class DIE(object):
    """ A DWARF debugging information entry. On creation, parses itself from
        the stream. Each DIE is held by a CU.

        Accessible attributes:

            tag:
                The DIE tag (None for a null entry)

            size:
                The size this DIE occupies in the section

            offset:
                The offset of this DIE in the stream

            attributes:
                An ordered dictionary mapping attribute names to values. It's
                ordered to preserve the order of attributes in the section

            has_children:
                Specifies whether this DIE has children

            abbrev_code:
                The abbreviation code pointing to an abbreviation entry (note
                that this is for informational purposes only - this object
                interacts with its abbreviation table transparently).

        See also the public methods.
    """
    def __init__(self, cu, stream, offset):
        """ cu:
                CompileUnit object this DIE belongs to. Used to obtain context
                information (structs, abbrev table, etc.)

            stream, offset:
                The stream and offset into it where this DIE's data is located
        """
        self.cu = cu
        self.dwarfinfo = self.cu.dwarfinfo # get DWARFInfo context
        self.stream = stream
        self.offset = offset

        self.attributes = OrderedDict()
        self.tag = None
        self.has_children = None
        self.abbrev_code = None
        self.size = 0
        # Null DIE terminator. It can be used to obtain offset range occupied
        # by this DIE including its whole subtree.
        self._terminator = None
        self._parent = None

        self._parse_DIE()

    def is_null(self):
        """ Is this a null entry?
        """
        return self.tag is None

    def get_DIE_from_attribute(self, name):
        """ Return the DIE referenced by the named attribute of this DIE.
            The attribute must be in the reference attribute class.

            name:
                The name of the attribute in the reference class.

            Raises KeyError if this DIE has no such attribute,
            NotImplementedError for reference forms that are recognized but
            not supported (DW_FORM_ref_sig8, DW_FORM_ref_sup4/8) and
            DWARFError if the attribute's form is not a reference form.
        """
        attr = self.attributes[name]
        if attr.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4',
                         'DW_FORM_ref8', 'DW_FORM_ref'):
            # CU-relative reference: rebase onto the start of our CU
            refaddr = self.cu.cu_offset + attr.raw_value
            return self.cu.get_DIE_from_refaddr(refaddr)
        elif attr.form == 'DW_FORM_ref_addr':
            # The raw value is handed to the DWARFInfo-level lookup as is
            return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
        elif attr.form == 'DW_FORM_ref_sig8':
            # Implement search type units for matching signature
            raise NotImplementedError('%s (type unit by signature)' % attr.form)
        elif attr.form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8'):
            raise NotImplementedError('%s to dwo' % attr.form)
        else:
            # attr is a (named) tuple, so it has to be wrapped in a 1-tuple
            # to be formatted as a single %s argument.
            raise DWARFError(
                '%s is not a reference class form attribute' % (attr,))

    def get_parent(self):
        """ Return the parent DIE of this DIE, or None if the DIE has no
            parent (i.e. is a top-level DIE).
        """
        if self._parent is None:
            self._search_ancestor_offspring()
        return self._parent

    def get_full_path(self):
        """ Return the full path filename for the DIE.

            The filename is the join of 'DW_AT_comp_dir' and 'DW_AT_name',
            either of which may be missing in practice (a missing one is
            treated as an empty string). Note that its value is usually a
            string taken from the .debug_str section and the returned value
            will be a string.
        """
        comp_dir_attr = self.attributes.get('DW_AT_comp_dir', None)
        comp_dir = bytes2str(comp_dir_attr.value) if comp_dir_attr else ''
        fname_attr = self.attributes.get('DW_AT_name', None)
        fname = bytes2str(fname_attr.value) if fname_attr else ''
        return os.path.join(comp_dir, fname)

    def iter_children(self):
        """ Iterates all children of this DIE
        """
        return self.cu.iter_DIE_children(self)

    def iter_siblings(self):
        """ Yield all siblings of this DIE (the other children of its
            parent). Yields nothing if this DIE has no parent.
        """
        parent = self.get_parent()
        # Without a parent there is nothing to yield. Simply fall off the
        # end of the generator: raising StopIteration from inside a
        # generator body is turned into a RuntimeError by PEP 479.
        if parent:
            for sibling in parent.iter_children():
                if sibling is not self:
                    yield sibling

    # The following methods are used while creating the DIE and should not be
    # interesting to consumers
    #

    def set_parent(self, die):
        """ Record die as the parent of this DIE.
        """
        self._parent = die

    #------ PRIVATE ------#

    def _search_ancestor_offspring(self):
        """ Search our ancestors identifying their offspring to find our parent.

            DIEs are stored as a flattened tree.  The top DIE is the ancestor
            of all DIEs in the unit.  Each parent is guaranteed to be at
            an offset less than their children.  In each generation of children
            the sibling with the closest offset not greater than our offset is
            our ancestor.

            Raises ValueError if our offset cannot be located in the DIE tree
            of the CU.
        """
        # This code is called when get_parent notices that the _parent has
        # not been identified.  To avoid execution for each sibling record all
        # the children of any parent iterated.  Assuming get_parent will also be
        # called for siblings, it is more efficient if siblings references are
        # provided and no worse than a single walk if they are missing, while
        # stopping iteration early could result in O(n^2) walks.
        search = self.cu.get_top_DIE()
        while search.offset < self.offset:
            prev = search
            for child in search.iter_children():
                child.set_parent(search)
                if child.offset <= self.offset:
                    prev = child

            # We also need to check the offset of the terminator DIE
            if search.has_children and search._terminator.offset <= self.offset:
                prev = search._terminator

            # If we didn't find a closer parent, give up, don't loop.
            # Either we mis-parsed an ancestor or someone created a DIE
            # by an offset that was not actually the start of a DIE.
            if prev is search:
                raise ValueError("offset %s not in CU %s DIE tree" %
                    (self.offset, self.cu.cu_offset))

            search = prev

    def __repr__(self):
        """ Multi-line description: a header line with tag, size and
            has_children, followed by one line per attribute.
        """
        lines = ['DIE %s, size=%s, has_children=%s\n' % (
            self.tag, self.size, self.has_children)]
        for attrname, attrval in iteritems(self.attributes):
            lines.append('    |%-18s:  %s\n' % (attrname, attrval))
        return ''.join(lines)

    def __str__(self):
        return self.__repr__()

    def _parse_DIE(self):
        """ Parses the DIE info from the section, based on the abbreviation
            table of the CU
        """
        structs = self.cu.structs

        # A DIE begins with the abbreviation code. Read it and use it to
        # obtain the abbrev declaration for this DIE.
        # Note: here and elsewhere, preserve_stream_pos is used on operations
        # that manipulate the stream by reading data from it.
        self.abbrev_code = struct_parse(
            structs.Dwarf_uleb128(''), self.stream, self.offset)

        # This may be a null entry: tag stays None and there are no attributes
        if self.abbrev_code == 0:
            self.size = self.stream.tell() - self.offset
            return

        abbrev_decl = self.cu.get_abbrev_table().get_abbrev(self.abbrev_code)
        self.tag = abbrev_decl['tag']
        self.has_children = abbrev_decl.has_children()

        # Guided by the attributes listed in the abbreviation declaration, parse
        # values from the stream.
        for spec in abbrev_decl['attr_spec']:
            form = spec.form
            name = spec.name
            attr_offset = self.stream.tell()
            # Special case here: the attribute value is stored in the attribute
            # definition in the abbreviation spec, not in the DIE itself.
            if form == 'DW_FORM_implicit_const':
                value = spec.value
                raw_value = value
            else:
                raw_value = struct_parse(structs.Dwarf_dw_form[form], self.stream)
                value = self._translate_attr_value(form, raw_value)
            self.attributes[name] = AttributeValue(
                name=name,
                form=form,
                value=value,
                raw_value=raw_value,
                offset=attr_offset)

        self.size = self.stream.tell() - self.offset

    def _translate_attr_value(self, form, raw_value):
        """ Translate a raw attr value according to the form

            Raises DWARFError for a DW_FORM_indirect whose raw value does not
            name a known form.
        """
        value = None
        if form == 'DW_FORM_strp':
            # The string lookup may move the stream; keep our position
            with preserve_stream_pos(self.stream):
                value = self.dwarfinfo.get_string_from_table(raw_value)
        elif form == 'DW_FORM_line_strp':
            with preserve_stream_pos(self.stream):
                value = self.dwarfinfo.get_string_from_linetable(raw_value)
        elif form == 'DW_FORM_flag':
            value = raw_value != 0
        elif form == 'DW_FORM_flag_present':
            value = True
        elif form == 'DW_FORM_indirect':
            # The raw value is itself a form code; the actual value follows
            # in the stream, encoded with that form.
            try:
                form = DW_FORM_raw2name[raw_value]
            except KeyError:
                raise DWARFError(
                        'Found DW_FORM_indirect with unknown raw_value=' +
                        str(raw_value))

            raw_value = struct_parse(
                self.cu.structs.Dwarf_dw_form[form], self.stream)
            # Let's hope this doesn't get too deep :-)
            return self._translate_attr_value(form, raw_value)
        else:
            value = raw_value
        return value