1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/die.py
4 # DWARF Debugging Information Entry
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from collections
import namedtuple
, OrderedDict
12 from ..common
.exceptions
import DWARFError
13 from ..common
.utils
import bytes2str
, struct_parse
, preserve_stream_pos
14 from .enums
import DW_FORM_raw2name
15 from .dwarf_util
import _resolve_via_offset_table
, _get_base_offset
# AttributeValue - describes an attribute value in the DIE:
#
# name:
#   The name (DW_AT_*) of this attribute
#
# form:
#   The DW_FORM_* name of this attribute
#
# value:
#   The value parsed from the section and translated according to the form
#   (e.g. for a DW_FORM_strp it's the actual string taken from the string table)
#
# raw_value:
#   Raw value as parsed from the section - used for debugging and presentation
#   (e.g. for a DW_FORM_strp it's the raw string offset into the table)
#
# offset:
#   Offset of this attribute's value in the stream (absolute offset, relative
#   to the beginning of the whole stream)
#
AttributeValue = namedtuple(
    'AttributeValue',
    ['name', 'form', 'value', 'raw_value', 'offset'])
43 """ A DWARF debugging information entry. On creation, parses itself from
44 the stream. Each DIE is held by a CU.
46 Accessible attributes:
52 The size this DIE occupies in the section
55 The offset of this DIE in the stream
58 An ordered dictionary mapping attribute names to values. It's
59 ordered to preserve the order of attributes in the section
62 Specifies whether this DIE has children
65 The abbreviation code pointing to an abbreviation entry (note
66 that this is for informational purposes only - this object
67 interacts with its abbreviation table transparently).
69 See also the public methods.
71 def __init__(self
, cu
, stream
, offset
):
73 CompileUnit object this DIE belongs to. Used to obtain context
74 information (structs, abbrev table, etc.)
77 The stream and offset into it where this DIE's data is located
80 self
.dwarfinfo
= self
.cu
.dwarfinfo
# get DWARFInfo context
84 self
.attributes
= OrderedDict()
86 self
.has_children
= None
87 self
.abbrev_code
= None
89 # Null DIE terminator. It can be used to obtain offset range occupied
90 # by this DIE including its whole subtree.
91 self
._terminator
= None
97 """ Is this a null entry?
99 return self
.tag
is None
def get_DIE_from_attribute(self, name):
    """ Return the DIE referenced by the named attribute of this DIE.
        The attribute must be in the reference attribute class.

        name:
            The name of the attribute in the reference class.

        Raises KeyError if this DIE has no attribute with that name,
        NotImplementedError for reference forms that cannot be followed
        here (type unit signatures, references that would lead to a dwo)
        and DWARFError if the attribute form is not a reference form.
    """
    attr = self.attributes[name]
    form = attr.form
    if form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4',
                'DW_FORM_ref8', 'DW_FORM_ref', 'DW_FORM_ref_udata'):
        # The raw value is added to the offset of the owning CU before
        # the lookup is delegated to that CU.
        refaddr = self.cu.cu_offset + attr.raw_value
        return self.cu.get_DIE_from_refaddr(refaddr)
    elif form == 'DW_FORM_ref_addr':
        # The raw value is handed to the DWARFInfo object unchanged.
        return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value)
    elif form == 'DW_FORM_ref_sig8':
        # Implement search type units for matching signature
        raise NotImplementedError('%s (type unit by signature)' % form)
    elif form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8',
                  'DW_FORM_GNU_ref_alt'):
        if self.dwarfinfo.supplementary_dwarfinfo:
            return self.dwarfinfo.supplementary_dwarfinfo.get_DIE_from_refaddr(
                attr.raw_value)
        # FIXME: how to distinguish supplementary files from dwo ?
        raise NotImplementedError('%s to dwo' % form)
    else:
        # attr is a namedtuple, i.e. a tuple: it has to be wrapped in a
        # one-element tuple, otherwise the % operator unpacks its five
        # fields and fails with TypeError before DWARFError is raised.
        raise DWARFError(
            '%s is not a reference class form attribute' % (attr,))
def get_parent(self):
    """ Return the parent DIE of this DIE, or None if the DIE has no
        parent (i.e. is a top-level DIE).
    """
    if self._parent is None:
        # The parent has not been recorded yet: run the ancestor search,
        # which calls set_parent on the children it iterates over.
        self._search_ancestor_offspring()
    return self._parent
def get_full_path(self):
    """ Build the full path filename for the DIE.

        The path is os.path.join applied to the values of the
        'DW_AT_comp_dir' and 'DW_AT_name' attributes, each passed through
        bytes2str. Either attribute may be missing in practice; a missing
        one contributes an empty string. The returned value is a string.
    """
    pieces = []
    for attr_name in ('DW_AT_comp_dir', 'DW_AT_name'):
        entry = self.attributes.get(attr_name, None)
        if entry:
            pieces.append(bytes2str(entry.value))
        else:
            pieces.append('')
    return os.path.join(*pieces)
def iter_children(self):
    """ Iterates all children of this DIE.

        The work is delegated to the CU that holds this DIE; whatever its
        iter_DIE_children returns for this DIE is handed back unchanged.
    """
    owning_cu = self.cu
    children = owning_cu.iter_DIE_children(self)
    return children
def iter_siblings(self):
    """ Yield all siblings of this DIE, i.e. every child of this DIE's
        parent other than this DIE itself.

        Yields nothing when get_parent() reports no parent.
    """
    parent = self.get_parent()
    if not parent:
        # End the generator with a plain return. Raising StopIteration
        # inside a generator body is turned into RuntimeError (PEP 479).
        return
    for sibling in parent.iter_children():
        if sibling is not self:
            yield sibling
164 # The following methods are used while creating the DIE and should not be
165 # interesting to consumers
def set_parent(self, die):
    """ Record die as the parent of this DIE.

        get_parent() returns the recorded value; the ancestor search calls
        this on every child it iterates over.
    """
    self._parent = die
171 #------ PRIVATE ------#
173 def _search_ancestor_offspring(self
):
174 """ Search our ancestors identifying their offspring to find our parent.
176 DIEs are stored as a flattened tree. The top DIE is the ancestor
177 of all DIEs in the unit. Each parent is guaranteed to be at
178 an offset less than their children. In each generation of children
179 the sibling with the closest offset not greater than our offset is
182 # This code is called when get_parent notices that the _parent has
183 # not been identified. To avoid execution for each sibling record all
184 # the children of any parent iterated. Assuming get_parent will also be
185 # called for siblings, it is more efficient if siblings references are
186 # provided and no worse than a single walk if they are missing, while
187 # stopping iteration early could result in O(n^2) walks.
188 search
= self
.cu
.get_top_DIE()
189 while search
.offset
< self
.offset
:
191 for child
in search
.iter_children():
192 child
.set_parent(search
)
193 if child
.offset
<= self
.offset
:
196 # We also need to check the offset of the terminator DIE
197 if search
.has_children
and search
._terminator
.offset
<= self
.offset
:
198 prev
= search
._terminator
200 # If we didn't find a closer parent, give up, don't loop.
201 # Either we mis-parsed an ancestor or someone created a DIE
202 # by an offset that was not actually the start of a DIE.
204 raise ValueError("offset %s not in CU %s DIE tree" %
205 (self
.offset
, self
.cu
.cu_offset
))
210 s
= 'DIE %s, size=%s, has_children=%s\n' % (
211 self
.tag
, self
.size
, self
.has_children
)
212 for attrname
, attrval
in self
.attributes
.items():
213 s
+= ' |%-18s: %s\n' % (attrname
, attrval
)
217 return self
.__repr
__()
219 def _parse_DIE(self
):
220 """ Parses the DIE info from the section, based on the abbreviation
223 structs
= self
.cu
.structs
225 # A DIE begins with the abbreviation code. Read it and use it to
226 # obtain the abbrev declaration for this DIE.
227 # Note: here and elsewhere, preserve_stream_pos is used on operations
228 # that manipulate the stream by reading data from it.
229 self
.abbrev_code
= struct_parse(
230 structs
.Dwarf_uleb128(''), self
.stream
, self
.offset
)
232 # This may be a null entry
233 if self
.abbrev_code
== 0:
234 self
.size
= self
.stream
.tell() - self
.offset
237 abbrev_decl
= self
.cu
.get_abbrev_table().get_abbrev(self
.abbrev_code
)
238 self
.tag
= abbrev_decl
['tag']
239 self
.has_children
= abbrev_decl
.has_children()
241 # Guided by the attributes listed in the abbreviation declaration, parse
242 # values from the stream.
243 for spec
in abbrev_decl
['attr_spec']:
246 attr_offset
= self
.stream
.tell()
247 # Special case here: the attribute value is stored in the attribute
248 # definition in the abbreviation spec, not in the DIE itself.
249 if form
== 'DW_FORM_implicit_const':
253 raw_value
= struct_parse(structs
.Dwarf_dw_form
[form
], self
.stream
)
254 value
= self
._translate
_attr
_value
(form
, raw_value
)
255 self
.attributes
[name
] = AttributeValue(
262 self
.size
= self
.stream
.tell() - self
.offset
264 def _translate_attr_value(self
, form
, raw_value
):
265 """ Translate a raw attr value according to the form
267 # Indirect forms can only be parsed if the top DIE of this CU has already been parsed
268 # and listed in the CU, since the top DIE would have to contain the DW_AT_xxx_base attributes.
269 # This breaks if there is an indirect encoding in the top DIE itself before the
270 # corresponding _base, and it was seen in the wild.
271 # There is a hook in get_top_DIE() to resolve those lazily.
272 translate_indirect
= self
.cu
.has_top_DIE() or self
.offset
!= self
.cu
.cu_die_offset
274 if form
== 'DW_FORM_strp':
275 with
preserve_stream_pos(self
.stream
):
276 value
= self
.dwarfinfo
.get_string_from_table(raw_value
)
277 elif form
== 'DW_FORM_line_strp':
278 with
preserve_stream_pos(self
.stream
):
279 value
= self
.dwarfinfo
.get_string_from_linetable(raw_value
)
280 elif form
in ('DW_FORM_GNU_strp_alt', 'DW_FORM_strp_sup'):
281 if self
.dwarfinfo
.supplementary_dwarfinfo
:
282 return self
.dwarfinfo
.supplementary_dwarfinfo
.get_string_from_table(raw_value
)
285 elif form
== 'DW_FORM_flag':
286 value
= not raw_value
== 0
287 elif form
== 'DW_FORM_flag_present':
289 elif form
== 'DW_FORM_indirect':
291 form
= DW_FORM_raw2name
[raw_value
]
292 except KeyError as err
:
294 'Found DW_FORM_indirect with unknown raw_value=' +
297 raw_value
= struct_parse(
298 self
.cu
.structs
.Dwarf_dw_form
[form
], self
.stream
)
299 # Let's hope this doesn't get too deep :-)
300 return self
._translate
_attr
_value
(form
, raw_value
)
301 elif form
in ('DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2', 'DW_FORM_addrx3', 'DW_FORM_addrx4') and translate_indirect
:
302 value
= self
.cu
.dwarfinfo
.get_addr(self
.cu
, raw_value
)
303 elif form
in ('DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4') and translate_indirect
:
304 stream
= self
.dwarfinfo
.debug_str_offsets_sec
.stream
305 base_offset
= _get_base_offset(self
.cu
, 'DW_AT_str_offsets_base')
306 offset_size
= 4 if self
.cu
.structs
.dwarf_format
== 32 else 8
307 with
preserve_stream_pos(stream
):
308 str_offset
= struct_parse(self
.cu
.structs
.Dwarf_offset(''), stream
, base_offset
+ raw_value
*offset_size
)
309 value
= self
.dwarfinfo
.get_string_from_table(str_offset
)
310 elif form
== 'DW_FORM_loclistx' and translate_indirect
:
311 value
= _resolve_via_offset_table(self
.dwarfinfo
.debug_loclists_sec
.stream
, self
.cu
, raw_value
, 'DW_AT_loclists_base')
312 elif form
== 'DW_FORM_rnglistx' and translate_indirect
:
313 value
= _resolve_via_offset_table(self
.dwarfinfo
.debug_rnglists_sec
.stream
, self
.cu
, raw_value
, 'DW_AT_rnglists_base')
318 def _translate_indirect_attributes(self
):
319 """ This is a hook to translate the DW_FORM_...x values in the top DIE
320 once the top DIE is parsed to the end. They can't be translated
321 while the top DIE is being parsed, because they implicitly make a
322 reference to the DW_AT_xxx_base attribute in the same DIE that may
323 not have been parsed yet.
325 for key
in self
.attributes
:
326 attr
= self
.attributes
[key
]
327 if attr
.form
in ('DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4',
328 'DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2', 'DW_FORM_addrx3', 'DW_FORM_addrx4',
329 'DW_FORM_loclistx', 'DW_FORM_rnglistx'):
330 # Can't change value in place, got to replace the whole attribute record
331 self
.attributes
[key
] = AttributeValue(
334 value
=self
._translate
_attr
_value
(attr
.form
, attr
.raw_value
),
335 raw_value
=attr
.raw_value
,