1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/die.py
4 # DWARF Debugging Information Entry
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from collections
import namedtuple
, OrderedDict
12 from ..common
.exceptions
import DWARFError
13 from ..common
.py3compat
import bytes2str
, iteritems
14 from ..common
.utils
import struct_parse
, preserve_stream_pos
15 from .enums
import DW_FORM_raw2name
16 from .dwarf_util
import _resolve_via_offset_table
, _get_base_offset
# AttributeValue - immutable record describing one attribute value of a DIE.
# Its fields, in positional order, are documented next to each field name
# below.
AttributeValue = namedtuple(
    'AttributeValue',
    [
        # The name (DW_AT_*) of this attribute
        'name',
        # The DW_FORM_* name of this attribute
        'form',
        # The value parsed from the section and translated accordingly to
        # the form (e.g. for a DW_FORM_strp it's the actual string taken
        # from the string table)
        'value',
        # Raw value as parsed from the section - used for debugging and
        # presentation (e.g. for a DW_FORM_strp it's the raw string offset
        # into the table)
        'raw_value',
        # Offset of this attribute's value in the stream (absolute offset,
        # relative to the beginning of the whole stream)
        'offset',
    ])
44 """ A DWARF debugging information entry. On creation, parses itself from
45 the stream. Each DIE is held by a CU.
47 Accessible attributes:
53 The size this DIE occupies in the section
56 The offset of this DIE in the stream
59 An ordered dictionary mapping attribute names to values. It's
60 ordered to preserve the order of attributes in the section
63 Specifies whether this DIE has children
66 The abbreviation code pointing to an abbreviation entry (note
67 that this is for informational purposes only - this object
68 interacts with its abbreviation table transparently).
70 See also the public methods.
72 def __init__(self
, cu
, stream
, offset
):
74 CompileUnit object this DIE belongs to. Used to obtain context
75 information (structs, abbrev table, etc.)
78 The stream and offset into it where this DIE's data is located
81 self
.dwarfinfo
= self
.cu
.dwarfinfo
# get DWARFInfo context
85 self
.attributes
= OrderedDict()
87 self
.has_children
= None
88 self
.abbrev_code
= None
90 # Null DIE terminator. It can be used to obtain offset range occupied
91 # by this DIE including its whole subtree.
92 self
._terminator
= None
98 """ Is this a null entry?
100 return self
.tag
is None
def get_DIE_from_attribute(self, name):
    """ Return the DIE referenced by the named attribute of this DIE.
        The attribute must be in the reference attribute class.

        name:
            The name of the attribute in the reference class.

        Raises KeyError if this DIE has no attribute called name,
        NotImplementedError for DW_FORM_ref_sig8 and for the
        supplementary-file forms when self.dwarfinfo.supplementary_dwarfinfo
        is not set, and DWARFError if the form of the attribute is not a
        reference form at all.
    """
    attr = self.attributes[name]
    form = attr.form

    # Forms whose raw value is an offset relative to the start of the CU.
    if form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4',
                'DW_FORM_ref8', 'DW_FORM_ref', 'DW_FORM_ref_udata'):
        refaddr = self.cu.cu_offset + attr.raw_value
        return self.cu.get_DIE_from_refaddr(refaddr)

    # Note: the comparisons below are exact equality on purpose. Writing
    # "form in ('DW_FORM_ref_addr')" tests for a *substring* of that one
    # string (the parentheses do not make a tuple), which would accept
    # bogus form names.
    if form == 'DW_FORM_ref_addr':
        # The raw value is passed as is to the DWARFInfo-level lookup.
        return self.cu.dwarfinfo.get_DIE_from_refaddr(attr.raw_value)

    if form == 'DW_FORM_ref_sig8':
        # Implement search type units for matching signature
        raise NotImplementedError('%s (type unit by signature)' % form)

    if form in ('DW_FORM_ref_sup4', 'DW_FORM_ref_sup8',
                'DW_FORM_GNU_ref_alt'):
        supplementary = self.dwarfinfo.supplementary_dwarfinfo
        if supplementary:
            return supplementary.get_DIE_from_refaddr(attr.raw_value)
        # FIXME: how to distinguish supplementary files from dwo ?
        raise NotImplementedError('%s to dwo' % form)

    # attr is a namedtuple, i.e. a tuple: it has to be wrapped in a
    # one-element tuple, otherwise the % operator unpacks its fields as
    # separate format arguments and raises TypeError.
    raise DWARFError(
        '%s is not a reference class form attribute' % (attr,))
127 def get_parent(self
):
128 """ Return the parent DIE of this DIE, or None if the DIE has no
129 parent (i.e. is a top-level DIE).
131 if self
._parent
is None:
132 self
._search
_ancestor
_offspring
()
def get_full_path(self):
    """ Return the full path filename for the DIE.

        The filename is the join of 'DW_AT_comp_dir' and 'DW_AT_name',
        either of which may be missing in practice; a missing attribute
        contributes an empty string. Note that the attribute value is
        usually a string taken from the .debug_string section and the
        returned value will be a string.
    """
    def text_of(attr_name):
        # Empty string when the attribute is absent, otherwise its value
        # run through bytes2str.
        found = self.attributes.get(attr_name, None)
        if not found:
            return ''
        return bytes2str(found.value)

    directory = text_of('DW_AT_comp_dir')
    basename = text_of('DW_AT_name')
    return os.path.join(directory, basename)
def iter_children(self):
    """ Iterates all children of this DIE.

        The work is delegated to the CU holding this DIE: the result of
        its iter_DIE_children method, called with this DIE, is returned
        unchanged.
    """
    owning_cu = self.cu
    return owning_cu.iter_DIE_children(self)
154 def iter_siblings(self
):
155 """ Yield all siblings of this DIE
157 parent
= self
.get_parent()
159 for sibling
in parent
.iter_children():
160 if sibling
is not self
:
163 raise StopIteration()
165 # The following methods are used while creating the DIE and should not be
166 # interesting to consumers
169 def set_parent(self
, die
):
172 #------ PRIVATE ------#
174 def _search_ancestor_offspring(self
):
175 """ Search our ancestors identifying their offspring to find our parent.
177 DIEs are stored as a flattened tree. The top DIE is the ancestor
178 of all DIEs in the unit. Each parent is guaranteed to be at
179 an offset less than their children. In each generation of children
180 the sibling with the closest offset not greater than our offset is
183 # This code is called when get_parent notices that the _parent has
184 # not been identified. To avoid execution for each sibling record all
185 # the children of any parent iterated. Assuming get_parent will also be
186 # called for siblings, it is more efficient if siblings references are
187 # provided and no worse than a single walk if they are missing, while
188 # stopping iteration early could result in O(n^2) walks.
189 search
= self
.cu
.get_top_DIE()
190 while search
.offset
< self
.offset
:
192 for child
in search
.iter_children():
193 child
.set_parent(search
)
194 if child
.offset
<= self
.offset
:
197 # We also need to check the offset of the terminator DIE
198 if search
.has_children
and search
._terminator
.offset
<= self
.offset
:
199 prev
= search
._terminator
201 # If we didn't find a closer parent, give up, don't loop.
202 # Either we mis-parsed an ancestor or someone created a DIE
203 # by an offset that was not actually the start of a DIE.
205 raise ValueError("offset %s not in CU %s DIE tree" %
206 (self
.offset
, self
.cu
.cu_offset
))
211 s
= 'DIE %s, size=%s, has_children=%s\n' % (
212 self
.tag
, self
.size
, self
.has_children
)
213 for attrname
, attrval
in iteritems(self
.attributes
):
214 s
+= ' |%-18s: %s\n' % (attrname
, attrval
)
218 return self
.__repr
__()
220 def _parse_DIE(self
):
221 """ Parses the DIE info from the section, based on the abbreviation
224 structs
= self
.cu
.structs
226 # A DIE begins with the abbreviation code. Read it and use it to
227 # obtain the abbrev declaration for this DIE.
228 # Note: here and elsewhere, preserve_stream_pos is used on operations
229 # that manipulate the stream by reading data from it.
230 self
.abbrev_code
= struct_parse(
231 structs
.Dwarf_uleb128(''), self
.stream
, self
.offset
)
233 # This may be a null entry
234 if self
.abbrev_code
== 0:
235 self
.size
= self
.stream
.tell() - self
.offset
238 abbrev_decl
= self
.cu
.get_abbrev_table().get_abbrev(self
.abbrev_code
)
239 self
.tag
= abbrev_decl
['tag']
240 self
.has_children
= abbrev_decl
.has_children()
242 # Guided by the attributes listed in the abbreviation declaration, parse
243 # values from the stream.
244 for spec
in abbrev_decl
['attr_spec']:
247 attr_offset
= self
.stream
.tell()
248 # Special case here: the attribute value is stored in the attribute
249 # definition in the abbreviation spec, not in the DIE itself.
250 if form
== 'DW_FORM_implicit_const':
254 raw_value
= struct_parse(structs
.Dwarf_dw_form
[form
], self
.stream
)
255 value
= self
._translate
_attr
_value
(form
, raw_value
)
256 self
.attributes
[name
] = AttributeValue(
263 self
.size
= self
.stream
.tell() - self
.offset
265 def _translate_attr_value(self
, form
, raw_value
):
266 """ Translate a raw attr value according to the form
268 # Indirect forms can only be parsed if the top DIE of this CU has already been parsed
269 # and listed in the CU, since the top DIE would have to contain the DW_AT_xxx_base attributes.
270 # This breaks if there is an indirect encoding in the top DIE itself before the
271 # corresponding _base, and it was seen in the wild.
272 # There is a hook in get_top_DIE() to resolve those lazily.
273 translate_indirect
= self
.cu
.has_top_DIE() or self
.offset
!= self
.cu
.cu_die_offset
275 if form
== 'DW_FORM_strp':
276 with
preserve_stream_pos(self
.stream
):
277 value
= self
.dwarfinfo
.get_string_from_table(raw_value
)
278 elif form
== 'DW_FORM_line_strp':
279 with
preserve_stream_pos(self
.stream
):
280 value
= self
.dwarfinfo
.get_string_from_linetable(raw_value
)
281 elif form
in ('DW_FORM_GNU_strp_alt', 'DW_FORM_strp_sup'):
282 if self
.dwarfinfo
.supplementary_dwarfinfo
:
283 return self
.dwarfinfo
.supplementary_dwarfinfo
.get_string_from_table(raw_value
)
286 elif form
== 'DW_FORM_flag':
287 value
= not raw_value
== 0
288 elif form
== 'DW_FORM_flag_present':
290 elif form
== 'DW_FORM_indirect':
292 form
= DW_FORM_raw2name
[raw_value
]
293 except KeyError as err
:
295 'Found DW_FORM_indirect with unknown raw_value=' +
298 raw_value
= struct_parse(
299 self
.cu
.structs
.Dwarf_dw_form
[form
], self
.stream
)
300 # Let's hope this doesn't get too deep :-)
301 return self
._translate
_attr
_value
(form
, raw_value
)
302 elif form
in ('DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2', 'DW_FORM_addrx3', 'DW_FORM_addrx4') and translate_indirect
:
303 value
= self
.cu
.dwarfinfo
.get_addr(self
.cu
, raw_value
)
304 elif form
in ('DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4') and translate_indirect
:
305 stream
= self
.dwarfinfo
.debug_str_offsets_sec
.stream
306 base_offset
= _get_base_offset(self
.cu
, 'DW_AT_str_offsets_base')
307 offset_size
= 4 if self
.cu
.structs
.dwarf_format
== 32 else 8
308 with
preserve_stream_pos(stream
):
309 str_offset
= struct_parse(self
.cu
.structs
.Dwarf_offset(''), stream
, base_offset
+ raw_value
*offset_size
)
310 value
= self
.dwarfinfo
.get_string_from_table(str_offset
)
311 elif form
== 'DW_FORM_loclistx' and translate_indirect
:
312 value
= _resolve_via_offset_table(self
.dwarfinfo
.debug_loclists_sec
.stream
, self
.cu
, raw_value
, 'DW_AT_loclists_base')
313 elif form
== 'DW_FORM_rnglistx' and translate_indirect
:
314 value
= _resolve_via_offset_table(self
.dwarfinfo
.debug_rnglists_sec
.stream
, self
.cu
, raw_value
, 'DW_AT_rnglists_base')
319 def _translate_indirect_attributes(self
):
320 """ This is a hook to translate the DW_FORM_...x values in the top DIE
321 once the top DIE is parsed to the end. They can't be translated
322 while the top DIE is being parsed, because they implicitly make a
323 reference to the DW_AT_xxx_base attribute in the same DIE that may
324 not have been parsed yet.
326 for key
in self
.attributes
:
327 attr
= self
.attributes
[key
]
328 if attr
.form
in ('DW_FORM_strx', 'DW_FORM_strx1', 'DW_FORM_strx2', 'DW_FORM_strx3', 'DW_FORM_strx4',
329 'DW_FORM_addrx', 'DW_FORM_addrx1', 'DW_FORM_addrx2', 'DW_FORM_addrx3', 'DW_FORM_addrx4',
330 'DW_FORM_loclistx', 'DW_FORM_rnglistx'):
331 # Can't change value in place, got to replace the whole attribute record
332 self
.attributes
[key
] = AttributeValue(
335 value
=self
._translate
_attr
_value
(attr
.form
, attr
.raw_value
),
336 raw_value
=attr
.raw_value
,