1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/compileunit.py
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
from bisect import bisect_right

from ..common.utils import dwarf_assert
from .die import DIE
class CompileUnit(object):
    """ A DWARF compilation unit (CU).

            A normal compilation unit typically represents the text and data
            contributed to an executable by a single relocatable object file.
            It may be derived from several source files,
            including pre-processed "include files"

        Serves as a container and context to DIEs that describe objects and code
        belonging to a compilation unit.

        CU header entries can be accessed as dict keys from this object, i.e.

           cu = CompileUnit(...)
           cu['version']  # version field of the CU header

        To get the top-level DIE describing the compilation unit, call the
        get_top_DIE method.
    """
    def __init__(self, header, dwarfinfo, structs, cu_offset, cu_die_offset):
        """ header:
                CU header for this compile unit

            dwarfinfo:
                The DWARFInfo context object which created this one

            structs:
                A DWARFStructs instance suitable for this compile unit

            cu_offset:
                Offset in the stream to the beginning of this CU (its header)

            cu_die_offset:
                Offset in the stream of the top DIE of this CU
        """
        self.dwarfinfo = dwarfinfo
        # Kept so that __getitem__ can expose header fields as dict keys.
        self.header = header
        self.structs = structs
        self.cu_offset = cu_offset
        self.cu_die_offset = cu_die_offset

        # The abbreviation table for this CU. Filled lazily when DIEs are
        # requested.
        self._abbrev_table = None

        # A list of DIEs belonging to this CU.
        # This list is lazily constructed as DIEs are iterated over.
        self._dielist = []

        # A list of file offsets, corresponding (by index) to the DIEs
        # in `self._dielist`. This list exists separately from
        # `self._dielist` to make it binary searchable, enabling the
        # DIE population strategy used in `iter_DIE_children`.
        # Like `self._dielist`, this list is lazily constructed
        # as DIEs are iterated over.
        self._diemap = []

    def dwarf_format(self):
        """ Get the DWARF format (32 or 64) for this CU
        """
        return self.structs.dwarf_format

    def get_abbrev_table(self):
        """ Get the abbreviation table (AbbrevTable object) for this CU

            The table is fetched from the DWARFInfo context on first use and
            cached on this object for subsequent calls.
        """
        if self._abbrev_table is None:
            self._abbrev_table = self.dwarfinfo.get_abbrev_table(
                self['debug_abbrev_offset'])
        return self._abbrev_table

    def get_top_DIE(self):
        """ Get the top DIE (which is either a DW_TAG_compile_unit or
            DW_TAG_partial_unit) of this CU

            The DIE is parsed on the first call and served from the cache
            afterwards.
        """
        # Note that a top DIE always has minimal offset and is therefore
        # at the beginning of our lists, so no bisect is required.
        if self._diemap:
            return self._dielist[0]

        top = DIE(
            cu=self,
            stream=self.dwarfinfo.debug_info_sec.stream,
            offset=self.cu_die_offset)

        self._dielist.insert(0, top)
        self._diemap.insert(0, self.cu_die_offset)

        # Can't translate indirect attributes until the top DIE has been
        # parsed to the end
        top._translate_indirect_attributes()

        return top

    def has_top_DIE(self):
        """ Returns whether the top DIE in this CU has already been parsed and cached.
            No parsing on demand!
        """
        return len(self._diemap) > 0

    @property
    def size(self):
        """ Size of this CU: the header's 'unit_length' field plus the size
            of the initial length field itself.
        """
        return self['unit_length'] + self.structs.initial_length_field_size()

    def get_DIE_from_refaddr(self, refaddr):
        """ Obtain a DIE contained in this CU from a reference.

            refaddr:
                The offset into the .debug_info section, which must be
                contained in this CU or a DWARFError will be raised.

            When using a reference class attribute with a form that is
            relative to the compile unit, add the compile unit's
            .cu_offset before calling this function.
        """
        # All DIEs are after the cu header and within the unit
        dwarf_assert(
            self.cu_die_offset <= refaddr < self.cu_offset + self.size,
            'refaddr %s not in DIE range of CU %s' % (refaddr, self.cu_offset))

        return self._get_cached_DIE(refaddr)

    def iter_DIEs(self):
        """ Iterate over all the DIEs in the CU, in order of their appearance.
            Note that null DIEs will also be returned.
        """
        return self._iter_DIE_subtree(self.get_top_DIE())

    def iter_DIE_children(self, die):
        """ Given a DIE, yields either its children, without null DIE list
            terminator, or nothing, if that DIE has no children.

            The null DIE terminator is saved in that DIE when iteration ended.
        """
        if not die.has_children:
            return

        # `cur_offset` tracks the stream offset of the next DIE to yield
        # as we iterate over our children,
        cur_offset = die.offset + die.size

        while True:
            child = self._get_cached_DIE(cur_offset)

            child.set_parent(die)

            if child.is_null():
                # Reached the end of the children list; remember the
                # terminator so callers can locate the end of this subtree.
                die._terminator = child
                return

            yield child

            if not child.has_children:
                cur_offset += child.size
            elif "DW_AT_sibling" in child.attributes:
                sibling = child.attributes["DW_AT_sibling"]
                if sibling.form in ('DW_FORM_ref1', 'DW_FORM_ref2',
                                    'DW_FORM_ref4', 'DW_FORM_ref8',
                                    'DW_FORM_ref_udata'):
                    # These forms are relative to the start of the CU.
                    cur_offset = sibling.value + self.cu_offset
                elif sibling.form == 'DW_FORM_ref_addr':
                    # Already an offset into the section.
                    cur_offset = sibling.value
                else:
                    raise NotImplementedError('sibling in form %s' % sibling.form)
            else:
                # If no DW_AT_sibling attribute is provided by the producer
                # then the whole child subtree must be parsed to find its next
                # sibling. There is one zero byte representing null DIE
                # terminating children list. It is used to locate child subtree
                # bounds.

                # If children are not parsed yet, this instruction will manage
                # to recursive call of this function which will result in
                # setting of `_terminator` attribute of the `child`.
                if child._terminator is None:
                    for _ in self.iter_DIE_children(child):
                        pass

                cur_offset = child._terminator.offset + child._terminator.size

    #------ PRIVATE ------#

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _iter_DIE_subtree(self, die):
        """ Given a DIE, this yields it with its subtree including null DIEs
            (child list terminators).
        """
        yield die
        if die.has_children:
            for c in die.iter_children():
                for d in self._iter_DIE_subtree(c):
                    yield d
            yield die._terminator

    def _get_cached_DIE(self, offset):
        """ Given a DIE offset, look it up in the cache.  If not present,
            parse the DIE and insert it into the cache.

            offset:
                The offset of the DIE in the debug_info section to retrieve.

            The stream reference is copied from the top DIE.  The top die will
            also be parsed and cached if needed.

            See also get_DIE_from_refaddr(self, refaddr).
        """
        # The top die must be in the cache if any DIE is in the cache.
        # The stream is the same for all DIEs in this CU, so populate
        # the top DIE and obtain a reference to its stream.
        top_die_stream = self.get_top_DIE().stream

        # `offset` is the offset in the stream of the DIE we want to return.
        # The map is maintained as a parallel array to the list.  We call
        # bisect each time to ensure new DIEs are inserted in the correct
        # order within both `self._dielist` and `self._diemap`.
        i = bisect_right(self._diemap, offset)

        # Note that `self._diemap` cannot be empty because the top DIE
        # was inserted by the call to .get_top_DIE().  Also it has the minimal
        # offset, so the bisect_right insert point will always be at least 1.
        if offset == self._diemap[i - 1]:
            die = self._dielist[i - 1]
        else:
            die = DIE(cu=self, stream=top_die_stream, offset=offset)
            self._dielist.insert(i, die)
            self._diemap.insert(i, offset)

        return die