e40dab4e9616323e67f1fabf338a084371cb3465
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/compileunit.py
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from bisect
import bisect_right
11 from ..common
.utils
import dwarf_assert
14 class CompileUnit(object):
15 """ A DWARF compilation unit (CU).
17 A normal compilation unit typically represents the text and data
18 contributed to an executable by a single relocatable object file.
19 It may be derived from several source files,
20 including pre-processed "include files"
22 Serves as a container and context to DIEs that describe objects and code
23 belonging to a compilation unit.
25 CU header entries can be accessed as dict keys from this object, i.e.
27 cu['version'] # version field of the CU header
29 To get the top-level DIE describing the compilation unit, call the
32 def __init__(self
, header
, dwarfinfo
, structs
, cu_offset
, cu_die_offset
):
34 CU header for this compile unit
37 The DWARFInfo context object which created this one
40 A DWARFStructs instance suitable for this compile unit
43 Offset in the stream to the beginning of this CU (its header)
46 Offset in the stream of the top DIE of this CU
48 self
.dwarfinfo
= dwarfinfo
50 self
.structs
= structs
51 self
.cu_offset
= cu_offset
52 self
.cu_die_offset
= cu_die_offset
54 # The abbreviation table for this CU. Filled lazily when DIEs are
56 self
._abbrev
_table
= None
58 # A list of DIEs belonging to this CU.
59 # This list is lazily constructed as DIEs are iterated over.
61 # A list of file offsets, corresponding (by index) to the DIEs
62 # in `self._dielist`. This list exists separately from
63 # `self._dielist` to make it binary searchable, enabling the
64 # DIE population strategy used in `iter_DIE_children`.
65 # Like `self._dielist`, this list is lazily constructed
66 # as DIEs are iterated over.
69 def dwarf_format(self
):
70 """ Get the DWARF format (32 or 64) for this CU
72 return self
.structs
.dwarf_format
74 def get_abbrev_table(self
):
75 """ Get the abbreviation table (AbbrevTable object) for this CU
77 if self
._abbrev
_table
is None:
78 self
._abbrev
_table
= self
.dwarfinfo
.get_abbrev_table(
79 self
['debug_abbrev_offset'])
80 return self
._abbrev
_table
82 def get_top_DIE(self
):
83 """ Get the top DIE (which is either a DW_TAG_compile_unit or
84 DW_TAG_partial_unit) of this CU
87 # Note that a top DIE always has minimal offset and is therefore
88 # at the beginning of our lists, so no bisect is required.
89 if len(self
._diemap
) > 0:
90 return self
._dielist
[0]
94 stream
=self
.dwarfinfo
.debug_info_sec
.stream
,
95 offset
=self
.cu_die_offset
)
97 self
._dielist
.insert(0, top
)
98 self
._diemap
.insert(0, self
.cu_die_offset
)
104 return self
['unit_length'] + self
.structs
.initial_length_field_size()
106 def get_DIE_from_refaddr(self
, refaddr
):
107 """ Obtain a DIE contained in this CU from a reference.
110 The offset into the .debug_info section, which must be
111 contained in this CU or a DWARFError will be raised.
113 When using a reference class attribute with a form that is
114 relative to the compile unit, add unit add the compile unit's
115 .cu_addr before calling this function.
117 # All DIEs are after the cu header and within the unit
119 self
.cu_die_offset
<= refaddr
< self
.cu_offset
+ self
.size
,
120 'refaddr %s not in DIE range of CU %s' % (refaddr
, self
.cu_offset
))
122 return self
._get
_cached
_DIE
(refaddr
)
125 """ Iterate over all the DIEs in the CU, in order of their appearance.
126 Note that null DIEs will also be returned.
128 return self
._iter
_DIE
_subtree
(self
.get_top_DIE())
130 def iter_DIE_children(self
, die
):
131 """ Given a DIE, yields either its children, without null DIE list
132 terminator, or nothing, if that DIE has no children.
134 The null DIE terminator is saved in that DIE when iteration ended.
136 if not die
.has_children
:
139 # `cur_offset` tracks the stream offset of the next DIE to yield
140 # as we iterate over our children,
141 cur_offset
= die
.offset
+ die
.size
144 child
= self
._get
_cached
_DIE
(cur_offset
)
146 child
.set_parent(die
)
149 die
._terminator
= child
154 if not child
.has_children
:
155 cur_offset
+= child
.size
156 elif "DW_AT_sibling" in child
.attributes
:
157 sibling
= child
.attributes
["DW_AT_sibling"]
158 if sibling
.form
in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4', 'DW_FORM_ref8', 'DW_FORM_ref'):
159 cur_offset
= sibling
.value
+ self
.cu_offset
160 elif sibling
.form
== 'DW_FORM_ref_addr':
161 cur_offset
= sibling
.value
163 raise NotImplementedError('Sibling in form %s' % sibling
.form
)
165 # If no DW_AT_sibling attribute is provided by the producer
166 # then the whole child subtree must be parsed to find its next
167 # sibling. There is one zero byte representing null DIE
168 # terminating children list. It is used to locate child subtree
171 # If children are not parsed yet, this instruction will manage
172 # to recursive call of this function which will result in
173 # setting of `_terminator` attribute of the `child`.
174 if child
._terminator
is None:
175 for _
in self
.iter_DIE_children(child
):
178 cur_offset
= child
._terminator
.offset
+ child
._terminator
.size
180 #------ PRIVATE ------#
182 def __getitem__(self
, name
):
183 """ Implement dict-like access to header entries
185 return self
.header
[name
]
187 def _iter_DIE_subtree(self
, die
):
188 """ Given a DIE, this yields it with its subtree including null DIEs
189 (child list terminators).
193 for c
in die
.iter_children():
194 for d
in self
._iter
_DIE
_subtree
(c
):
196 yield die
._terminator
198 def _get_cached_DIE(self
, offset
):
199 """ Given a DIE offset, look it up in the cache. If not present,
200 parse the DIE and insert it into the cache.
203 The offset of the DIE in the debug_info section to retrieve.
205 The stream reference is copied from the top DIE. The top die will
206 also be parsed and cached if needed.
208 See also get_DIE_from_refaddr(self, refaddr).
210 # The top die must be in the cache if any DIE is in the cache.
211 # The stream is the same for all DIEs in this CU, so populate
212 # the top DIE and obtain a reference to its stream.
213 top_die_stream
= self
.get_top_DIE().stream
215 # `offset` is the offset in the stream of the DIE we want to return.
216 # The map is maintined as a parallel array to the list. We call
217 # bisect each time to ensure new DIEs are inserted in the correct
218 # order within both `self._dielist` and `self._diemap`.
219 i
= bisect_right(self
._diemap
, offset
)
221 # Note that `self._diemap` cannot be empty because a the top DIE
222 # was inserted by the call to .get_top_DIE(). Also it has the minimal
223 # offset, so the bisect_right insert point will always be at least 1.
224 if offset
== self
._diemap
[i
- 1]:
225 die
= self
._dielist
[i
- 1]
227 die
= DIE(cu
=self
, stream
=top_die_stream
, offset
=offset
)
228 self
._dielist
.insert(i
, die
)
229 self
._diemap
.insert(i
, offset
)