e40dab4e9616323e67f1fabf338a084371cb3465
[pyelftools.git] / elftools / dwarf / compileunit.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/compileunit.py
3 #
4 # DWARF compile unit
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from bisect import bisect_right
10 from .die import DIE
11 from ..common.utils import dwarf_assert
12
13
14 class CompileUnit(object):
15 """ A DWARF compilation unit (CU).
16
17 A normal compilation unit typically represents the text and data
18 contributed to an executable by a single relocatable object file.
19 It may be derived from several source files,
20 including pre-processed "include files"
21
22 Serves as a container and context to DIEs that describe objects and code
23 belonging to a compilation unit.
24
25 CU header entries can be accessed as dict keys from this object, i.e.
26 cu = CompileUnit(...)
27 cu['version'] # version field of the CU header
28
29 To get the top-level DIE describing the compilation unit, call the
30 get_top_DIE method.
31 """
32 def __init__(self, header, dwarfinfo, structs, cu_offset, cu_die_offset):
33 """ header:
34 CU header for this compile unit
35
36 dwarfinfo:
37 The DWARFInfo context object which created this one
38
39 structs:
40 A DWARFStructs instance suitable for this compile unit
41
42 cu_offset:
43 Offset in the stream to the beginning of this CU (its header)
44
45 cu_die_offset:
46 Offset in the stream of the top DIE of this CU
47 """
48 self.dwarfinfo = dwarfinfo
49 self.header = header
50 self.structs = structs
51 self.cu_offset = cu_offset
52 self.cu_die_offset = cu_die_offset
53
54 # The abbreviation table for this CU. Filled lazily when DIEs are
55 # requested.
56 self._abbrev_table = None
57
58 # A list of DIEs belonging to this CU.
59 # This list is lazily constructed as DIEs are iterated over.
60 self._dielist = []
61 # A list of file offsets, corresponding (by index) to the DIEs
62 # in `self._dielist`. This list exists separately from
63 # `self._dielist` to make it binary searchable, enabling the
64 # DIE population strategy used in `iter_DIE_children`.
65 # Like `self._dielist`, this list is lazily constructed
66 # as DIEs are iterated over.
67 self._diemap = []
68
69 def dwarf_format(self):
70 """ Get the DWARF format (32 or 64) for this CU
71 """
72 return self.structs.dwarf_format
73
74 def get_abbrev_table(self):
75 """ Get the abbreviation table (AbbrevTable object) for this CU
76 """
77 if self._abbrev_table is None:
78 self._abbrev_table = self.dwarfinfo.get_abbrev_table(
79 self['debug_abbrev_offset'])
80 return self._abbrev_table
81
82 def get_top_DIE(self):
83 """ Get the top DIE (which is either a DW_TAG_compile_unit or
84 DW_TAG_partial_unit) of this CU
85 """
86
87 # Note that a top DIE always has minimal offset and is therefore
88 # at the beginning of our lists, so no bisect is required.
89 if len(self._diemap) > 0:
90 return self._dielist[0]
91
92 top = DIE(
93 cu=self,
94 stream=self.dwarfinfo.debug_info_sec.stream,
95 offset=self.cu_die_offset)
96
97 self._dielist.insert(0, top)
98 self._diemap.insert(0, self.cu_die_offset)
99
100 return top
101
102 @property
103 def size(self):
104 return self['unit_length'] + self.structs.initial_length_field_size()
105
106 def get_DIE_from_refaddr(self, refaddr):
107 """ Obtain a DIE contained in this CU from a reference.
108
109 refaddr:
110 The offset into the .debug_info section, which must be
111 contained in this CU or a DWARFError will be raised.
112
113 When using a reference class attribute with a form that is
114 relative to the compile unit, add unit add the compile unit's
115 .cu_addr before calling this function.
116 """
117 # All DIEs are after the cu header and within the unit
118 dwarf_assert(
119 self.cu_die_offset <= refaddr < self.cu_offset + self.size,
120 'refaddr %s not in DIE range of CU %s' % (refaddr, self.cu_offset))
121
122 return self._get_cached_DIE(refaddr)
123
124 def iter_DIEs(self):
125 """ Iterate over all the DIEs in the CU, in order of their appearance.
126 Note that null DIEs will also be returned.
127 """
128 return self._iter_DIE_subtree(self.get_top_DIE())
129
130 def iter_DIE_children(self, die):
131 """ Given a DIE, yields either its children, without null DIE list
132 terminator, or nothing, if that DIE has no children.
133
134 The null DIE terminator is saved in that DIE when iteration ended.
135 """
136 if not die.has_children:
137 return
138
139 # `cur_offset` tracks the stream offset of the next DIE to yield
140 # as we iterate over our children,
141 cur_offset = die.offset + die.size
142
143 while True:
144 child = self._get_cached_DIE(cur_offset)
145
146 child.set_parent(die)
147
148 if child.is_null():
149 die._terminator = child
150 return
151
152 yield child
153
154 if not child.has_children:
155 cur_offset += child.size
156 elif "DW_AT_sibling" in child.attributes:
157 sibling = child.attributes["DW_AT_sibling"]
158 if sibling.form in ('DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4', 'DW_FORM_ref8', 'DW_FORM_ref'):
159 cur_offset = sibling.value + self.cu_offset
160 elif sibling.form == 'DW_FORM_ref_addr':
161 cur_offset = sibling.value
162 else:
163 raise NotImplementedError('Sibling in form %s' % sibling.form)
164 else:
165 # If no DW_AT_sibling attribute is provided by the producer
166 # then the whole child subtree must be parsed to find its next
167 # sibling. There is one zero byte representing null DIE
168 # terminating children list. It is used to locate child subtree
169 # bounds.
170
171 # If children are not parsed yet, this instruction will manage
172 # to recursive call of this function which will result in
173 # setting of `_terminator` attribute of the `child`.
174 if child._terminator is None:
175 for _ in self.iter_DIE_children(child):
176 pass
177
178 cur_offset = child._terminator.offset + child._terminator.size
179
180 #------ PRIVATE ------#
181
182 def __getitem__(self, name):
183 """ Implement dict-like access to header entries
184 """
185 return self.header[name]
186
187 def _iter_DIE_subtree(self, die):
188 """ Given a DIE, this yields it with its subtree including null DIEs
189 (child list terminators).
190 """
191 yield die
192 if die.has_children:
193 for c in die.iter_children():
194 for d in self._iter_DIE_subtree(c):
195 yield d
196 yield die._terminator
197
198 def _get_cached_DIE(self, offset):
199 """ Given a DIE offset, look it up in the cache. If not present,
200 parse the DIE and insert it into the cache.
201
202 offset:
203 The offset of the DIE in the debug_info section to retrieve.
204
205 The stream reference is copied from the top DIE. The top die will
206 also be parsed and cached if needed.
207
208 See also get_DIE_from_refaddr(self, refaddr).
209 """
210 # The top die must be in the cache if any DIE is in the cache.
211 # The stream is the same for all DIEs in this CU, so populate
212 # the top DIE and obtain a reference to its stream.
213 top_die_stream = self.get_top_DIE().stream
214
215 # `offset` is the offset in the stream of the DIE we want to return.
216 # The map is maintined as a parallel array to the list. We call
217 # bisect each time to ensure new DIEs are inserted in the correct
218 # order within both `self._dielist` and `self._diemap`.
219 i = bisect_right(self._diemap, offset)
220
221 # Note that `self._diemap` cannot be empty because a the top DIE
222 # was inserted by the call to .get_top_DIE(). Also it has the minimal
223 # offset, so the bisect_right insert point will always be at least 1.
224 if offset == self._diemap[i - 1]:
225 die = self._dielist[i - 1]
226 else:
227 die = DIE(cu=self, stream=top_die_stream, offset=offset)
228 self._dielist.insert(i, die)
229 self._diemap.insert(i, offset)
230
231 return die