Indirect encoding support (#430)
[pyelftools.git] / elftools / dwarf / compileunit.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/compileunit.py
3 #
4 # DWARF compile unit
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from bisect import bisect_right
10 from .die import DIE
11 from ..common.utils import dwarf_assert
12
13
class CompileUnit(object):
    """ A DWARF compilation unit (CU).

            A normal compilation unit typically represents the text and data
            contributed to an executable by a single relocatable object file.
            It may be derived from several source files,
            including pre-processed "include files"

        Serves as a container and context to DIEs that describe objects and code
        belonging to a compilation unit.

        CU header entries can be accessed as dict keys from this object, i.e.
           cu = CompileUnit(...)
           cu['version']  # version field of the CU header

        To get the top-level DIE describing the compilation unit, call the
        get_top_DIE method.
    """
    # Reference forms whose value is an offset relative to the beginning of
    # this CU (its header), so `cu_offset` must be added to obtain a stream
    # offset. DW_FORM_ref_addr is handled separately because its value is
    # used as a stream offset directly.
    _CU_RELATIVE_REF_FORMS = frozenset((
        'DW_FORM_ref1', 'DW_FORM_ref2', 'DW_FORM_ref4', 'DW_FORM_ref8',
        'DW_FORM_ref', 'DW_FORM_ref_udata'))

    def __init__(self, header, dwarfinfo, structs, cu_offset, cu_die_offset):
        """ header:
                CU header for this compile unit

            dwarfinfo:
                The DWARFInfo context object which created this one

            structs:
                A DWARFStructs instance suitable for this compile unit

            cu_offset:
                Offset in the stream to the beginning of this CU (its header)

            cu_die_offset:
                Offset in the stream of the top DIE of this CU
        """
        self.dwarfinfo = dwarfinfo
        self.header = header
        self.structs = structs
        self.cu_offset = cu_offset
        self.cu_die_offset = cu_die_offset

        # The abbreviation table for this CU. Filled lazily when DIEs are
        # requested.
        self._abbrev_table = None

        # A list of DIEs belonging to this CU.
        # This list is lazily constructed as DIEs are iterated over.
        self._dielist = []
        # A list of file offsets, corresponding (by index) to the DIEs
        # in `self._dielist`. This list exists separately from
        # `self._dielist` to make it binary searchable, enabling the
        # DIE population strategy used in `iter_DIE_children`.
        # Like `self._dielist`, this list is lazily constructed
        # as DIEs are iterated over.
        self._diemap = []

    def dwarf_format(self):
        """ Get the DWARF format (32 or 64) for this CU
        """
        return self.structs.dwarf_format

    def get_abbrev_table(self):
        """ Get the abbreviation table (AbbrevTable object) for this CU.

            The table is fetched from the DWARFInfo context on first use
            (keyed by the header's 'debug_abbrev_offset') and cached.
        """
        if self._abbrev_table is None:
            self._abbrev_table = self.dwarfinfo.get_abbrev_table(
                self['debug_abbrev_offset'])
        return self._abbrev_table

    def get_top_DIE(self):
        """ Get the top DIE (which is either a DW_TAG_compile_unit or
            DW_TAG_partial_unit) of this CU.

            The DIE is parsed on first request and cached afterwards.
        """
        # Note that a top DIE always has minimal offset and is therefore
        # at the beginning of our lists, so no bisect is required.
        if self._diemap:
            return self._dielist[0]

        top = DIE(
            cu=self,
            stream=self.dwarfinfo.debug_info_sec.stream,
            offset=self.cu_die_offset)

        self._dielist.insert(0, top)
        self._diemap.insert(0, self.cu_die_offset)

        # Can't translate indirect attributes until the top DIE has been
        # parsed to the end
        top._translate_indirect_attributes()

        return top

    def has_top_DIE(self):
        """ Returns whether the top DIE in this CU has already been parsed
            and cached. No parsing on demand!
        """
        return bool(self._diemap)

    @property
    def size(self):
        """ Total size of this CU in the stream: the 'unit_length' header
            field plus the size of the initial length field itself.
        """
        return self['unit_length'] + self.structs.initial_length_field_size()

    def get_DIE_from_refaddr(self, refaddr):
        """ Obtain a DIE contained in this CU from a reference.

            refaddr:
                The offset into the .debug_info section, which must be
                contained in this CU or a DWARFError will be raised.

            When using a reference class attribute with a form that is
            relative to the compile unit, add the compile unit's
            .cu_offset before calling this function.
        """
        # All DIEs are after the cu header and within the unit
        dwarf_assert(
            self.cu_die_offset <= refaddr < self.cu_offset + self.size,
            'refaddr %s not in DIE range of CU %s' % (refaddr, self.cu_offset))

        return self._get_cached_DIE(refaddr)

    def iter_DIEs(self):
        """ Iterate over all the DIEs in the CU, in order of their appearance.
            Note that null DIEs will also be returned.
        """
        return self._iter_DIE_subtree(self.get_top_DIE())

    def iter_DIE_children(self, die):
        """ Given a DIE, yields either its children, without null DIE list
            terminator, or nothing, if that DIE has no children.

            The null DIE terminator is saved in that DIE when iteration ended.

            Raises NotImplementedError if a child carries a DW_AT_sibling
            attribute in a form that is neither a CU-relative reference form
            nor DW_FORM_ref_addr.
        """
        if not die.has_children:
            return

        # `cur_offset` tracks the stream offset of the next DIE to yield
        # as we iterate over our children,
        cur_offset = die.offset + die.size

        while True:
            child = self._get_cached_DIE(cur_offset)

            child.set_parent(die)

            if child.is_null():
                die._terminator = child
                return

            yield child

            if not child.has_children:
                # A childless DIE is immediately followed by its sibling.
                cur_offset += child.size
            elif "DW_AT_sibling" in child.attributes:
                # The producer told us where the next sibling is, so the
                # child's subtree does not have to be parsed.
                sibling = child.attributes["DW_AT_sibling"]
                if sibling.form in self._CU_RELATIVE_REF_FORMS:
                    cur_offset = sibling.value + self.cu_offset
                elif sibling.form == 'DW_FORM_ref_addr':
                    cur_offset = sibling.value
                else:
                    raise NotImplementedError('sibling in form %s' % sibling.form)
            else:
                # If no DW_AT_sibling attribute is provided by the producer
                # then the whole child subtree must be parsed to find its next
                # sibling. There is one zero byte representing null DIE
                # terminating children list. It is used to locate child subtree
                # bounds.

                # If children are not parsed yet, this loop recursively
                # drives this same function, which results in the
                # `_terminator` attribute of the `child` being set.
                if child._terminator is None:
                    for _ in self.iter_DIE_children(child):
                        pass

                cur_offset = child._terminator.offset + child._terminator.size

    #------ PRIVATE ------#

    def __getitem__(self, name):
        """ Implement dict-like access to header entries
        """
        return self.header[name]

    def _iter_DIE_subtree(self, die):
        """ Given a DIE, this yields it with its subtree including null DIEs
            (child list terminators).
        """
        yield die
        if die.has_children:
            for c in die.iter_children():
                for d in self._iter_DIE_subtree(c):
                    yield d
            yield die._terminator

    def _get_cached_DIE(self, offset):
        """ Given a DIE offset, look it up in the cache.  If not present,
            parse the DIE and insert it into the cache.

            offset:
                The offset of the DIE in the debug_info section to retrieve.

            The stream reference is copied from the top DIE.  The top die will
            also be parsed and cached if needed.

            See also get_DIE_from_refaddr(self, refaddr).
        """
        # The top die must be in the cache if any DIE is in the cache.
        # The stream is the same for all DIEs in this CU, so populate
        # the top DIE and obtain a reference to its stream.
        top_die_stream = self.get_top_DIE().stream

        # `offset` is the offset in the stream of the DIE we want to return.
        # The map is maintained as a parallel array to the list.  We call
        # bisect each time to ensure new DIEs are inserted in the correct
        # order within both `self._dielist` and `self._diemap`.
        i = bisect_right(self._diemap, offset)

        # Note that `self._diemap` cannot be empty because the top DIE
        # was inserted by the call to .get_top_DIE().  Also it has the minimal
        # offset, so the bisect_right insert point will always be at least 1.
        if offset == self._diemap[i - 1]:
            die = self._dielist[i - 1]
        else:
            die = DIE(cu=self, stream=top_die_stream, offset=offset)
            self._dielist.insert(i, die)
            self._diemap.insert(i, offset)

        return die