16f29f6341b08a3598b1fe079a839a9c838e893c
[pyelftools.git] / elftools / dwarf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from ..construct import (
11 UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
12 SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
13 Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
14 CString, Embed, StaticField, IfThenElse
15 )
16 from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
17 from .enums import *
18
19
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_length:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding
                section 7.4

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                A call-frame CIE

            EH_CIE_header (+):
                The CIE header layout without the address_size and
                segment_size fields, whatever dwarf_version is

            Dwarf_FDE_header (+):
                A call-frame FDE

            Dwarf_aranges_header (+):
                Header with unit_length, version, debug_info_offset,
                address_size and segment_size fields

            Dwarf_nameLUT_header (+):
                Header with unit_length, version, debug_info_offset and
                debug_info_length fields

            Dwarf_string_offsets_table_header (+):
                Header with unit_length, version and padding fields

            Dwarf_address_table_header (+):
                Header with unit_length, version, address_size and
                segment_selector_size fields

        See also the documentation of public methods.
    """
    def __init__(self,
                 little_endian, dwarf_format, address_size, dwarf_version=2):
        """ little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)

            dwarf_version:
                Numeric DWARF version

            Invalid dwarf_format or address_size values trip an assertion.
            All structs are built eagerly at the end of construction.
        """
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4, str(address_size)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field, in bytes: 4 for the 32-bit
            DWARF format, 12 otherwise (a 4-byte escape word followed by an
            8-byte length, see _create_initial_length).
        """
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Select the integer primitives matching the configured endianness
            and word sizes, then build every composite struct on top of them.
        """
        if self.little_endian:
            u8, u16, u32, u64 = ULInt8, ULInt16, ULInt32, ULInt64
            s8, s16, s32, s64 = SLInt8, SLInt16, SLInt32, SLInt64
        else:
            u8, u16, u32, u64 = UBInt8, UBInt16, UBInt32, UBInt64
            s8, s16, s32, s64 = SBInt8, SBInt16, SBInt32, SBInt64

        self.Dwarf_uint8 = u8
        self.Dwarf_uint16 = u16
        self.Dwarf_uint32 = u32
        self.Dwarf_uint64 = u64
        # Offsets and lengths follow the DWARF format (32/64-bit), while
        # target addresses follow the machine address size.
        self.Dwarf_offset = u32 if self.dwarf_format == 32 else u64
        self.Dwarf_length = u32 if self.dwarf_format == 32 else u64
        self.Dwarf_target_addr = u32 if self.address_size == 4 else u64
        self.Dwarf_int8 = s8
        self.Dwarf_int16 = s16
        self.Dwarf_int32 = s32
        self.Dwarf_int64 = s64

        # The builders below rely on the primitives assigned above; the
        # initial length and LEB128 helpers must exist before the headers
        # that use them are created.
        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()
        self._create_nameLUT_header()
        self._create_string_offsets_table_header()
        self._create_address_table_header()

    def _create_initial_length(self):
        """ Create Dwarf_initial_length: a factory taking a field name and
            returning an adapted struct for the "initial length" field.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Expose the LEB128 variable-length integer constructs.
        """
        self.Dwarf_uleb128 = ULEB128
        self.Dwarf_sleb128 = SLEB128

    def _create_cu_header(self):
        """ Create Dwarf_CU_header. The field layout after 'version' depends
            on the version value parsed from the stream.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
            IfThenElse('', lambda ctx: ctx['version'] >= 5,
                Embed(Struct('',
                    self.Dwarf_uint8('unit_type'),
                    self.Dwarf_uint8('address_size'),
                    self.Dwarf_offset('debug_abbrev_offset'))),
                Embed(Struct('',
                    self.Dwarf_offset('debug_abbrev_offset'),
                    self.Dwarf_uint8('address_size'))),
            ))

    def _create_abbrev_declaration(self):
        """ Create Dwarf_abbrev_declaration: tag, children flag and a list of
            (name, form) attribute specs that stops at the
            DW_AT_null/DW_FORM_null pair.
        """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM))))

    def _create_dw_form(self):
        """ Create Dwarf_dw_form: a dictionary from form name to an already
            named (with an empty name) construct that parses that form.
        """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),
            DW_FORM_addrx=self.Dwarf_uleb128(''),
            DW_FORM_addrx1=self.Dwarf_uint8(''),
            DW_FORM_addrx2=self.Dwarf_uint16(''),
            # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
            DW_FORM_addrx4=self.Dwarf_uint32(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_strx1=self.Dwarf_uint8(''),
            DW_FORM_strx2=self.Dwarf_uint16(''),
            # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
            # The numeric suffix is the index size in bytes (as for
            # DW_FORM_addrx<n> above), so strx4 is a 4-byte value. Reading 8
            # bytes here would swallow part of the next attribute.
            DW_FORM_strx4=self.Dwarf_uint32(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref=self.Dwarf_uint32(''),
            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            # Address-sized in DWARF version 2, offset-sized in every other
            # version.
            DW_FORM_ref_addr=(
                self.Dwarf_target_addr('') if self.dwarf_version == 2
                else self.Dwarf_offset('')),

            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            DW_FORM_flag_present=StaticField('', 0),
            DW_FORM_sec_offset=self.Dwarf_offset(''),
            DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
            DW_FORM_ref_sig8=self.Dwarf_uint64(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            # NOTE(review): this key is a DW_AT_* name inside a DW_FORM_*
            # table; kept as-is because callers may look it up - confirm.
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),
        )

    def _create_aranges_header(self):
        """ Create Dwarf_aranges_header.
        """
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),  # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
        )

    def _create_nameLUT_header(self):
        """ Create Dwarf_nameLUT_header.
        """
        self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),
            self.Dwarf_length('debug_info_length')
        )

    def _create_string_offsets_table_header(self):
        """ Create Dwarf_string_offsets_table_header.
        """
        self.Dwarf_string_offsets_table_header = Struct(
            "Dwarf_string_offets_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint16('padding'),
        )

    def _create_address_table_header(self):
        """ Create Dwarf_address_table_header.
        """
        self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
        )

    def _create_lineprog_header(self):
        """ Create Dwarf_lineprog_file_entry and Dwarf_lineprog_header.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # Only parsed from the stream for version >= 4; older headers get
            # the constant 1 instead.
            If(lambda ctx: ctx['version'] >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                elsevalue=1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            # One length per opcode below opcode_base.
            Array(lambda ctx: ctx['opcode_base'] - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            # Both lists end at an empty entry.
            RepeatUntilExcluding(
                lambda obj, ctx: obj == b'',
                CString('include_directory')),
            RepeatUntilExcluding(
                lambda obj, ctx: len(obj.name) == 0,
                self.Dwarf_lineprog_file_entry),
        )

    def _create_callframe_entry_headers(self):
        """ Create Dwarf_CIE_header, EH_CIE_header and Dwarf_FDE_header.
        """
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'))
        # EH_CIE_header always keeps the layout above, even when
        # Dwarf_CIE_header is replaced below.
        self.EH_CIE_header = self.Dwarf_CIE_header

        # The CIE header was modified in DWARFv4.
        # NOTE(review): only dwarf_version == 4 selects the extended layout;
        # confirm whether later versions should select it as well.
        if self.dwarf_version == 4:
            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
                self.Dwarf_initial_length('length'),
                self.Dwarf_offset('CIE_id'),
                self.Dwarf_uint8('version'),
                CString('augmentation'),
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
                self.Dwarf_uleb128('code_alignment_factor'),
                self.Dwarf_sleb128('data_alignment_factor'),
                self.Dwarf_uleb128('return_address_register'))

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>

            length_field:
                One of the (unnamed) integer constructs of this object; it
                parses the element count that prefixes the block's bytes.
        """
        return PrefixedArray(
            subcon=self.Dwarf_uint8('elem'),
            length_field=length_field(''))
340
341
class _InitialLengthAdapter(Adapter):
    """ A standard Construct adapter that expects a sub-construct
        as a struct with one or two values (first, second).
    """
    def _decode(self, obj, context):
        """ Collapse the parsed (first, second) pair into a single length.

            Returns obj.first when it is an ordinary 32-bit length, or
            obj.second when obj.first is the 0xFFFFFFFF escape value.
            Raises ConstructError for the remaining reserved values.
        """
        first_word = obj.first
        # Only 0xFFFFFFF0..0xFFFFFFFF are reserved by the DWARF spec
        # (section 7.4); everything below is a valid 32-bit length.
        if first_word < 0xFFFFFFF0:
            return first_word
        if first_word == 0xFFFFFFFF:
            return obj.second
        raise ConstructError("Failed decoding initial length for %X" % (
            first_word))