Supplementary object files (#426)
[pyelftools.git] / elftools / dwarf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from logging.config import valid_ident
11 from ..construct import (
12 UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
13 SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
14 Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
15 CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence,
16 String, Switch, Value
17 )
18 from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128,
19 StreamOffset)
20 from .enums import *
21
22
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_length:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding
                section 7.4

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                A call-frame CIE

            Dwarf_FDE_header (+):
                A call-frame FDE

            Dwarf_debugsup (+), Dwarf_debugaltlink (+):
                Structs with a 'sup_filename' field, built by
                _create_debugsup and _create_gnu_debugaltlink

            Dwarf_aranges_header (+), Dwarf_nameLUT_header (+),
            Dwarf_string_offsets_table_header (+),
            Dwarf_address_table_header (+),
            Dwarf_loclists_CU_header (+), Dwarf_loclists_entries (+),
            Dwarf_locview_pair (+),
            Dwarf_rnglists_CU_header (+), Dwarf_rnglists_entries (+):
                Additional headers/entry parsers; see the corresponding
                _create_* methods for their fields.

        See also the documentation of public methods.
    """
    def __init__(self,
                 little_endian, dwarf_format, address_size, dwarf_version=2):
        """ dwarf_version:
                Numeric DWARF version

            little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)
        """
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4, str(address_size)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field.
        """
        # 32-bit format: one 4-byte word. 64-bit format: the 4-byte escape
        # word followed by an 8-byte length.
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Bind the fixed-size integer constructs that match the configured
            endianness, DWARF format and address size, then build every
            composite struct on top of them.
        """
        if self.little_endian:
            self.Dwarf_uint8 = ULInt8
            self.Dwarf_uint16 = ULInt16
            self.Dwarf_uint32 = ULInt32
            self.Dwarf_uint64 = ULInt64
            self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_length = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_target_addr = (
                ULInt32 if self.address_size == 4 else ULInt64)
            self.Dwarf_int8 = SLInt8
            self.Dwarf_int16 = SLInt16
            self.Dwarf_int32 = SLInt32
            self.Dwarf_int64 = SLInt64
        else:
            self.Dwarf_uint8 = UBInt8
            self.Dwarf_uint16 = UBInt16
            self.Dwarf_uint32 = UBInt32
            self.Dwarf_uint64 = UBInt64
            self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_target_addr = (
                UBInt32 if self.address_size == 4 else UBInt64)
            self.Dwarf_int8 = SBInt8
            self.Dwarf_int16 = SBInt16
            self.Dwarf_int32 = SBInt32
            self.Dwarf_int64 = SBInt64

        # The initial-length and LEB128 helpers come first because the
        # composite structs below are built from them.
        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()
        self._create_nameLUT_header()
        self._create_string_offsets_table_header()
        self._create_address_table_header()
        self._create_loclists_parsers()
        self._create_rnglists_parsers()

        self._create_debugsup()
        self._create_gnu_debugaltlink()

    def _create_initial_length(self):
        """ Create Dwarf_initial_length: a factory taking a field name and
            returning a construct for the "initial length" encoding.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Expose the ULEB128/SLEB128 constructs under Dwarf_* names.
        """
        self.Dwarf_uleb128 = ULEB128
        self.Dwarf_sleb128 = SLEB128

    def _create_cu_header(self):
        """ Create Dwarf_CU_header. The field layout after 'version' depends
            on the parsed version value.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
            IfThenElse('', lambda ctx: ctx['version'] >= 5,
                Embed(Struct('',
                    self.Dwarf_uint8('unit_type'),
                    self.Dwarf_uint8('address_size'),
                    self.Dwarf_offset('debug_abbrev_offset'))),
                Embed(Struct('',
                    self.Dwarf_offset('debug_abbrev_offset'),
                    self.Dwarf_uint8('address_size'))),
            ))

    def _create_abbrev_declaration(self):
        """ Create Dwarf_abbrev_declaration: tag, children flag and the list
            of attribute specifications up to (excluding) the null/null pair.
        """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
                    # Only DW_FORM_implicit_const carries its value inside
                    # the abbreviation itself.
                    If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
                        self.Dwarf_sleb128('value')))))

    def _create_debugsup(self):
        """ Create Dwarf_debugsup: version, is_supplementary flag and the
            supplementary file name.
        """
        # We don't care about checksums, for now.
        self.Dwarf_debugsup = Struct('Elf_debugsup',
            # Unsigned, like every other 'version' field in this class.
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('is_supplementary'),
            CString('sup_filename'))

    def _create_gnu_debugaltlink(self):
        """ Create Dwarf_debugaltlink: a file name followed by a 20-byte
            checksum string.
        """
        self.Dwarf_debugaltlink = Struct('Elf_debugaltlink',
            CString("sup_filename"),
            String("sup_checksum", length=20))

    def _create_dw_form(self):
        """ Create Dwarf_dw_form: a dict mapping form names to constructs
            (already given an empty dummy name) that parse the raw value of
            an attribute encoded with that form.
        """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),
            DW_FORM_addrx=self.Dwarf_uleb128(''),
            DW_FORM_addrx1=self.Dwarf_uint8(''),
            DW_FORM_addrx2=self.Dwarf_uint16(''),
            # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
            DW_FORM_addrx4=self.Dwarf_uint32(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_strp_sup=self.Dwarf_offset(''),
            DW_FORM_line_strp=self.Dwarf_offset(''),
            DW_FORM_strx1=self.Dwarf_uint8(''),
            DW_FORM_strx2=self.Dwarf_uint16(''),
            # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
            # strx4 is a 4-byte index (same width as addrx4); reading 8 bytes
            # here would consume part of the following attribute.
            DW_FORM_strx4=self.Dwarf_uint32(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref=self.Dwarf_uint32(''),
            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref_sup4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_sup8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            # Address-sized in DWARF version 2, offset-sized otherwise.
            DW_FORM_ref_addr=(
                self.Dwarf_target_addr('') if self.dwarf_version == 2
                else self.Dwarf_offset('')),

            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            DW_FORM_flag_present=StaticField('', 0),
            DW_FORM_sec_offset=self.Dwarf_offset(''),
            DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
            DW_FORM_ref_sig8=self.Dwarf_uint64(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            # NOTE(review): this key is an attribute name (DW_AT_*), not a
            # form name; kept as-is for compatibility - confirm whether any
            # caller relies on it.
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),

            # New forms in DWARFv5
            DW_FORM_loclistx=self.Dwarf_uleb128(''),
            DW_FORM_rnglistx=self.Dwarf_uleb128('')
        )

    def _create_aranges_header(self):
        """ Create Dwarf_aranges_header.
        """
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),  # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
            )

    def _create_nameLUT_header(self):
        """ Create Dwarf_nameLUT_header.
        """
        self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),
            self.Dwarf_length('debug_info_length')
            )

    def _create_string_offsets_table_header(self):
        """ Create Dwarf_string_offsets_table_header.
        """
        self.Dwarf_string_offsets_table_header = Struct(
            "Dwarf_string_offets_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint16('padding'),
            )

    def _create_address_table_header(self):
        """ Create Dwarf_address_table_header.
        """
        self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            )

    def _create_lineprog_header(self):
        """ Create Dwarf_lineprog_file_entry and Dwarf_lineprog_header. The
            header layout is selected by the parsed 'version' field: the
            format-driven directory/file tables for version >= 5, the legacy
            NUL-terminated lists for version < 5.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        class FormattedEntry(Construct):
            # Generates a parser based on a previously parsed piece,
            # similar to deprecated Dynamic.
            # Strings are resolved later, since it potentially requires
            # looking at another section.
            def __init__(self, name, structs, format_field):
                Construct.__init__(self, name)
                self.structs = structs
                self.format_field = format_field

            def _parse(self, stream, context):
                # Somewhat tricky technique here, explicitly writing back to the context
                if self.format_field + "_parser" in context:
                    parser = context[self.format_field + "_parser"]
                else:
                    # Build one field per (content_type, form) pair found in
                    # the already-parsed format description, and keep the
                    # resulting parser in the context for later entries.
                    fields = tuple(
                        Rename(f.content_type, self.structs.Dwarf_dw_form[f.form])
                        for f in context[self.format_field])
                    parser = Struct('formatted_entry', *fields)
                    context[self.format_field + "_parser"] = parser
                return parser._parse(stream, context)

        ver5 = lambda ctx: ctx.version >= 5

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            If(ver5,
                self.Dwarf_uint8("address_size"),
                None),
            If(ver5,
                self.Dwarf_uint8("segment_selector_size"),
                None),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # Field is only present for version >= 4; otherwise the value 1
            # is supplied.
            If(lambda ctx: ctx.version >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx.opcode_base - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            If(ver5,
                PrefixedArray(
                    Struct('directory_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("directory_entry_format_count"))),
            If(ver5,  # Name deliberately doesn't match the legacy object, since the format can't be made compatible
                PrefixedArray(
                    FormattedEntry('directories', self, "directory_entry_format"),
                    self.Dwarf_uleb128('directories_count'))),
            If(ver5,
                PrefixedArray(
                    Struct('file_name_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("file_name_entry_format_count"))),
            If(ver5,
                PrefixedArray(
                    FormattedEntry('file_names', self, "file_name_entry_format"),
                    self.Dwarf_uleb128('file_names_count'))),
            # Legacy directories/files - DWARF < 5 only
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: obj == b'',
                    CString('include_directory'))),
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: len(obj.name) == 0,
                    self.Dwarf_lineprog_file_entry))  # array name is file_entry
            )

    def _create_callframe_entry_headers(self):
        """ Create Dwarf_CIE_header, EH_CIE_header and Dwarf_FDE_header.
        """
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'))
        # EH_CIE_header always keeps the layout above, even when
        # Dwarf_CIE_header is replaced below.
        self.EH_CIE_header = self.Dwarf_CIE_header

        # The CIE header was modified in DWARFv4.
        if self.dwarf_version == 4:
            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
                self.Dwarf_initial_length('length'),
                self.Dwarf_offset('CIE_id'),
                self.Dwarf_uint8('version'),
                CString('augmentation'),
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
                self.Dwarf_uleb128('code_alignment_factor'),
                self.Dwarf_sleb128('data_alignment_factor'),
                self.Dwarf_uleb128('return_address_register'))

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>

            length_field:
                An unnamed construct factory (e.g. self.Dwarf_uint8) used to
                parse the length prefix; the payload is parsed as that many
                single bytes.
        """
        return PrefixedArray(
                    subcon=self.Dwarf_uint8('elem'),
                    length_field=length_field(''))

    def _create_loclists_parsers(self):
        """ Create the debug_loclists parsers (DWARFv5, 7.29): the CU header,
            the counted location description, the entry list (read up to,
            and excluding, DW_LLE_end_of_list) and the location view pair.
        """
        self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # 'is64' is written into the context by the initial length adapter
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(
            self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128(''))

        # NOTE: several inner Struct names below repeat 'startx_endx'; they
        # are reproduced unchanged from the original table.
        self.Dwarf_loclists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE),
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_LLE_end_of_list'      : Struct('end_of_list'),
                    'DW_LLE_base_addressx'    : Struct('base_addressx', self.Dwarf_uleb128('index')),
                    'DW_LLE_startx_endx'      : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index'), cld),
                    'DW_LLE_startx_length'    : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length'), cld),
                    'DW_LLE_offset_pair'      : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset'), cld),
                    'DW_LLE_default_location' : Struct('default_location', cld),
                    'DW_LLE_base_address'     : Struct('base_address', self.Dwarf_target_addr('address')),
                    'DW_LLE_start_end'        : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address'), cld),
                    'DW_LLE_start_length'     : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'), cld),
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))

        self.Dwarf_locview_pair = Struct('locview_pair',
            StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end'))

    def _create_rnglists_parsers(self):
        """ Create the debug_rnglists parsers: the CU header and the entry
            list (read up to, and excluding, DW_RLE_end_of_list).
        """
        self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # 'is64' is written into the context by the initial length adapter
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        # NOTE: several inner Struct names below repeat 'startx_endx'; they
        # are reproduced unchanged from the original table.
        self.Dwarf_rnglists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_RLE),
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_RLE_end_of_list'      : Struct('end_of_list'),
                    'DW_RLE_base_addressx'    : Struct('base_addressx', self.Dwarf_uleb128('index')),
                    'DW_RLE_startx_endx'      : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index')),
                    'DW_RLE_startx_length'    : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length')),
                    'DW_RLE_offset_pair'      : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset')),
                    'DW_RLE_base_address'     : Struct('base_address', self.Dwarf_target_addr('address')),
                    'DW_RLE_start_end'        : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address')),
                    'DW_RLE_start_length'     : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'))
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))
493
494
class _InitialLengthAdapter(Adapter):
    """ Construct adapter that collapses a parsed (first, second) initial
        length struct into a single length value, and records in the
        context (key 'is64') whether the 64-bit escape word was seen.
    """
    def _decode(self, obj, context):
        first_word = obj.first

        # Escape value: the real length is carried by the second word.
        if first_word == 0xFFFFFFFF:
            context['is64'] = True
            return obj.second

        # Any other value at or above 0xFFFFFF00 is rejected.
        if first_word >= 0xFFFFFF00:
            raise ConstructError("Failed decoding initial length for %X" % (
                first_word))

        # Ordinary case: the first word is the length itself.
        context['is64'] = False
        return first_word