Cache instantiation of DWARF structs. (#435)
[pyelftools.git] / elftools / dwarf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from logging.config import valid_ident
11 from ..construct import (
12 UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
13 SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
14 Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
15 CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence,
16 String, Switch, Value
17 )
18 from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128,
19 StreamOffset)
20 from .enums import *
21
22
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Instances are cached per (little_endian, dwarf_format, address_size,
        dwarf_version) tuple, so constructing the class twice with the same
        arguments returns the very same (shared) object.

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_length:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding
                section 7.4

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                A call-frame CIE

            Dwarf_FDE_header (+):
                A call-frame FDE

        Further ready-to-use structs are created by the corresponding
        _create_* methods: EH_CIE_header, Dwarf_aranges_header,
        Dwarf_nameLUT_header, Dwarf_string_offsets_table_header,
        Dwarf_address_table_header, Dwarf_loclists_CU_header,
        Dwarf_loclists_entries, Dwarf_locview_pair, Dwarf_rnglists_CU_header,
        Dwarf_rnglists_entries, Dwarf_debugsup and Dwarf_debugaltlink.

        See also the documentation of public methods.
    """

    # Maps (little_endian, dwarf_format, address_size, dwarf_version) to the
    # fully built instance for those parameters.
    __StructsCache = {}

    def __new__(cls, little_endian, dwarf_format, address_size, dwarf_version=2):
        """ dwarf_version:
                Numeric DWARF version

            little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)

            Returns the cached instance for this combination of arguments if
            one exists; otherwise builds, caches and returns a new one.
        """
        key = (little_endian, dwarf_format, address_size, dwarf_version)

        try:
            return cls.__StructsCache[key]
        except KeyError:
            pass

        self = super().__new__(cls)
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4, str(address_size)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()
        # Only cache once construction has fully succeeded.
        cls.__StructsCache[key] = self
        return self

    def initial_length_field_size(self):
        """ Size of an initial length field.
        """
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Select the endianness-specific integer primitives, then build
            every composite struct on top of them.
        """
        if self.little_endian:
            self.Dwarf_uint8 = ULInt8
            self.Dwarf_uint16 = ULInt16
            self.Dwarf_uint32 = ULInt32
            self.Dwarf_uint64 = ULInt64
            self.Dwarf_int8 = SLInt8
            self.Dwarf_int16 = SLInt16
            self.Dwarf_int32 = SLInt32
            self.Dwarf_int64 = SLInt64
        else:
            self.Dwarf_uint8 = UBInt8
            self.Dwarf_uint16 = UBInt16
            self.Dwarf_uint32 = UBInt32
            self.Dwarf_uint64 = UBInt64
            self.Dwarf_int8 = SBInt8
            self.Dwarf_int16 = SBInt16
            self.Dwarf_int32 = SBInt32
            self.Dwarf_int64 = SBInt64

        # Offsets and lengths follow the DWARF format (32/64); target
        # addresses follow the machine address size (4/8 bytes).
        format_word = (
            self.Dwarf_uint32 if self.dwarf_format == 32 else self.Dwarf_uint64)
        self.Dwarf_offset = format_word
        self.Dwarf_length = format_word
        self.Dwarf_target_addr = (
            self.Dwarf_uint32 if self.address_size == 4 else self.Dwarf_uint64)

        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()
        self._create_nameLUT_header()
        self._create_string_offsets_table_header()
        self._create_address_table_header()
        self._create_loclists_parsers()
        self._create_rnglists_parsers()

        self._create_debugsup()
        self._create_gnu_debugaltlink()

    def _create_initial_length(self):
        """ Create Dwarf_initial_length, a factory taking a field name.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Expose the LEB128 variable-length integer constructs.
        """
        self.Dwarf_uleb128 = ULEB128
        self.Dwarf_sleb128 = SLEB128

    def _create_cu_header(self):
        """ Create Dwarf_CU_header; the field order after 'version' depends
            on the version value parsed from the stream.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
            IfThenElse('', lambda ctx: ctx['version'] >= 5,
                Embed(Struct('',
                    self.Dwarf_uint8('unit_type'),
                    self.Dwarf_uint8('address_size'),
                    self.Dwarf_offset('debug_abbrev_offset'))),
                Embed(Struct('',
                    self.Dwarf_offset('debug_abbrev_offset'),
                    self.Dwarf_uint8('address_size'))),
            ))

    def _create_abbrev_declaration(self):
        """ Create Dwarf_abbrev_declaration: tag, children flag and the list
            of attribute specs up to (excluding) the null name/form pair.
        """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
                    # The constant is stored in the abbreviation itself, so
                    # it is only present for DW_FORM_implicit_const.
                    If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
                        self.Dwarf_sleb128('value')))))

    def _create_debugsup(self):
        """ Create Dwarf_debugsup (version, is_supplementary, sup_filename).
        """
        # We don't care about checksums, for now.
        self.Dwarf_debugsup = Struct('Elf_debugsup',
            self.Dwarf_int16('version'),
            self.Dwarf_uint8('is_supplementary'),
            CString('sup_filename'))

    def _create_gnu_debugaltlink(self):
        """ Create Dwarf_debugaltlink (file name plus 20-byte checksum).
        """
        self.Dwarf_debugaltlink = Struct('Elf_debugaltlink',
            CString("sup_filename"),
            String("sup_checksum", length=20))

    def _create_dw_form(self):
        """ Create Dwarf_dw_form, the mapping from form name to the
            (already named) construct that parses a value of that form.
        """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),
            DW_FORM_addrx=self.Dwarf_uleb128(''),
            DW_FORM_addrx1=self.Dwarf_uint8(''),
            DW_FORM_addrx2=self.Dwarf_uint16(''),
            # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
            DW_FORM_addrx4=self.Dwarf_uint32(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            # Used for hashes and such, not for integers
            DW_FORM_data16=Array(16, self.Dwarf_uint8('')),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_strp_sup=self.Dwarf_offset(''),
            DW_FORM_line_strp=self.Dwarf_offset(''),
            DW_FORM_strx1=self.Dwarf_uint8(''),
            DW_FORM_strx2=self.Dwarf_uint16(''),
            # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
            # strx4 is a 4-byte index (like addrx4 above), not an 8-byte one.
            DW_FORM_strx4=self.Dwarf_uint32(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref=self.Dwarf_uint32(''),
            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref_sup4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_sup8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            # Address-sized in DWARF version 2, offset-sized otherwise.
            DW_FORM_ref_addr=(
                self.Dwarf_target_addr('') if self.dwarf_version == 2
                else self.Dwarf_offset('')),

            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            DW_FORM_flag_present=StaticField('', 0),
            DW_FORM_sec_offset=self.Dwarf_offset(''),
            DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
            DW_FORM_ref_sig8=self.Dwarf_uint64(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            # NOTE(review): this key is an attribute name, not a form name;
            # kept unchanged for backward compatibility - confirm whether
            # anything actually looks it up.
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),

            # New forms in DWARFv5
            DW_FORM_loclistx=self.Dwarf_uleb128(''),
            DW_FORM_rnglistx=self.Dwarf_uleb128('')
        )

    def _create_aranges_header(self):
        """ Create Dwarf_aranges_header.
        """
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),  # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
            )

    def _create_nameLUT_header(self):
        """ Create Dwarf_nameLUT_header.
        """
        self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),
            self.Dwarf_length('debug_info_length')
            )

    def _create_string_offsets_table_header(self):
        """ Create Dwarf_string_offsets_table_header.
        """
        # NOTE(review): the struct name below is misspelled ("offets"); it is
        # left as-is in case anything depends on the exact name.
        self.Dwarf_string_offsets_table_header = Struct(
            "Dwarf_string_offets_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint16('padding'),
            )

    def _create_address_table_header(self):
        """ Create Dwarf_address_table_header.
        """
        self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            )

    def _create_lineprog_header(self):
        """ Create Dwarf_lineprog_file_entry and Dwarf_lineprog_header.

            The header struct covers both layouts: fields guarded by the
            version >= 5 predicate and the legacy include_directory /
            file_entry lists guarded by version < 5.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        class FormattedEntry(Construct):
            # Generates a parser based on a previously parsed piece,
            # similar to deprecated Dynamic.
            # Strings are resolved later, since it potentially requires
            # looking at another section.
            def __init__(self, name, structs, format_field):
                Construct.__init__(self, name)
                self.structs = structs
                self.format_field = format_field

            def _parse(self, stream, context):
                # Somewhat tricky technique here, explicitly writing back to
                # the context: the parser built from the format description
                # is stored there and reused for subsequent entries.
                parser_key = self.format_field + "_parser"
                if parser_key in context:
                    parser = context[parser_key]
                else:
                    fields = tuple(
                        Rename(f.content_type, self.structs.Dwarf_dw_form[f.form])
                        for f in context[self.format_field])
                    parser = Struct('formatted_entry', *fields)
                    context[parser_key] = parser
                return parser._parse(stream, context)

        def ver5(ctx):
            return ctx.version >= 5

        def before_ver5(ctx):
            return ctx.version < 5

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            If(ver5,
                self.Dwarf_uint8("address_size"),
                None),
            If(ver5,
                self.Dwarf_uint8("segment_selector_size"),
                None),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # Not present before version 4; the field then takes the value 1.
            If(lambda ctx: ctx.version >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx.opcode_base - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            If(ver5,
                PrefixedArray(
                    Struct('directory_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("directory_entry_format_count"))),
            # Name deliberately doesn't match the legacy object, since the
            # format can't be made compatible
            If(ver5,
                PrefixedArray(
                    FormattedEntry('directories', self, "directory_entry_format"),
                    self.Dwarf_uleb128('directories_count'))),
            If(ver5,
                PrefixedArray(
                    Struct('file_name_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("file_name_entry_format_count"))),
            If(ver5,
                PrefixedArray(
                    FormattedEntry('file_names', self, "file_name_entry_format"),
                    self.Dwarf_uleb128('file_names_count'))),
            # Legacy directories/files - DWARF < 5 only
            If(before_ver5,
                RepeatUntilExcluding(
                    lambda obj, ctx: obj == b'',
                    CString('include_directory'))),
            If(before_ver5,
                RepeatUntilExcluding(
                    lambda obj, ctx: len(obj.name) == 0,
                    self.Dwarf_lineprog_file_entry))  # array name is file_entry
            )

    def _create_callframe_entry_headers(self):
        """ Create Dwarf_CIE_header, EH_CIE_header and Dwarf_FDE_header.
        """
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'))
        # EH_CIE_header always keeps the layout above, even when
        # Dwarf_CIE_header is replaced below.
        self.EH_CIE_header = self.Dwarf_CIE_header

        # The CIE header was modified in DWARFv4.
        # NOTE(review): the check is an exact match on 4 - confirm which
        # version number callers pass here for newer producers.
        if self.dwarf_version == 4:
            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
                self.Dwarf_initial_length('length'),
                self.Dwarf_offset('CIE_id'),
                self.Dwarf_uint8('version'),
                CString('augmentation'),
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
                self.Dwarf_uleb128('code_alignment_factor'),
                self.Dwarf_sleb128('data_alignment_factor'),
                self.Dwarf_uleb128('return_address_register'))

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>

            length_field:
                Unnamed construct factory used to parse the element count
                that prefixes the block's bytes.
        """
        return PrefixedArray(
                    subcon=self.Dwarf_uint8('elem'),
                    length_field=length_field(''))

    def _create_loclists_parsers(self):
        """ Create a struct for debug_loclists CU header, DWARFv5, 7.29,
            plus the parsers for location list entries and locview pairs.
        """
        self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # 'is64' is written into the context by the initial length adapter.
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        # Counted location description: ULEB128 length followed by that many
        # expression bytes.
        cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(
            self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128(''))

        self.Dwarf_loclists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE),
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_LLE_end_of_list'      : Struct('end_of_list'),
                    'DW_LLE_base_addressx'    : Struct('base_addressx', self.Dwarf_uleb128('index')),
                    'DW_LLE_startx_endx'      : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index'), cld),
                    'DW_LLE_startx_length'    : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length'), cld),
                    'DW_LLE_offset_pair'      : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset'), cld),
                    'DW_LLE_default_location' : Struct('default_location', cld),
                    'DW_LLE_base_address'     : Struct('base_address', self.Dwarf_target_addr('address')),
                    'DW_LLE_start_end'        : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address'), cld),
                    'DW_LLE_start_length'     : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'), cld),
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))

        self.Dwarf_locview_pair = Struct('locview_pair',
            StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end'))

    def _create_rnglists_parsers(self):
        """ Create the debug_rnglists CU header struct and the parser for
            range list entries.
        """
        self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # 'is64' is written into the context by the initial length adapter.
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        self.Dwarf_rnglists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_RLE),
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_RLE_end_of_list'   : Struct('end_of_list'),
                    'DW_RLE_base_addressx' : Struct('base_addressx', self.Dwarf_uleb128('index')),
                    'DW_RLE_startx_endx'   : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index')),
                    'DW_RLE_startx_length' : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length')),
                    'DW_RLE_offset_pair'   : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset')),
                    'DW_RLE_base_address'  : Struct('base_address', self.Dwarf_target_addr('address')),
                    'DW_RLE_start_end'     : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address')),
                    'DW_RLE_start_length'  : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'))
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))
504
505
class _InitialLengthAdapter(Adapter):
    """ Construct adapter collapsing a parsed (first, second) initial length
        struct into a single length value.

        While decoding it also records in the parsing context, under the
        'is64' key, whether the 64-bit escape value was encountered.
    """
    def _decode(self, obj, context):
        first_word = obj.first

        # Plain 32-bit length: the first word is the value itself.
        if first_word < 0xFFFFFF00:
            context['is64'] = False
            return first_word

        # Anything in the upper range other than the 64-bit escape value
        # cannot be decoded.
        if first_word != 0xFFFFFFFF:
            raise ConstructError("Failed decoding initial length for %X" % (
                first_word))

        # Escape value: the real length is carried by the second word.
        context['is64'] = True
        return obj.second