fe68d439236befc5a2f707e41ed1c0484f4df62d
[pyelftools.git] / elftools / dwarf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from logging.config import valid_ident
11 from ..construct import (
12 UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
13 SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
14 Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
15 CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence,
16 String, Switch, Value
17 )
18 from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128,
19 StreamOffset)
20 from .enums import *
21
22
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Instances are cached: constructing the class twice with the same
        (little_endian, dwarf_format, address_size, dwarf_version) returns the
        same object.

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_length:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding
                section 7.4

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                A call-frame CIE

            Dwarf_FDE_header (+):
                A call-frame FDE

        See also the documentation of public methods.
    """

    # Cache for structs instances based on creation parameters. Structs
    # initialization is expensive and we don't want to repeat it
    # unnecessarily.
    _structs_cache = {}

    def __new__(cls, little_endian, dwarf_format, address_size, dwarf_version=2):
        """ Return the (possibly cached) structs object for the given
            parameters.

            dwarf_version:
                Numeric DWARF version

            little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)
        """
        key = (little_endian, dwarf_format, address_size, dwarf_version)

        # A cache hit skips validation and struct creation entirely.
        if key in cls._structs_cache:
            return cls._structs_cache[key]

        self = super().__new__(cls)
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4, str(address_size)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()
        # Only fully initialized objects are stored in the cache.
        cls._structs_cache[key] = self
        return self

    def initial_length_field_size(self):
        """ Size of an initial length field: 4 bytes when dwarf_format is 32,
            12 bytes otherwise.
        """
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Bind the basic integer constructs according to endianness,
            dwarf_format and address_size, then build all composite structs.
        """
        if self.little_endian:
            self.Dwarf_uint8 = ULInt8
            self.Dwarf_uint16 = ULInt16
            self.Dwarf_uint32 = ULInt32
            self.Dwarf_uint64 = ULInt64
            self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_length = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_target_addr = (
                ULInt32 if self.address_size == 4 else ULInt64)
            self.Dwarf_int8 = SLInt8
            self.Dwarf_int16 = SLInt16
            self.Dwarf_int32 = SLInt32
            self.Dwarf_int64 = SLInt64
        else:
            self.Dwarf_uint8 = UBInt8
            self.Dwarf_uint16 = UBInt16
            self.Dwarf_uint32 = UBInt32
            self.Dwarf_uint64 = UBInt64
            self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_target_addr = (
                UBInt32 if self.address_size == 4 else UBInt64)
            self.Dwarf_int8 = SBInt8
            self.Dwarf_int16 = SBInt16
            self.Dwarf_int32 = SBInt32
            self.Dwarf_int64 = SBInt64

        # The initial length and LEB128 helpers are created first because the
        # composite structs below are built from them.
        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()
        self._create_nameLUT_header()
        self._create_string_offsets_table_header()
        self._create_address_table_header()
        self._create_loclists_parsers()
        self._create_rnglists_parsers()

        self._create_debugsup()
        self._create_gnu_debugaltlink()

    def _create_initial_length(self):
        """ Create Dwarf_initial_length, a factory taking a name and returning
            a construct that parses an initial length field.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Expose the ULEB128/SLEB128 constructs as attributes.
        """
        self.Dwarf_uleb128 = ULEB128
        self.Dwarf_sleb128 = SLEB128

    def _create_cu_header(self):
        """ Create Dwarf_CU_header. The fields following 'version' depend on
            the parsed version value.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
            IfThenElse('', lambda ctx: ctx['version'] >= 5,
                Embed(Struct('',
                    self.Dwarf_uint8('unit_type'),
                    self.Dwarf_uint8('address_size'),
                    self.Dwarf_offset('debug_abbrev_offset'))),
                Embed(Struct('',
                    self.Dwarf_offset('debug_abbrev_offset'),
                    self.Dwarf_uint8('address_size'))),
            ))

    def _create_abbrev_declaration(self):
        """ Create Dwarf_abbrev_declaration: tag, children flag and a list of
            attribute specs that ends at (and excludes) the null name/form
            pair.
        """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
                    # Only DW_FORM_implicit_const carries its value inside
                    # the abbreviation itself.
                    If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
                        self.Dwarf_sleb128('value')))))

    def _create_debugsup(self):
        """ Create Dwarf_debugsup: version, is_supplementary flag and the
            supplementary file name.
        """
        # We don't care about checksums, for now.
        self.Dwarf_debugsup = Struct('Elf_debugsup',
            self.Dwarf_int16('version'),
            self.Dwarf_uint8('is_supplementary'),
            CString('sup_filename'))

    def _create_gnu_debugaltlink(self):
        """ Create Dwarf_debugaltlink: a file name followed by a 20-byte
            checksum string.
        """
        self.Dwarf_debugaltlink = Struct('Elf_debugaltlink',
            CString("sup_filename"),
            String("sup_checksum", length=20))

    def _create_dw_form(self):
        """ Create Dwarf_dw_form, a dictionary mapping form names to the
            (already named) constructs that parse their raw values.
        """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),
            DW_FORM_addrx=self.Dwarf_uleb128(''),
            DW_FORM_addrx1=self.Dwarf_uint8(''),
            DW_FORM_addrx2=self.Dwarf_uint16(''),
            # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
            DW_FORM_addrx4=self.Dwarf_uint32(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            DW_FORM_data16=Array(16, self.Dwarf_uint8('')),  # Used for hashes and such, not for integers
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_strp_sup=self.Dwarf_offset(''),
            DW_FORM_line_strp=self.Dwarf_offset(''),
            DW_FORM_strx1=self.Dwarf_uint8(''),
            DW_FORM_strx2=self.Dwarf_uint16(''),
            # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
            # strx4 is a 4-byte index (like addrx4 above); reading 8 bytes
            # would consume part of the following attribute.
            DW_FORM_strx4=self.Dwarf_uint32(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref=self.Dwarf_uint32(''),
            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref_sup4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_sup8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            # Address-sized for dwarf_version 2, offset-sized otherwise.
            DW_FORM_ref_addr=self.Dwarf_target_addr('') if self.dwarf_version == 2 else self.Dwarf_offset(''),

            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            DW_FORM_flag_present=StaticField('', 0),
            DW_FORM_sec_offset=self.Dwarf_offset(''),
            DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
            DW_FORM_ref_sig8=self.Dwarf_uint64(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),

            # New forms in DWARFv5
            DW_FORM_loclistx=self.Dwarf_uleb128(''),
            DW_FORM_rnglistx=self.Dwarf_uleb128('')
        )

    def _create_aranges_header(self):
        """ Create Dwarf_aranges_header.
        """
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),  # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
        )

    def _create_nameLUT_header(self):
        """ Create Dwarf_nameLUT_header.
        """
        self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),
            self.Dwarf_length('debug_info_length')
        )

    def _create_string_offsets_table_header(self):
        """ Create Dwarf_string_offsets_table_header.
        """
        self.Dwarf_string_offsets_table_header = Struct(
            "Dwarf_string_offets_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint16('padding'),
        )

    def _create_address_table_header(self):
        """ Create Dwarf_address_table_header.
        """
        self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
        )

    def _create_lineprog_header(self):
        """ Create Dwarf_lineprog_file_entry and Dwarf_lineprog_header. The
            header layout is selected by the parsed 'version' field.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        class FormattedEntry(Construct):
            # Generates a parser based on a previously parsed piece,
            # similar to deprecated Dynamic.
            # Strings are resolved later, since it potentially requires
            # looking at another section.
            def __init__(self, name, structs, format_field):
                Construct.__init__(self, name)
                self.structs = structs
                self.format_field = format_field

            def _parse(self, stream, context):
                # Somewhat tricky technique here, explicitly writing back to the context
                if self.format_field + "_parser" in context:
                    parser = context[self.format_field + "_parser"]
                else:
                    # Build the entry parser once from the format description
                    # and keep it in the context for later entries.
                    fields = tuple(
                        Rename(f.content_type, self.structs.Dwarf_dw_form[f.form])
                        for f in context[self.format_field])
                    parser = Struct('formatted_entry', *fields)
                    context[self.format_field + "_parser"] = parser
                return parser._parse(stream, context)

        ver5 = lambda ctx: ctx.version >= 5

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            If(ver5,
                self.Dwarf_uint8("address_size"),
                None),
            If(ver5,
                self.Dwarf_uint8("segment_selector_size"),
                None),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # Parsed only for version >= 4; otherwise the value is 1.
            If(lambda ctx: ctx.version >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx.opcode_base - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            If(ver5,
                PrefixedArray(
                    Struct('directory_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("directory_entry_format_count"))),
            If(ver5,  # Name deliberately doesn't match the legacy object, since the format can't be made compatible
                PrefixedArray(
                    FormattedEntry('directories', self, "directory_entry_format"),
                    self.Dwarf_uleb128('directories_count'))),
            If(ver5,
                PrefixedArray(
                    Struct('file_name_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("file_name_entry_format_count"))),
            If(ver5,
                PrefixedArray(
                    FormattedEntry('file_names', self, "file_name_entry_format"),
                    self.Dwarf_uleb128('file_names_count'))),
            # Legacy directories/files - DWARF < 5 only
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: obj == b'',
                    CString('include_directory'))),
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: len(obj.name) == 0,
                    self.Dwarf_lineprog_file_entry))  # array name is file_entry
        )

    def _create_callframe_entry_headers(self):
        """ Create Dwarf_CIE_header, EH_CIE_header and Dwarf_FDE_header.
            EH_CIE_header always keeps the layout without address_size and
            segment_size, even when Dwarf_CIE_header is replaced below.
        """
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'))
        self.EH_CIE_header = self.Dwarf_CIE_header

        # The CIE header was modified in DWARFv4.
        if self.dwarf_version == 4:
            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
                self.Dwarf_initial_length('length'),
                self.Dwarf_offset('CIE_id'),
                self.Dwarf_uint8('version'),
                CString('augmentation'),
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
                self.Dwarf_uleb128('code_alignment_factor'),
                self.Dwarf_sleb128('data_alignment_factor'),
                self.Dwarf_uleb128('return_address_register'))

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>: an array of bytes
            prefixed by a length parsed with length_field.
        """
        return PrefixedArray(
                    subcon=self.Dwarf_uint8('elem'),
                    length_field=length_field(''))

    def _create_loclists_parsers(self):
        """ Create a struct for debug_loclists CU header, DWARFv5, 7,29
            Also creates the location list entry parser and the locview pair
            parser.
        """
        self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # is64 is written into the context by the initial length adapter.
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128(''))

        self.Dwarf_loclists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE),
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_LLE_end_of_list'      : Struct('end_of_list'),
                    'DW_LLE_base_addressx'    : Struct('base_addressx', self.Dwarf_uleb128('index')),
                    'DW_LLE_startx_endx'      : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index'), cld),
                    'DW_LLE_startx_length'    : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length'), cld),
                    'DW_LLE_offset_pair'      : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset'), cld),
                    'DW_LLE_default_location' : Struct('default_location', cld),
                    'DW_LLE_base_address'     : Struct('base_address', self.Dwarf_target_addr('address')),
                    'DW_LLE_start_end'        : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address'), cld),
                    'DW_LLE_start_length'     : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'), cld),
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))

        self.Dwarf_locview_pair = Struct('locview_pair',
            StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end'))

    def _create_rnglists_parsers(self):
        """ Create the debug_rnglists CU header struct and the range list
            entry parser.
        """
        self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # is64 is written into the context by the initial length adapter.
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        self.Dwarf_rnglists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_RLE),
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_RLE_end_of_list'   : Struct('end_of_list'),
                    'DW_RLE_base_addressx' : Struct('base_addressx', self.Dwarf_uleb128('index')),
                    'DW_RLE_startx_endx'   : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index')),
                    'DW_RLE_startx_length' : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length')),
                    'DW_RLE_offset_pair'   : Struct('startx_endx', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset')),
                    'DW_RLE_base_address'  : Struct('base_address', self.Dwarf_target_addr('address')),
                    'DW_RLE_start_end'     : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address')),
                    'DW_RLE_start_length'  : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'))
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))
507
508
class _InitialLengthAdapter(Adapter):
    """ Construct adapter over a struct exposing 'first' and 'second' values.
        Decoding yields the length and records in the context, under the key
        'is64', whether the extended (second word) encoding was used.
    """
    def _decode(self, obj, context):
        first_word = obj.first

        # Continuation marker: the length is held in the second word.
        if first_word == 0xFFFFFFFF:
            context['is64'] = True
            return obj.second

        # Any other value from 0xFFFFFF00 upwards is rejected.
        if first_word >= 0xFFFFFF00:
            raise ConstructError("Failed decoding initial length for %X" % (
                first_word))

        # Plain case: the first word is the length itself.
        context['is64'] = False
        return first_word