b479399d10dfc75e060100d6cc9f31e7dbb09421
[pyelftools.git] / elftools / dwarf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from logging.config import valid_ident
11 from ..construct import (
12 UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
13 SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
14 Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
15 CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence,
16 Switch, Value
17 )
18 from ..common.construct_utils import (RepeatUntilExcluding, ULEB128, SLEB128,
19 StreamOffset)
20 from .enums import *
21
22
23 class DWARFStructs(object):
24 """ Exposes Construct structs suitable for parsing information from DWARF
25 sections. Each compile unit in DWARF info can have its own structs
26 object. Keep in mind that these structs have to be given a name (by
27 calling them with a name) before being used for parsing (like other
28 Construct structs). Those that should be used without a name are marked
29 by (+).
30
31 Accessible attributes (mostly as described in chapter 7 of the DWARF
32 spec v3):
33
        Dwarf_[u]int{8,16,32,64}:
35 Data chunks of the common sizes
36
37 Dwarf_offset:
38 32-bit or 64-bit word, depending on dwarf_format
39
40 Dwarf_length:
41 32-bit or 64-bit word, depending on dwarf_format
42
43 Dwarf_target_addr:
44 32-bit or 64-bit word, depending on address size
45
46 Dwarf_initial_length:
47 "Initial length field" encoding
48 section 7.4
49
50 Dwarf_{u,s}leb128:
51 ULEB128 and SLEB128 variable-length encoding
52
53 Dwarf_CU_header (+):
54 Compilation unit header
55
56 Dwarf_abbrev_declaration (+):
57 Abbreviation table declaration - doesn't include the initial
58 code, only the contents.
59
60 Dwarf_dw_form (+):
61 A dictionary mapping 'DW_FORM_*' keys into construct Structs
62 that parse such forms. These Structs have already been given
63 dummy names.
64
65 Dwarf_lineprog_header (+):
66 Line program header
67
68 Dwarf_lineprog_file_entry (+):
69 A single file entry in a line program header or instruction
70
71 Dwarf_CIE_header (+):
72 A call-frame CIE
73
74 Dwarf_FDE_header (+):
75 A call-frame FDE
76
77 See also the documentation of public methods.
78 """
def __init__(self,
             little_endian, dwarf_format, address_size, dwarf_version=2):
    """ Store the parsing parameters and build every struct.

        little_endian:
            True if the file is little endian, False if big

        dwarf_format:
            DWARF Format: 32 or 64-bit (see spec section 7.4)

        address_size:
            Target machine address size, in bytes (4 or 8). (See spec
            section 7.5.1)

        dwarf_version:
            Numeric DWARF version
    """
    # Only the two DWARF formats and the two address widths are supported.
    assert dwarf_format in (32, 64)
    assert address_size in (8, 4), str(address_size)

    self.dwarf_version = dwarf_version
    self.address_size = address_size
    self.dwarf_format = dwarf_format
    self.little_endian = little_endian

    # All parameters must be in place before the structs are derived
    # from them.
    self._create_structs()
101
def initial_length_field_size(self):
    """ Return the size, in bytes, of an initial length field: 4 when
        dwarf_format is 32, and 12 otherwise.
    """
    if self.dwarf_format == 32:
        return 4
    return 12
106
def _create_structs(self):
    """ Pick the integer constructs matching the configured endianness,
        derive the offset/length/address words from them, and then build
        all the composite structs.
    """
    if self.little_endian:
        unsigned = (ULInt8, ULInt16, ULInt32, ULInt64)
        signed = (SLInt8, SLInt16, SLInt32, SLInt64)
    else:
        unsigned = (UBInt8, UBInt16, UBInt32, UBInt64)
        signed = (SBInt8, SBInt16, SBInt32, SBInt64)

    (self.Dwarf_uint8, self.Dwarf_uint16,
     self.Dwarf_uint32, self.Dwarf_uint64) = unsigned
    (self.Dwarf_int8, self.Dwarf_int16,
     self.Dwarf_int32, self.Dwarf_int64) = signed

    word32, word64 = unsigned[2], unsigned[3]

    # Offsets and lengths follow the DWARF format; target addresses
    # follow the address size.
    self.Dwarf_offset = word32 if self.dwarf_format == 32 else word64
    self.Dwarf_length = word32 if self.dwarf_format == 32 else word64
    self.Dwarf_target_addr = word32 if self.address_size == 4 else word64

    # The builders run in this order; later ones use attributes set by
    # earlier ones (e.g. Dwarf_initial_length, Dwarf_uleb128).
    for build in (
            self._create_initial_length,
            self._create_leb128,
            self._create_cu_header,
            self._create_abbrev_declaration,
            self._create_dw_form,
            self._create_lineprog_header,
            self._create_callframe_entry_headers,
            self._create_aranges_header,
            self._create_nameLUT_header,
            self._create_string_offsets_table_header,
            self._create_address_table_header,
            self._create_loclists_parsers,
            self._create_rnglists_parsers):
        build()

148
def _create_initial_length(self):
    """ Install Dwarf_initial_length: a factory that, given a field name,
        returns an adapted struct parsing a whole initial length field.
    """
    def _InitialLength(name):
        # The 32-bit 'first' word is always read. The 64-bit 'second'
        # word is read from the stream only when 'first' holds the
        # continuation value; otherwise it is None.
        is_continuation = lambda ctx: ctx.first == 0xFFFFFFFF
        raw_length = Struct(name,
            self.Dwarf_uint32('first'),
            If(is_continuation,
                self.Dwarf_uint64('second'),
                elsevalue=None))
        return _InitialLengthAdapter(raw_length)

    self.Dwarf_initial_length = _InitialLength

161
def _create_leb128(self):
    """ Expose the unsigned and signed LEB128 constructs as attributes.
    """
    self.Dwarf_uleb128, self.Dwarf_sleb128 = ULEB128, SLEB128
165
def _create_cu_header(self):
    """ Build Dwarf_CU_header. The fields following 'version' depend on
        the parsed version value.
    """
    # Version 5 and later: unit_type, then address_size, then the
    # abbreviation offset.
    v5_fields = Struct('',
        self.Dwarf_uint8('unit_type'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_offset('debug_abbrev_offset'))

    # Earlier versions: abbreviation offset first, then address_size.
    legacy_fields = Struct('',
        self.Dwarf_offset('debug_abbrev_offset'),
        self.Dwarf_uint8('address_size'))

    is_v5_or_later = lambda ctx: ctx['version'] >= 5

    self.Dwarf_CU_header = Struct('Dwarf_CU_header',
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        IfThenElse('', is_v5_or_later,
            Embed(v5_fields),
            Embed(legacy_fields),
        ))

180
def _create_abbrev_declaration(self):
    """ Build Dwarf_abbrev_declaration: tag, children flag, and the list
        of attribute specifications up to (not including) the null one.
    """
    def is_null_spec(obj, ctx):
        # The attribute list ends at an entry whose name and form are
        # both the null enumerators.
        return obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null'

    attr_spec = Struct('attr_spec',
        Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
        # Only DW_FORM_implicit_const carries a value in the
        # declaration itself.
        If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
            self.Dwarf_sleb128('value')))

    self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
        Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
        Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
        RepeatUntilExcluding(is_null_spec, attr_spec))

193
def _create_dw_form(self):
    """ Build Dwarf_dw_form: a dictionary mapping form names (mostly
        'DW_FORM_*') to the constructs that parse an attribute value of
        that form. The constructs are already given a dummy (empty) name.
    """
    self.Dwarf_dw_form = dict(
        DW_FORM_addr=self.Dwarf_target_addr(''),
        DW_FORM_addrx=self.Dwarf_uleb128(''),
        DW_FORM_addrx1=self.Dwarf_uint8(''),
        DW_FORM_addrx2=self.Dwarf_uint16(''),
        # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
        DW_FORM_addrx4=self.Dwarf_uint32(''),

        DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
        DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
        DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
        DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

        # All DW_FORM_data<n> forms are assumed to be unsigned
        DW_FORM_data1=self.Dwarf_uint8(''),
        DW_FORM_data2=self.Dwarf_uint16(''),
        DW_FORM_data4=self.Dwarf_uint32(''),
        DW_FORM_data8=self.Dwarf_uint64(''),
        DW_FORM_sdata=self.Dwarf_sleb128(''),
        DW_FORM_udata=self.Dwarf_uleb128(''),

        DW_FORM_string=CString(''),
        DW_FORM_strp=self.Dwarf_offset(''),
        DW_FORM_line_strp=self.Dwarf_offset(''),
        # String-offsets-table indices: ULEB128 for the plain form, and
        # fixed 1/2/4-byte unsigned integers for the sized variants.
        DW_FORM_strx=self.Dwarf_uleb128(''),
        DW_FORM_strx1=self.Dwarf_uint8(''),
        DW_FORM_strx2=self.Dwarf_uint16(''),
        # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
        # strx4 is a 4-byte index; reading 8 bytes here would consume
        # the start of the next attribute.
        DW_FORM_strx4=self.Dwarf_uint32(''),
        DW_FORM_flag=self.Dwarf_uint8(''),

        DW_FORM_ref=self.Dwarf_uint32(''),
        DW_FORM_ref1=self.Dwarf_uint8(''),
        DW_FORM_ref2=self.Dwarf_uint16(''),
        DW_FORM_ref4=self.Dwarf_uint32(''),
        DW_FORM_ref8=self.Dwarf_uint64(''),
        DW_FORM_ref_udata=self.Dwarf_uleb128(''),
        # In DWARF version 2 this form is address-sized; in later
        # versions it is offset-sized.
        DW_FORM_ref_addr=(
            self.Dwarf_target_addr('') if self.dwarf_version == 2
            else self.Dwarf_offset('')),

        DW_FORM_indirect=self.Dwarf_uleb128(''),

        # New forms in DWARFv4
        DW_FORM_flag_present=StaticField('', 0),
        DW_FORM_sec_offset=self.Dwarf_offset(''),
        DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
        DW_FORM_ref_sig8=self.Dwarf_uint64(''),

        # DWARFv5 indices into the location/range list offset tables
        DW_FORM_loclistx=self.Dwarf_uleb128(''),
        DW_FORM_rnglistx=self.Dwarf_uleb128(''),

        DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
        DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
        # NOTE(review): this key is a DW_AT_* name, not a DW_FORM_* one.
        # Kept as-is for backward compatibility -- confirm whether any
        # caller relies on it.
        DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),
    )

245
def _create_aranges_header(self):
    """ Build Dwarf_aranges_header: unit length, version, debug_info
        offset, address size and segment size, in that order.
    """
    header_fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_offset('debug_info_offset'),  # a little tbd
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_size'),
    )
    self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
                                       *header_fields)

254
def _create_nameLUT_header(self):
    """ Build Dwarf_nameLUT_header: unit length, version, and the offset
        and length of the associated debug_info contribution.
    """
    header_fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_offset('debug_info_offset'),
        self.Dwarf_length('debug_info_length'),
    )
    self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
                                       *header_fields)

262
def _create_string_offsets_table_header(self):
    """ Build Dwarf_string_offsets_table_header: unit length, version and
        a 16-bit padding word.
    """
    header_fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint16('padding'),
    )
    # The struct name string is reproduced exactly as it has always been
    # spelled (including "offets").
    self.Dwarf_string_offsets_table_header = Struct(
        "Dwarf_string_offets_table_header", *header_fields)

270
def _create_address_table_header(self):
    """ Build Dwarf_address_table_header: unit length, version, address
        size and segment selector size.
    """
    header_fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_selector_size'),
    )
    self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
                                             *header_fields)

278
def _create_lineprog_header(self):
    """ Build Dwarf_lineprog_file_entry and Dwarf_lineprog_header.

        The header layout depends on the parsed 'version' field:
        address_size, segment_selector_size and the format-described
        directories/file_names tables are parsed only for version >= 5,
        maximum_operations_per_instruction only for version >= 4, and the
        legacy include_directory/file_entry lists only for version < 5.
    """
    # A file entry is terminated by a NULL byte, so we don't want to parse
    # past it. Therefore an If is used.
    self.Dwarf_lineprog_file_entry = Struct('file_entry',
        CString('name'),
        If(lambda ctx: len(ctx.name) != 0,
            Embed(Struct('',
                self.Dwarf_uleb128('dir_index'),
                self.Dwarf_uleb128('mtime'),
                self.Dwarf_uleb128('length')))))

    class FormattedEntry(Construct):
        # Generates a parser based on a previously parsed piece,
        # similar to deprecated Dynamic.
        # Strings are resolved later, since it potentially requires
        # looking at another section.
        def __init__(self, name, structs, format_field):
            # structs: the object whose Dwarf_dw_form table supplies the
            #          per-form parsers.
            # format_field: context key under which the list of
            #          (content_type, form) descriptors was parsed.
            Construct.__init__(self, name)
            self.structs = structs
            self.format_field = format_field

        def _parse(self, stream, context):
            # Somewhat tricky technique here, explicitly writing back to the context
            # The parser built from the descriptors is stored in the
            # context under "<format_field>_parser" and reused whenever
            # that key is already present.
            if self.format_field + "_parser" in context:
                parser = context[self.format_field + "_parser"]
            else:
                # One field per descriptor: parsed with the construct
                # for its form, named after its content type.
                fields = tuple(
                    Rename(f.content_type, self.structs.Dwarf_dw_form[f.form])
                    for f in context[self.format_field])
                parser = Struct('formatted_entry', *fields)
                context[self.format_field + "_parser"] = parser
            return parser._parse(stream, context)

    # Predicate shared by all the version-5-only fields below.
    ver5 = lambda ctx: ctx.version >= 5

    # NOTE(review): the third positional argument to If below (None / 1)
    # is presumably the value used when the predicate is false -- confirm
    # against the If construct's signature.
    self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        If(ver5,
            self.Dwarf_uint8("address_size"),
            None),
        If(ver5,
            self.Dwarf_uint8("segment_selector_size"),
            None),
        self.Dwarf_offset('header_length'),
        self.Dwarf_uint8('minimum_instruction_length'),
        If(lambda ctx: ctx.version >= 4,
            self.Dwarf_uint8("maximum_operations_per_instruction"),
            1),
        self.Dwarf_uint8('default_is_stmt'),
        self.Dwarf_int8('line_base'),
        self.Dwarf_uint8('line_range'),
        self.Dwarf_uint8('opcode_base'),
        # One length byte per standard opcode; there are opcode_base - 1
        # of them.
        Array(lambda ctx: ctx.opcode_base - 1,
            self.Dwarf_uint8('standard_opcode_lengths')),
        # Version 5: descriptors for the directory table, then the table
        # itself, parsed according to those descriptors.
        If(ver5,
            PrefixedArray(
                Struct('directory_entry_format',
                    Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                self.Dwarf_uint8("directory_entry_format_count"))),
        If(ver5, # Name deliberately doesn't match the legacy object, since the format can't be made compatible
            PrefixedArray(
                FormattedEntry('directories', self, "directory_entry_format"),
                self.Dwarf_uleb128('directories_count'))),
        # Version 5: the same descriptor/table pair for file names.
        If(ver5,
            PrefixedArray(
                Struct('file_name_entry_format',
                    Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                self.Dwarf_uint8("file_name_entry_format_count"))),
        If(ver5,
            PrefixedArray(
                FormattedEntry('file_names', self, "file_name_entry_format"),
                self.Dwarf_uleb128('file_names_count'))),
        # Legacy directories/files - DWARF < 5 only
        If(lambda ctx: ctx.version < 5,
            RepeatUntilExcluding(
                lambda obj, ctx: obj == b'',
                CString('include_directory'))),
        If(lambda ctx: ctx.version < 5,
            RepeatUntilExcluding(
                lambda obj, ctx: len(obj.name) == 0,
                self.Dwarf_lineprog_file_entry)) # array name is file_entry
        )

364
def _create_callframe_entry_headers(self):
    """ Build the call-frame headers: Dwarf_CIE_header, EH_CIE_header and
        Dwarf_FDE_header.

        EH_CIE_header always uses the layout without address_size and
        segment_size. Dwarf_CIE_header uses the extended layout only when
        dwarf_version is exactly 4.
    """
    # Fresh field instances are produced on every call so that the two
    # CIE structs never share sub-constructs.
    def leading_fields():
        return [
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
        ]

    def trailing_fields():
        return [
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'),
        ]

    basic_cie = Struct('Dwarf_CIE_header',
                       *(leading_fields() + trailing_fields()))
    self.EH_CIE_header = basic_cie

    # The CIE header was modified in DWARFv4.
    if self.dwarf_version == 4:
        size_fields = [
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size'),
        ]
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            *(leading_fields() + size_fields + trailing_fields()))
    else:
        self.Dwarf_CIE_header = basic_cie

    fde_fields = (
        self.Dwarf_initial_length('length'),
        self.Dwarf_offset('CIE_pointer'),
        self.Dwarf_target_addr('initial_location'),
        self.Dwarf_target_addr('address_range'),
    )
    self.Dwarf_FDE_header = Struct('Dwarf_FDE_header', *fde_fields)

394
def _make_block_struct(self, length_field):
    """ Create a struct for DW_FORM_block<size>: an array of bytes whose
        element count is parsed with the given length_field construct.
    """
    byte_elem = self.Dwarf_uint8('elem')
    count_prefix = length_field('')
    return PrefixedArray(subcon=byte_elem, length_field=count_prefix)

401
def _create_loclists_parsers(self):
    """ Create the structs for debug_loclists (DWARFv5, section 7.29):
        the CU header, the counted location description, the list of
        entries and the locview pair.
    """
    uleb = self.Dwarf_uleb128
    addr = self.Dwarf_target_addr
    u8 = self.Dwarf_uint8

    self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
        StreamOffset('cu_offset'),
        self.Dwarf_initial_length('unit_length'),
        # Copies the 'is64' value from the parse context into the result.
        Value('is64', lambda ctx: ctx.is64),
        StreamOffset('offset_after_length'),
        self.Dwarf_uint16('version'),
        u8('address_size'),
        u8('segment_selector_size'),
        self.Dwarf_uint32('offset_count'),
        StreamOffset('offset_table_offset'))

    # A ULEB128 count followed by that many expression bytes.
    cld = PrefixedArray(u8('loc_expr'), uleb(''))
    self.Dwarf_loclists_counted_location_description = cld

    # Payload layout for each entry kind, keyed by the parsed entry_type.
    entry_payloads = {
        'DW_LLE_end_of_list': Struct('end_of_list'),
        'DW_LLE_base_addressx': Struct('base_addressx', uleb('index')),
        'DW_LLE_startx_endx': Struct('startx_endx',
            uleb('start_index'), uleb('end_index'), cld),
        'DW_LLE_startx_length': Struct('startx_endx',
            uleb('start_index'), uleb('length'), cld),
        'DW_LLE_offset_pair': Struct('startx_endx',
            uleb('start_offset'), uleb('end_offset'), cld),
        'DW_LLE_default_location': Struct('default_location', cld),
        'DW_LLE_base_address': Struct('base_address', addr('address')),
        'DW_LLE_start_end': Struct('start_end',
            addr('start_address'), addr('end_address'), cld),
        'DW_LLE_start_length': Struct('start_length',
            addr('start_address'), uleb('length'), cld),
    }

    def is_end_of_list(obj, ctx):
        return obj.entry_type == 'DW_LLE_end_of_list'

    entry = Struct('entry',
        StreamOffset('entry_offset'),
        Enum(u8('entry_type'), **ENUM_DW_LLE),
        Embed(Switch('', lambda ctx: ctx.entry_type, entry_payloads)),
        StreamOffset('entry_end_offset'))

    self.Dwarf_loclists_entries = RepeatUntilExcluding(is_end_of_list, entry)

    self.Dwarf_locview_pair = Struct('locview_pair',
        StreamOffset('entry_offset'), uleb('begin'), uleb('end'))

439
def _create_rnglists_parsers(self):
    """ Create the structs for debug_rnglists: the CU header and the list
        of range entries up to (not including) the end-of-list entry.
    """
    uleb = self.Dwarf_uleb128
    addr = self.Dwarf_target_addr
    u8 = self.Dwarf_uint8

    self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header',
        StreamOffset('cu_offset'),
        self.Dwarf_initial_length('unit_length'),
        # Copies the 'is64' value from the parse context into the result.
        Value('is64', lambda ctx: ctx.is64),
        StreamOffset('offset_after_length'),
        self.Dwarf_uint16('version'),
        u8('address_size'),
        u8('segment_selector_size'),
        self.Dwarf_uint32('offset_count'),
        StreamOffset('offset_table_offset'))

    # Payload layout for each entry kind, keyed by the parsed entry_type.
    entry_payloads = {
        'DW_RLE_end_of_list': Struct('end_of_list'),
        'DW_RLE_base_addressx': Struct('base_addressx', uleb('index')),
        'DW_RLE_startx_endx': Struct('startx_endx',
            uleb('start_index'), uleb('end_index')),
        'DW_RLE_startx_length': Struct('startx_endx',
            uleb('start_index'), uleb('length')),
        'DW_RLE_offset_pair': Struct('startx_endx',
            uleb('start_offset'), uleb('end_offset')),
        'DW_RLE_base_address': Struct('base_address', addr('address')),
        'DW_RLE_start_end': Struct('start_end',
            addr('start_address'), addr('end_address')),
        'DW_RLE_start_length': Struct('start_length',
            addr('start_address'), uleb('length')),
    }

    def is_end_of_list(obj, ctx):
        return obj.entry_type == 'DW_RLE_end_of_list'

    entry = Struct('entry',
        StreamOffset('entry_offset'),
        Enum(u8('entry_type'), **ENUM_DW_RLE),
        Embed(Switch('', lambda ctx: ctx.entry_type, entry_payloads)),
        StreamOffset('entry_end_offset'),
        # Number of stream bytes the entry occupied.
        Value('entry_length',
            lambda ctx: ctx.entry_end_offset - ctx.entry_offset))

    self.Dwarf_rnglists_entries = RepeatUntilExcluding(is_end_of_list, entry)

470
471
class _InitialLengthAdapter(Adapter):
    """ Construct adapter for an initial length field. The adapted
        sub-construct must yield an object with two values, 'first' and
        'second'; the decoded result is the single effective length.
    """
    def _decode(self, obj, context):
        """ Return the length and record in context['is64'] whether the
            64-bit encoding was used. Raises ConstructError when 'first'
            is 0xFFFFFF00 or above but is not the continuation value
            0xFFFFFFFF.
        """
        first_word = obj.first

        # Values below 0xFFFFFF00 are the length itself.
        if first_word < 0xFFFFFF00:
            context['is64'] = False
            return first_word

        # Of the remaining values, only the continuation value is valid.
        if first_word != 0xFFFFFFFF:
            raise ConstructError("Failed decoding initial length for %X" % (
                first_word))

        context['is64'] = True
        return obj.second