1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from logging
.config
import valid_ident
11 from ..construct
import (
12 UBInt8
, UBInt16
, UBInt32
, UBInt64
, ULInt8
, ULInt16
, ULInt32
, ULInt64
,
13 SBInt8
, SBInt16
, SBInt32
, SBInt64
, SLInt8
, SLInt16
, SLInt32
, SLInt64
,
14 Adapter
, Struct
, ConstructError
, If
, Enum
, Array
, PrefixedArray
,
15 CString
, Embed
, StaticField
, IfThenElse
, Construct
, Rename
, Sequence
,
18 from ..common
.construct_utils
import (RepeatUntilExcluding
, ULEB128
, SLEB128
,
23 class DWARFStructs(object):
24 """ Exposes Construct structs suitable for parsing information from DWARF
25 sections. Each compile unit in DWARF info can have its own structs
26 object. Keep in mind that these structs have to be given a name (by
27 calling them with a name) before being used for parsing (like other
28 Construct structs). Those that should be used without a name are marked
31 Accessible attributes (mostly as described in chapter 7 of the DWARF
34 Dwarf_[u]int{8,16,32,64}:
35 Data chunks of the common sizes
38 32-bit or 64-bit word, depending on dwarf_format
41 32-bit or 64-bit word, depending on dwarf_format
44 32-bit or 64-bit word, depending on address size
47 "Initial length field" encoding
51 ULEB128 and SLEB128 variable-length encoding
54 Compilation unit header
56 Dwarf_abbrev_declaration (+):
57 Abbreviation table declaration - doesn't include the initial
58 code, only the contents.
61 A dictionary mapping 'DW_FORM_*' keys into construct Structs
62 that parse such forms. These Structs have already been given
65 Dwarf_lineprog_header (+):
68 Dwarf_lineprog_file_entry (+):
69 A single file entry in a line program header or instruction
77 See also the documentation of public methods.
def __init__(self,
             little_endian, dwarf_format, address_size, dwarf_version=2):
    """ Record the parsing parameters and build every struct.

        little_endian:
            True if the file is little endian, False if big

        dwarf_format:
            DWARF Format: 32 or 64-bit (see spec section 7.4)

        address_size:
            Target machine address size, in bytes (4 or 8)

        dwarf_version:
            Numeric DWARF version
    """
    # Validate before touching any attribute, so a rejected call leaves
    # the instance without partially-initialized state.
    assert dwarf_format in (32, 64)
    assert address_size in (8, 4), str(address_size)

    self.little_endian = little_endian
    self.dwarf_format = dwarf_format
    self.address_size = address_size
    self.dwarf_version = dwarf_version

    # All Dwarf_* attributes are derived from the four values above.
    self._create_structs()
def initial_length_field_size(self):
    """ Size of an initial length field.

        Returns 4 when dwarf_format is 32, and 12 otherwise.
    """
    if self.dwarf_format == 32:
        return 4
    return 12
def _create_structs(self):
    """ Bind the endianness-dependent integer primitives to self, then
        run every struct builder on top of them.
    """
    # Pick the whole family of fixed-size integer constructs in one go.
    if self.little_endian:
        uint8, uint16, uint32, uint64 = ULInt8, ULInt16, ULInt32, ULInt64
        int8, int16, int32, int64 = SLInt8, SLInt16, SLInt32, SLInt64
    else:
        uint8, uint16, uint32, uint64 = UBInt8, UBInt16, UBInt32, UBInt64
        int8, int16, int32, int64 = SBInt8, SBInt16, SBInt32, SBInt64

    # Offsets and lengths follow dwarf_format; target addresses follow
    # address_size.
    format_word = uint32 if self.dwarf_format == 32 else uint64
    address_word = uint32 if self.address_size == 4 else uint64

    self.Dwarf_uint8 = uint8
    self.Dwarf_uint16 = uint16
    self.Dwarf_uint32 = uint32
    self.Dwarf_uint64 = uint64
    self.Dwarf_offset = format_word
    self.Dwarf_length = format_word
    self.Dwarf_target_addr = address_word
    self.Dwarf_int8 = int8
    self.Dwarf_int16 = int16
    self.Dwarf_int32 = int32
    self.Dwarf_int64 = int64

    # Order matters: later builders read attributes that earlier ones
    # set (e.g. Dwarf_initial_length, Dwarf_uleb128, Dwarf_dw_form).
    self._create_initial_length()
    self._create_leb128()
    self._create_cu_header()
    self._create_abbrev_declaration()
    self._create_dw_form()
    self._create_lineprog_header()
    self._create_callframe_entry_headers()
    self._create_aranges_header()
    self._create_nameLUT_header()
    self._create_string_offsets_table_header()
    self._create_address_table_header()
    self._create_loclists_parsers()
    self._create_rnglists_parsers()

    self._create_debugsup()
    self._create_gnu_debugaltlink()
# Publishes the nested _InitialLength factory as self.Dwarf_initial_length.
# The factory (not a built construct) is stored: the other builders call it
# with a field name, e.g. self.Dwarf_initial_length('unit_length').
# NOTE(review): this extract appears to be missing the line(s) that wrap the
# 'first'/'second' fields and close the expression - confirm against the
# full file before relying on the exact shape.
152 def _create_initial_length(self
):
153 def _InitialLength(name
):
154 # Adapts a Struct that parses forward a full initial length field.
155 # Only if the first word is the continuation value, the second
156 # word is parsed from the stream.
157 return _InitialLengthAdapter(
# 'first' is always read as a 32-bit unsigned word.
159 self
.Dwarf_uint32('first'),
# 'second' (64-bit) is read only when 'first' equals 0xFFFFFFFF.
160 If(lambda ctx
: ctx
.first
== 0xFFFFFFFF,
161 self
.Dwarf_uint64('second'),
163 self
.Dwarf_initial_length
= _InitialLength
def _create_leb128(self):
    """ Expose the ULEB128 and SLEB128 constructs as Dwarf_uleb128 and
        Dwarf_sleb128.
    """
    self.Dwarf_uleb128, self.Dwarf_sleb128 = ULEB128, SLEB128
# Builds self.Dwarf_CU_header: unit_length, version, then a version-dependent
# tail selected by IfThenElse on ctx['version'] >= 5.
#   version >= 5: unit_type, address_size, debug_abbrev_offset
#   otherwise:    debug_abbrev_offset, address_size
# NOTE(review): the lines that open each branch's sub-construct are not
# present in this extract - confirm against the full file.
169 def _create_cu_header(self
):
170 self
.Dwarf_CU_header
= Struct('Dwarf_CU_header',
171 self
.Dwarf_initial_length('unit_length'),
172 self
.Dwarf_uint16('version'),
173 # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
174 IfThenElse('', lambda ctx
: ctx
['version'] >= 5,
# Branch taken for version >= 5.
176 self
.Dwarf_uint8('unit_type'),
177 self
.Dwarf_uint8('address_size'),
178 self
.Dwarf_offset('debug_abbrev_offset'))),
# Branch taken for version < 5.
180 self
.Dwarf_offset('debug_abbrev_offset'),
181 self
.Dwarf_uint8('address_size'))),
# Builds self.Dwarf_abbrev_declaration: an enum-decoded tag, an enum-decoded
# children flag, then a repeated (name, form[, value]) specification that
# stops - excluding the terminator - at the DW_AT_null / DW_FORM_null pair.
# NOTE(review): the predicate's lambda header and the opener of the repeated
# sub-struct are not present in this extract - confirm against the full file.
184 def _create_abbrev_declaration(self
):
185 self
.Dwarf_abbrev_declaration
= Struct('Dwarf_abbrev_entry',
186 Enum(self
.Dwarf_uleb128('tag'), **ENUM_DW_TAG
),
187 Enum(self
.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN
),
188 RepeatUntilExcluding(
# Terminator test: both name and form must be the null markers.
190 obj
.name
== 'DW_AT_null' and obj
.form
== 'DW_FORM_null',
192 Enum(self
.Dwarf_uleb128('name'), **ENUM_DW_AT
),
193 Enum(self
.Dwarf_uleb128('form'), **ENUM_DW_FORM
),
# 'value' (SLEB128) is parsed only for DW_FORM_implicit_const.
194 If(lambda ctx
: ctx
['form'] == 'DW_FORM_implicit_const',
195 self
.Dwarf_sleb128('value')))))
def _create_debugsup(self):
    """ Build Dwarf_debugsup: a version, the is_supplementary flag and
        the supplementary file name.
    """
    # We don't care about checksums, for now.
    fields = (
        self.Dwarf_int16('version'),
        self.Dwarf_uint8('is_supplementary'),
        CString('sup_filename'),
    )
    self.Dwarf_debugsup = Struct('Elf_debugsup', *fields)
def _create_gnu_debugaltlink(self):
    """ Build Dwarf_debugaltlink: the supplementary file name followed by
        a fixed 20-unit checksum string.
    """
    filename = CString("sup_filename")
    checksum = String("sup_checksum", length=20)
    self.Dwarf_debugaltlink = Struct('Elf_debugaltlink', filename, checksum)
def _create_dw_form(self):
    """ Build Dwarf_dw_form: a dictionary mapping form names to the
        constructs that parse attribute values of that form.

        Every construct is created with an empty name. Block-like forms
        are produced by _make_block_struct with the matching length
        prefix.
    """
    self.Dwarf_dw_form = dict(
        DW_FORM_addr=self.Dwarf_target_addr(''),
        DW_FORM_addrx=self.Dwarf_uleb128(''),
        DW_FORM_addrx1=self.Dwarf_uint8(''),
        DW_FORM_addrx2=self.Dwarf_uint16(''),
        # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
        DW_FORM_addrx4=self.Dwarf_uint32(''),

        DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
        DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
        DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
        DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

        # All DW_FORM_data<n> forms are assumed to be unsigned
        DW_FORM_data1=self.Dwarf_uint8(''),
        DW_FORM_data2=self.Dwarf_uint16(''),
        DW_FORM_data4=self.Dwarf_uint32(''),
        DW_FORM_data8=self.Dwarf_uint64(''),
        DW_FORM_sdata=self.Dwarf_sleb128(''),
        DW_FORM_udata=self.Dwarf_uleb128(''),

        DW_FORM_string=CString(''),
        DW_FORM_strp=self.Dwarf_offset(''),
        DW_FORM_strp_sup=self.Dwarf_offset(''),
        DW_FORM_line_strp=self.Dwarf_offset(''),
        DW_FORM_strx1=self.Dwarf_uint8(''),
        DW_FORM_strx2=self.Dwarf_uint16(''),
        # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
        # The <n> suffix is the index size in bytes, exactly as for
        # DW_FORM_addrx4 above: strx4 is a 4-byte index. Reading 8 bytes
        # here would consume part of the following attribute.
        DW_FORM_strx4=self.Dwarf_uint32(''),
        DW_FORM_flag=self.Dwarf_uint8(''),

        DW_FORM_ref=self.Dwarf_uint32(''),
        DW_FORM_ref1=self.Dwarf_uint8(''),
        DW_FORM_ref2=self.Dwarf_uint16(''),
        DW_FORM_ref4=self.Dwarf_uint32(''),
        DW_FORM_ref_sup4=self.Dwarf_uint32(''),
        DW_FORM_ref8=self.Dwarf_uint64(''),
        DW_FORM_ref_sup8=self.Dwarf_uint64(''),
        DW_FORM_ref_udata=self.Dwarf_uleb128(''),
        # Address-sized in DWARF version 2, offset-sized otherwise.
        DW_FORM_ref_addr=(
            self.Dwarf_target_addr('') if self.dwarf_version == 2
            else self.Dwarf_offset('')),

        DW_FORM_indirect=self.Dwarf_uleb128(''),

        # New forms in DWARFv4
        # flag_present carries no data: a zero-length field.
        DW_FORM_flag_present=StaticField('', 0),
        DW_FORM_sec_offset=self.Dwarf_offset(''),
        DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
        DW_FORM_ref_sig8=self.Dwarf_uint64(''),

        DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
        DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
        DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),

        # New forms in DWARFv5
        DW_FORM_loclistx=self.Dwarf_uleb128(''),
        DW_FORM_rnglistx=self.Dwarf_uleb128('')
    )
def _create_aranges_header(self):
    """ Build Dwarf_aranges_header: initial length, version, offset into
        debug info, address size and segment size.
    """
    header_fields = [
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_offset('debug_info_offset'),  # a little tbd
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_size'),
    ]
    self.Dwarf_aranges_header = Struct("Dwarf_aranges_header", *header_fields)
def _create_nameLUT_header(self):
    """ Build Dwarf_nameLUT_header: initial length, version, then the
        offset and length of the associated debug info.
    """
    header_fields = [
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_offset('debug_info_offset'),
        self.Dwarf_length('debug_info_length'),
    ]
    self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header", *header_fields)
def _create_string_offsets_table_header(self):
    """ Build Dwarf_string_offsets_table_header: initial length, version
        and a 16-bit padding field.
    """
    header_fields = [
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint16('padding'),
    ]
    self.Dwarf_string_offsets_table_header = Struct(
        "Dwarf_string_offets_table_header", *header_fields)
def _create_address_table_header(self):
    """ Build Dwarf_address_table_header: initial length, version,
        address size and segment selector size.
    """
    header_fields = [
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_selector_size'),
    ]
    self.Dwarf_address_table_header = Struct(
        "Dwarf_address_table_header", *header_fields)
# Builds two attributes:
#   self.Dwarf_lineprog_file_entry - a single legacy file entry whose
#       dir_index/mtime/length fields are parsed only when the entry's
#       name is non-empty;
#   self.Dwarf_lineprog_header - the line program header, with fields that
#       depend on the parsed 'version' (>= 4, >= 5, < 5).
# A local Construct subclass, FormattedEntry, supports the version >= 5
# directory and file tables, whose layout is described by a format list
# parsed earlier in the same header.
# NOTE(review): a number of interior lines are not present in this extract
# (e.g. the name field of file_entry, the branch that builds `fields`, and
# the openers of several version-5 constructs) - confirm against the full
# file before changing any code here.
301 def _create_lineprog_header(self
):
302 # A file entry is terminated by a NULL byte, so we don't want to parse
303 # past it. Therefore an If is used.
304 self
.Dwarf_lineprog_file_entry
= Struct('file_entry',
306 If(lambda ctx
: len(ctx
.name
) != 0,
308 self
.Dwarf_uleb128('dir_index'),
309 self
.Dwarf_uleb128('mtime'),
310 self
.Dwarf_uleb128('length')))))
312 class FormattedEntry(Construct
):
313 # Generates a parser based on a previously parsed piece,
314 # similar to deprecated Dynamic.
315 # Strings are resolved later, since it potentially requires
316 # looking at another section.
# structs: object providing Dwarf_dw_form; format_field: context key
# holding the previously parsed list of (content_type, form) entries.
317 def __init__(self
, name
, structs
, format_field
):
318 Construct
.__init
__(self
, name
)
319 self
.structs
= structs
320 self
.format_field
= format_field
322 def _parse(self
, stream
, context
):
323 # Somewhat tricky technique here, explicitly writing back to the context
# The generated parser is cached in the context under
# "<format_field>_parser" and reused when that key is already present.
324 if self
.format_field
+ "_parser" in context
:
325 parser
= context
[self
.format_field
+ "_parser"]
# One renamed Dwarf_dw_form construct per entry of the format list:
# the field is named after content_type and parsed according to form.
328 Rename(f
.content_type
, self
.structs
.Dwarf_dw_form
[f
.form
])
329 for f
in context
[self
.format_field
])
330 parser
= Struct('formatted_entry', *fields
)
331 context
[self
.format_field
+ "_parser"] = parser
332 return parser
._parse
(stream
, context
)
# Predicate shared by the version-5-only parts of the header.
334 ver5
= lambda ctx
: ctx
.version
>= 5
336 self
.Dwarf_lineprog_header
= Struct('Dwarf_lineprog_header',
337 self
.Dwarf_initial_length('unit_length'),
338 self
.Dwarf_uint16('version'),
340 self
.Dwarf_uint8("address_size"),
343 self
.Dwarf_uint8("segment_selector_size"),
345 self
.Dwarf_offset('header_length'),
346 self
.Dwarf_uint8('minimum_instruction_length'),
# Parsed only when version >= 4.
347 If(lambda ctx
: ctx
.version
>= 4,
348 self
.Dwarf_uint8("maximum_operations_per_instruction"),
350 self
.Dwarf_uint8('default_is_stmt'),
351 self
.Dwarf_int8('line_base'),
352 self
.Dwarf_uint8('line_range'),
353 self
.Dwarf_uint8('opcode_base'),
# The array holds opcode_base - 1 entries.
354 Array(lambda ctx
: ctx
.opcode_base
- 1,
355 self
.Dwarf_uint8('standard_opcode_lengths')),
358 Struct('directory_entry_format',
359 Enum(self
.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT
),
360 Enum(self
.Dwarf_uleb128('form'), **ENUM_DW_FORM
)),
361 self
.Dwarf_uint8("directory_entry_format_count"))),
362 If(ver5
, # Name deliberately doesn't match the legacy object, since the format can't be made compatible
364 FormattedEntry('directories', self
, "directory_entry_format"),
365 self
.Dwarf_uleb128('directories_count'))),
368 Struct('file_name_entry_format',
369 Enum(self
.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT
),
370 Enum(self
.Dwarf_uleb128('form'), **ENUM_DW_FORM
)),
371 self
.Dwarf_uint8("file_name_entry_format_count"))),
374 FormattedEntry('file_names', self
, "file_name_entry_format"),
375 self
.Dwarf_uleb128('file_names_count'))),
376 # Legacy directories/files - DWARF < 5 only
# Include directories: C strings, repeated until an empty one is read.
377 If(lambda ctx
: ctx
.version
< 5,
378 RepeatUntilExcluding(
379 lambda obj
, ctx
: obj
== b
'',
380 CString('include_directory'))),
# File entries: repeated until an entry with an empty name is read.
381 If(lambda ctx
: ctx
.version
< 5,
382 RepeatUntilExcluding(
383 lambda obj
, ctx
: len(obj
.name
) == 0,
384 self
.Dwarf_lineprog_file_entry
)) # array name is file_entry
def _create_callframe_entry_headers(self):
    """ Build the call-frame entry headers: Dwarf_CIE_header,
        EH_CIE_header and Dwarf_FDE_header.

        EH_CIE_header always keeps the layout without address_size and
        segment_size. Dwarf_CIE_header is replaced by the layout that
        includes them when dwarf_version is 4.
    """
    base_cie_fields = (
        self.Dwarf_initial_length('length'),
        self.Dwarf_offset('CIE_id'),
        self.Dwarf_uint8('version'),
        CString('augmentation'),
        self.Dwarf_uleb128('code_alignment_factor'),
        self.Dwarf_sleb128('data_alignment_factor'),
        self.Dwarf_uleb128('return_address_register'),
    )
    base_cie = Struct('Dwarf_CIE_header', *base_cie_fields)
    self.Dwarf_CIE_header = base_cie
    # Captured before the possible replacement below.
    self.EH_CIE_header = base_cie

    # The CIE header was modified in DWARFv4.
    if self.dwarf_version == 4:
        v4_cie_fields = (
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'),
        )
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header', *v4_cie_fields)

    fde_fields = (
        self.Dwarf_initial_length('length'),
        self.Dwarf_offset('CIE_pointer'),
        self.Dwarf_target_addr('initial_location'),
        self.Dwarf_target_addr('address_range'),
    )
    self.Dwarf_FDE_header = Struct('Dwarf_FDE_header', *fde_fields)
def _make_block_struct(self, length_field):
    """ Create a struct for DW_FORM_block<size>

        length_field:
            Construct factory for the length prefix; it is called with
            an empty name. The elements are Dwarf_uint8 values named
            'elem'.
    """
    element = self.Dwarf_uint8('elem')
    prefix = length_field('')
    return PrefixedArray(subcon=element, length_field=prefix)
# Builds four attributes:
#   self.Dwarf_loclists_CU_header - list-table header; also records the
#       stream offsets 'cu_offset', 'offset_after_length' and
#       'offset_table_offset', and copies 'is64' from the context;
#   self.Dwarf_loclists_counted_location_description - Dwarf_uint8 array
#       prefixed by a ULEB128 count (aliased locally as `cld`);
#   self.Dwarf_loclists_entries - entries repeated until (and excluding)
#       one whose entry_type is 'DW_LLE_end_of_list'; the body of each
#       entry is chosen by a Switch on entry_type, and 'entry_length' is
#       computed as entry_end_offset - entry_offset;
#   self.Dwarf_locview_pair - stream offset plus ULEB128 'begin' and 'end'.
# NOTE(review): the opener of the per-entry struct and the delimiters of the
# Switch mapping are not present in this extract - confirm against the full
# file.
# NOTE(review): three different cases reuse the struct name 'startx_endx'
# (startx_length, offset_pair); presumably harmless because the Switch is
# embedded, but verify before relying on those names.
424 def _create_loclists_parsers(self
):
425 """ Create a struct for debug_loclists CU header, DWARFv5, 7.29
427 self
.Dwarf_loclists_CU_header
= Struct('Dwarf_loclists_CU_header',
428 StreamOffset('cu_offset'),
429 self
.Dwarf_initial_length('unit_length'),
430 Value('is64', lambda ctx
: ctx
.is64
),
431 StreamOffset('offset_after_length'),
432 self
.Dwarf_uint16('version'),
433 self
.Dwarf_uint8('address_size'),
434 self
.Dwarf_uint8('segment_selector_size'),
435 self
.Dwarf_uint32('offset_count'),
436 StreamOffset('offset_table_offset'))
438 cld
= self
.Dwarf_loclists_counted_location_description
= PrefixedArray(self
.Dwarf_uint8('loc_expr'), self
.Dwarf_uleb128(''))
440 self
.Dwarf_loclists_entries
= RepeatUntilExcluding(
441 lambda obj
, ctx
: obj
.entry_type
== 'DW_LLE_end_of_list',
443 StreamOffset('entry_offset'),
444 Enum(self
.Dwarf_uint8('entry_type'), **ENUM_DW_LLE
),
445 Embed(Switch('', lambda ctx
: ctx
.entry_type
,
447 'DW_LLE_end_of_list' : Struct('end_of_list'),
448 'DW_LLE_base_addressx' : Struct('base_addressx', self
.Dwarf_uleb128('index')),
449 'DW_LLE_startx_endx' : Struct('startx_endx', self
.Dwarf_uleb128('start_index'), self
.Dwarf_uleb128('end_index'), cld
),
450 'DW_LLE_startx_length' : Struct('startx_endx', self
.Dwarf_uleb128('start_index'), self
.Dwarf_uleb128('length'), cld
),
451 'DW_LLE_offset_pair' : Struct('startx_endx', self
.Dwarf_uleb128('start_offset'), self
.Dwarf_uleb128('end_offset'), cld
),
452 'DW_LLE_default_location' : Struct('default_location', cld
),
453 'DW_LLE_base_address' : Struct('base_address', self
.Dwarf_target_addr('address')),
454 'DW_LLE_start_end' : Struct('start_end', self
.Dwarf_target_addr('start_address'), self
.Dwarf_target_addr('end_address'), cld
),
455 'DW_LLE_start_length' : Struct('start_length', self
.Dwarf_target_addr('start_address'), self
.Dwarf_uleb128('length'), cld
),
457 StreamOffset('entry_end_offset'),
458 Value('entry_length', lambda ctx
: ctx
.entry_end_offset
- ctx
.entry_offset
)))
460 self
.Dwarf_locview_pair
= Struct('locview_pair',
461 StreamOffset('entry_offset'), self
.Dwarf_uleb128('begin'), self
.Dwarf_uleb128('end'))
# Builds two attributes:
#   self.Dwarf_rnglists_CU_header - list-table header with the same field
#       sequence as Dwarf_loclists_CU_header (stream offsets 'cu_offset',
#       'offset_after_length', 'offset_table_offset'; 'is64' copied from
#       the context);
#   self.Dwarf_rnglists_entries - entries repeated until (and excluding)
#       one whose entry_type is 'DW_RLE_end_of_list'; the body of each
#       entry is chosen by a Switch on entry_type, and 'entry_length' is
#       computed as entry_end_offset - entry_offset.
# NOTE(review): the opener of the per-entry struct and the delimiters of the
# Switch mapping are not present in this extract - confirm against the full
# file.
# NOTE(review): three different cases reuse the struct name 'startx_endx'
# (startx_length, offset_pair); presumably harmless because the Switch is
# embedded, but verify before relying on those names.
463 def _create_rnglists_parsers(self
):
464 self
.Dwarf_rnglists_CU_header
= Struct('Dwarf_rnglists_CU_header',
465 StreamOffset('cu_offset'),
466 self
.Dwarf_initial_length('unit_length'),
467 Value('is64', lambda ctx
: ctx
.is64
),
468 StreamOffset('offset_after_length'),
469 self
.Dwarf_uint16('version'),
470 self
.Dwarf_uint8('address_size'),
471 self
.Dwarf_uint8('segment_selector_size'),
472 self
.Dwarf_uint32('offset_count'),
473 StreamOffset('offset_table_offset'))
475 self
.Dwarf_rnglists_entries
= RepeatUntilExcluding(
476 lambda obj
, ctx
: obj
.entry_type
== 'DW_RLE_end_of_list',
478 StreamOffset('entry_offset'),
479 Enum(self
.Dwarf_uint8('entry_type'), **ENUM_DW_RLE
),
480 Embed(Switch('', lambda ctx
: ctx
.entry_type
,
482 'DW_RLE_end_of_list' : Struct('end_of_list'),
483 'DW_RLE_base_addressx' : Struct('base_addressx', self
.Dwarf_uleb128('index')),
484 'DW_RLE_startx_endx' : Struct('startx_endx', self
.Dwarf_uleb128('start_index'), self
.Dwarf_uleb128('end_index')),
485 'DW_RLE_startx_length' : Struct('startx_endx', self
.Dwarf_uleb128('start_index'), self
.Dwarf_uleb128('length')),
486 'DW_RLE_offset_pair' : Struct('startx_endx', self
.Dwarf_uleb128('start_offset'), self
.Dwarf_uleb128('end_offset')),
487 'DW_RLE_base_address' : Struct('base_address', self
.Dwarf_target_addr('address')),
488 'DW_RLE_start_end' : Struct('start_end', self
.Dwarf_target_addr('start_address'), self
.Dwarf_target_addr('end_address')),
489 'DW_RLE_start_length' : Struct('start_length', self
.Dwarf_target_addr('start_address'), self
.Dwarf_uleb128('length'))
491 StreamOffset('entry_end_offset'),
492 Value('entry_length', lambda ctx
: ctx
.entry_end_offset
- ctx
.entry_offset
)))
495 class _InitialLengthAdapter(Adapter
):
496 """ A standard Construct adapter that expects a sub-construct
497 as a struct with one or two values (first, second).
499 def _decode(self
, obj
, context
):
500 if obj
.first
< 0xFFFFFF00:
501 context
['is64'] = False
504 if obj
.first
== 0xFFFFFFFF:
505 context
['is64'] = True
508 raise ConstructError("Failed decoding initial length for %X" % (