1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from logging
.config
import valid_ident
11 from ..construct
import (
12 UBInt8
, UBInt16
, UBInt32
, UBInt64
, ULInt8
, ULInt16
, ULInt32
, ULInt64
,
13 SBInt8
, SBInt16
, SBInt32
, SBInt64
, SLInt8
, SLInt16
, SLInt32
, SLInt64
,
14 Adapter
, Struct
, ConstructError
, If
, Enum
, Array
, PrefixedArray
,
15 CString
, Embed
, StaticField
, IfThenElse
, Construct
, Rename
, Sequence
,
18 from ..common
.construct_utils
import (RepeatUntilExcluding
, ULEB128
, SLEB128
,
23 class DWARFStructs(object):
24 """ Exposes Construct structs suitable for parsing information from DWARF
25 sections. Each compile unit in DWARF info can have its own structs
26 object. Keep in mind that these structs have to be given a name (by
27 calling them with a name) before being used for parsing (like other
28 Construct structs). Those that should be used without a name are marked
31 Accessible attributes (mostly as described in chapter 7 of the DWARF
Dwarf_[u]int{8,16,32,64}:
35 Data chunks of the common sizes
38 32-bit or 64-bit word, depending on dwarf_format
41 32-bit or 64-bit word, depending on dwarf_format
44 32-bit or 64-bit word, depending on address size
47 "Initial length field" encoding
51 ULEB128 and SLEB128 variable-length encoding
54 Compilation unit header
56 Dwarf_abbrev_declaration (+):
57 Abbreviation table declaration - doesn't include the initial
58 code, only the contents.
61 A dictionary mapping 'DW_FORM_*' keys into construct Structs
62 that parse such forms. These Structs have already been given
65 Dwarf_lineprog_header (+):
68 Dwarf_lineprog_file_entry (+):
69 A single file entry in a line program header or instruction
77 See also the documentation of public methods.
80 # Cache for structs instances based on creation parameters. Structs
# initialization is expensive and we don't want to repeat it
def __new__(cls, little_endian, dwarf_format, address_size, dwarf_version=2):
    """ Return the structs object for the given parameters, creating and
        caching it on first use.

        little_endian:
            True if the file is little endian, False if big

        dwarf_format:
            DWARF Format: 32 or 64-bit (see spec section 7.4)

        address_size:
            Target machine address size, in bytes (4 or 8)

        dwarf_version:
            Numeric DWARF version

        Instances are stored in cls._structs_cache keyed by all four
        parameters, so equal parameters yield the very same object.
    """
    cache_key = (little_endian, dwarf_format, address_size, dwarf_version)

    try:
        return cls._structs_cache[cache_key]
    except KeyError:
        pass  # Not built yet - fall through and create it.

    instance = super().__new__(cls)
    assert dwarf_format in (32, 64)
    assert address_size in (8, 4), str(address_size)
    instance.little_endian = little_endian
    instance.dwarf_format = dwarf_format
    instance.address_size = address_size
    instance.dwarf_version = dwarf_version
    instance._create_structs()
    # Only a fully initialized object is published to the cache.
    cls._structs_cache[cache_key] = instance
    return instance
def initial_length_field_size(self):
    """ Size of an initial length field: 4 when dwarf_format is 32,
        otherwise 12.
    """
    if self.dwarf_format == 32:
        return 4
    return 12
def _create_structs(self):
    """ Bind the endianness- and size-dependent integer primitives on
        self, then build every composite struct from them.
    """
    if self.little_endian:
        u8, u16, u32, u64 = ULInt8, ULInt16, ULInt32, ULInt64
        s8, s16, s32, s64 = SLInt8, SLInt16, SLInt32, SLInt64
    else:
        u8, u16, u32, u64 = UBInt8, UBInt16, UBInt32, UBInt64
        s8, s16, s32, s64 = SBInt8, SBInt16, SBInt32, SBInt64

    self.Dwarf_uint8 = u8
    self.Dwarf_uint16 = u16
    self.Dwarf_uint32 = u32
    self.Dwarf_uint64 = u64

    # Offsets and lengths follow the DWARF format (32/64); target
    # addresses follow the address size (4/8 bytes).
    format_word = u32 if self.dwarf_format == 32 else u64
    self.Dwarf_offset = format_word
    self.Dwarf_length = format_word
    self.Dwarf_target_addr = u32 if self.address_size == 4 else u64

    self.Dwarf_int8 = s8
    self.Dwarf_int16 = s16
    self.Dwarf_int32 = s32
    self.Dwarf_int64 = s64

    # The builders run in this exact order: later ones use attributes
    # set by earlier ones (e.g. Dwarf_initial_length, Dwarf_uleb128).
    builders = (
        self._create_initial_length,
        self._create_leb128,
        self._create_cu_header,
        self._create_abbrev_declaration,
        self._create_dw_form,
        self._create_lineprog_header,
        self._create_callframe_entry_headers,
        self._create_aranges_header,
        self._create_nameLUT_header,
        self._create_string_offsets_table_header,
        self._create_address_table_header,
        self._create_loclists_parsers,
        self._create_rnglists_parsers,
        self._create_debugsup,
        self._create_gnu_debugaltlink,
    )
    for build in builders:
        build()
def _create_initial_length(self):
    """ Install Dwarf_initial_length: a factory that takes a field name
        and returns the adapted struct parsing an initial length field.
    """
    def _InitialLength(name):
        # The struct always reads the 32-bit 'first' word. The 64-bit
        # 'second' word is read from the stream only when 'first' holds
        # the 0xFFFFFFFF continuation value; otherwise it is None.
        is_continuation = lambda ctx: ctx.first == 0xFFFFFFFF
        raw_length = Struct(name,
            self.Dwarf_uint32('first'),
            If(is_continuation,
                self.Dwarf_uint64('second'),
                elsevalue=None))
        return _InitialLengthAdapter(raw_length)

    self.Dwarf_initial_length = _InitialLength
def _create_leb128(self):
    """ Expose the ULEB128/SLEB128 constructs as Dwarf_uleb128 and
        Dwarf_sleb128.
    """
    self.Dwarf_uleb128, self.Dwarf_sleb128 = ULEB128, SLEB128
def _create_cu_header(self):
    """ Build Dwarf_CU_header. The fields following 'version' are chosen
        at parse time from the parsed version value.
    """
    # DWARFv5 reverses the order of address_size and debug_abbrev_offset
    # (and reads a unit_type byte first).
    v5_fields = Embed(Struct('',
        self.Dwarf_uint8('unit_type'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_offset('debug_abbrev_offset')))
    legacy_fields = Embed(Struct('',
        self.Dwarf_offset('debug_abbrev_offset'),
        self.Dwarf_uint8('address_size')))
    is_v5_or_later = lambda ctx: ctx['version'] >= 5

    self.Dwarf_CU_header = Struct('Dwarf_CU_header',
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        IfThenElse('', is_v5_or_later, v5_fields, legacy_fields))
def _create_abbrev_declaration(self):
    """ Build Dwarf_abbrev_declaration: tag, children flag, and the list
        of attribute specs up to (excluding) the null/null terminator.
    """
    def _is_terminator(obj, ctx):
        return obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null'

    # 'value' is only present in the stream for DW_FORM_implicit_const.
    attr_spec = Struct('attr_spec',
        Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
        If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
            self.Dwarf_sleb128('value')))

    self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
        Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
        Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
        RepeatUntilExcluding(_is_terminator, attr_spec))
def _create_debugsup(self):
    """ Build Dwarf_debugsup: version, is_supplementary flag and the
        supplementary file name.

        The version field is an unsigned 2-byte value (uhalf in the DWARF
        spec), so it is parsed with Dwarf_uint16 rather than a signed type.
    """
    # We don't care about checksums, for now.
    self.Dwarf_debugsup = Struct('Elf_debugsup',
        self.Dwarf_uint16('version'),
        self.Dwarf_uint8('is_supplementary'),
        CString('sup_filename'))
def _create_gnu_debugaltlink(self):
    """ Build Dwarf_debugaltlink: a C string file name followed by a
        fixed-length (20) checksum string.
    """
    filename = CString("sup_filename")
    checksum = String("sup_checksum", length=20)
    self.Dwarf_debugaltlink = Struct('Elf_debugaltlink', filename, checksum)
def _create_dw_form(self):
    """ Build Dwarf_dw_form: a dictionary mapping 'DW_FORM_*' names to
        constructs (already given a dummy '' name) that parse a value of
        that form.

        Index forms with a numeric suffix (addrx<n>, strx<n>) are read as
        fixed-size unsigned integers of n bytes; the un-suffixed variants
        are ULEB128. The 3-byte variants are not supported yet.
    """
    self.Dwarf_dw_form = dict(
        DW_FORM_addr=self.Dwarf_target_addr(''),
        DW_FORM_addrx=self.Dwarf_uleb128(''),
        DW_FORM_addrx1=self.Dwarf_uint8(''),
        DW_FORM_addrx2=self.Dwarf_uint16(''),
        # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
        DW_FORM_addrx4=self.Dwarf_uint32(''),

        DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
        DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
        DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
        DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

        # All DW_FORM_data<n> forms are assumed to be unsigned
        DW_FORM_data1=self.Dwarf_uint8(''),
        DW_FORM_data2=self.Dwarf_uint16(''),
        DW_FORM_data4=self.Dwarf_uint32(''),
        DW_FORM_data8=self.Dwarf_uint64(''),
        DW_FORM_data16=Array(16, self.Dwarf_uint8('')),  # Used for hashes and such, not for integers
        DW_FORM_sdata=self.Dwarf_sleb128(''),
        DW_FORM_udata=self.Dwarf_uleb128(''),

        DW_FORM_string=CString(''),
        DW_FORM_strp=self.Dwarf_offset(''),
        DW_FORM_strp_sup=self.Dwarf_offset(''),
        DW_FORM_line_strp=self.Dwarf_offset(''),
        DW_FORM_strx=self.Dwarf_uleb128(''),
        DW_FORM_strx1=self.Dwarf_uint8(''),
        DW_FORM_strx2=self.Dwarf_uint16(''),
        # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
        # strx4 is a 4-byte index; reading 8 bytes would consume the
        # start of the next attribute.
        DW_FORM_strx4=self.Dwarf_uint32(''),
        DW_FORM_flag=self.Dwarf_uint8(''),

        DW_FORM_ref=self.Dwarf_uint32(''),
        DW_FORM_ref1=self.Dwarf_uint8(''),
        DW_FORM_ref2=self.Dwarf_uint16(''),
        DW_FORM_ref4=self.Dwarf_uint32(''),
        DW_FORM_ref_sup4=self.Dwarf_uint32(''),
        DW_FORM_ref8=self.Dwarf_uint64(''),
        DW_FORM_ref_sup8=self.Dwarf_uint64(''),
        DW_FORM_ref_udata=self.Dwarf_uleb128(''),
        # Address-sized in DWARF version 2, offset-sized otherwise.
        DW_FORM_ref_addr=(
            self.Dwarf_target_addr('') if self.dwarf_version == 2
            else self.Dwarf_offset('')),

        DW_FORM_indirect=self.Dwarf_uleb128(''),

        # New forms in DWARFv4
        DW_FORM_flag_present=StaticField('', 0),
        DW_FORM_sec_offset=self.Dwarf_offset(''),
        DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
        DW_FORM_ref_sig8=self.Dwarf_uint64(''),

        DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
        DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
        DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),

        # New forms in DWARFv5
        DW_FORM_loclistx=self.Dwarf_uleb128(''),
        DW_FORM_rnglistx=self.Dwarf_uleb128(''),
    )
def _create_aranges_header(self):
    """ Build Dwarf_aranges_header from its five header fields. """
    header_fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_offset('debug_info_offset'),  # a little tbd
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_size'),
    )
    self.Dwarf_aranges_header = Struct("Dwarf_aranges_header", *header_fields)
def _create_nameLUT_header(self):
    """ Build Dwarf_nameLUT_header: unit length, version, and the offset
        and length of the referenced debug info.
    """
    header_fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_offset('debug_info_offset'),
        self.Dwarf_length('debug_info_length'),
    )
    self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header", *header_fields)
def _create_string_offsets_table_header(self):
    """ Build Dwarf_string_offsets_table_header: unit length, version and
        a 16-bit padding field.
    """
    header_fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint16('padding'),
    )
    self.Dwarf_string_offsets_table_header = Struct(
        "Dwarf_string_offets_table_header", *header_fields)
def _create_address_table_header(self):
    """ Build Dwarf_address_table_header: unit length, version, address
        size and segment selector size.
    """
    header_fields = (
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_selector_size'),
    )
    self.Dwarf_address_table_header = Struct(
        "Dwarf_address_table_header", *header_fields)
def _create_lineprog_header(self):
    """ Build Dwarf_lineprog_file_entry and Dwarf_lineprog_header.

        The header selects between the version >= 5 layout (entry-format
        descriptions followed by formatted entries) and the legacy
        (version < 5) directory/file lists based on the parsed version.
    """
    # A file entry is terminated by a NULL byte, so we don't want to parse
    # past it. Therefore an If is used.
    self.Dwarf_lineprog_file_entry = Struct('file_entry',
        CString('name'),
        If(lambda ctx: len(ctx.name) != 0,
            Embed(Struct('',
                self.Dwarf_uleb128('dir_index'),
                self.Dwarf_uleb128('mtime'),
                self.Dwarf_uleb128('length')))))

    class FormattedEntry(Construct):
        # Generates a parser based on a previously parsed piece,
        # similar to the deprecated Dynamic.
        # Strings are resolved later, since it potentially requires
        # looking at another section.
        def __init__(self, name, structs, format_field):
            Construct.__init__(self, name)
            self.structs = structs
            self.format_field = format_field

        def _parse(self, stream, context):
            # Somewhat tricky technique here: the generated parser is
            # written back into the context, so a later parse that sees
            # the same context reuses it instead of rebuilding.
            parser_key = self.format_field + "_parser"
            if parser_key not in context:
                forms = self.structs.Dwarf_dw_form
                context[parser_key] = Struct('formatted_entry', *[
                    Rename(f.content_type, forms[f.form])
                    for f in context[self.format_field]])
            return context[parser_key]._parse(stream, context)

    ver5 = lambda ctx: ctx.version >= 5
    pre_ver5 = lambda ctx: ctx.version < 5

    def _entry_format(format_name, count_name):
        # Array of (content_type, form) pairs, prefixed by a one-byte count.
        return PrefixedArray(
            Struct(format_name,
                Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
            self.Dwarf_uint8(count_name))

    def _formatted_entries(entries_name, format_name, count_name):
        # Array of entries laid out per the previously parsed format,
        # prefixed by a ULEB128 count.
        return PrefixedArray(
            FormattedEntry(entries_name, self, format_name),
            self.Dwarf_uleb128(count_name))

    self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        If(ver5, self.Dwarf_uint8("address_size"), None),
        If(ver5, self.Dwarf_uint8("segment_selector_size"), None),
        self.Dwarf_offset('header_length'),
        self.Dwarf_uint8('minimum_instruction_length'),
        # Not in the stream before version 4; the value is then 1.
        If(lambda ctx: ctx.version >= 4,
            self.Dwarf_uint8("maximum_operations_per_instruction"),
            1),
        self.Dwarf_uint8('default_is_stmt'),
        self.Dwarf_int8('line_base'),
        self.Dwarf_uint8('line_range'),
        self.Dwarf_uint8('opcode_base'),
        Array(lambda ctx: ctx.opcode_base - 1,
              self.Dwarf_uint8('standard_opcode_lengths')),
        If(ver5, _entry_format('directory_entry_format',
                               "directory_entry_format_count")),
        # Name deliberately doesn't match the legacy object, since the
        # format can't be made compatible
        If(ver5, _formatted_entries('directories',
                                    "directory_entry_format",
                                    'directories_count')),
        If(ver5, _entry_format('file_name_entry_format',
                               "file_name_entry_format_count")),
        If(ver5, _formatted_entries('file_names',
                                    "file_name_entry_format",
                                    'file_names_count')),
        # Legacy directories/files - DWARF < 5 only
        If(pre_ver5,
            RepeatUntilExcluding(
                lambda obj, ctx: obj == b'',
                CString('include_directory'))),
        If(pre_ver5,
            RepeatUntilExcluding(
                lambda obj, ctx: len(obj.name) == 0,
                self.Dwarf_lineprog_file_entry)),  # array name is file_entry
        )
def _create_callframe_entry_headers(self):
    """ Build the call-frame headers: Dwarf_CIE_header, EH_CIE_header and
        Dwarf_FDE_header.

        EH_CIE_header always uses the layout without address_size and
        segment_size; Dwarf_CIE_header gains those two fields only when
        dwarf_version is 4.
    """
    def _cie_header(with_v4_fields):
        fields = [
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
        ]
        if with_v4_fields:
            fields.append(self.Dwarf_uint8('address_size'))
            fields.append(self.Dwarf_uint8('segment_size'))
        fields.extend((
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'),
        ))
        return Struct('Dwarf_CIE_header', *fields)

    base_header = _cie_header(with_v4_fields=False)
    self.Dwarf_CIE_header = base_header
    self.EH_CIE_header = base_header

    # The CIE header was modified in DWARFv4.
    if self.dwarf_version == 4:
        self.Dwarf_CIE_header = _cie_header(with_v4_fields=True)

    self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
        self.Dwarf_initial_length('length'),
        self.Dwarf_offset('CIE_pointer'),
        self.Dwarf_target_addr('initial_location'),
        self.Dwarf_target_addr('address_range'))
def _make_block_struct(self, length_field):
    """ Create a struct for DW_FORM_block<size>: an array of bytes whose
        element count is read with the given length_field construct.
    """
    element = self.Dwarf_uint8('elem')
    count = length_field('')
    return PrefixedArray(subcon=element, length_field=count)
def _create_loclists_parsers(self):
    """ Create the structs for debug_loclists (DWARFv5, 7.29): the CU
        header, the counted location description, the entry list and the
        locview pair.
    """
    uleb = self.Dwarf_uleb128
    addr = self.Dwarf_target_addr

    self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
        StreamOffset('cu_offset'),
        self.Dwarf_initial_length('unit_length'),
        # Copies the is64 value found in the context into the result.
        Value('is64', lambda ctx: ctx.is64),
        StreamOffset('offset_after_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_selector_size'),
        self.Dwarf_uint32('offset_count'),
        StreamOffset('offset_table_offset'))

    # Location expression bytes, prefixed by a ULEB128 length.
    cld = PrefixedArray(self.Dwarf_uint8('loc_expr'), uleb(''))
    self.Dwarf_loclists_counted_location_description = cld

    # Entry payload layout, selected by the parsed entry_type.
    payload_by_type = {
        'DW_LLE_end_of_list': Struct('end_of_list'),
        'DW_LLE_base_addressx': Struct('base_addressx', uleb('index')),
        'DW_LLE_startx_endx': Struct('startx_endx',
            uleb('start_index'), uleb('end_index'), cld),
        'DW_LLE_startx_length': Struct('startx_endx',
            uleb('start_index'), uleb('length'), cld),
        'DW_LLE_offset_pair': Struct('startx_endx',
            uleb('start_offset'), uleb('end_offset'), cld),
        'DW_LLE_default_location': Struct('default_location', cld),
        'DW_LLE_base_address': Struct('base_address', addr('address')),
        'DW_LLE_start_end': Struct('start_end',
            addr('start_address'), addr('end_address'), cld),
        'DW_LLE_start_length': Struct('start_length',
            addr('start_address'), uleb('length'), cld),
    }

    entry = Struct('entry',
        StreamOffset('entry_offset'),
        Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE),
        Embed(Switch('', lambda ctx: ctx.entry_type, payload_by_type)),
        StreamOffset('entry_end_offset'),
        Value('entry_length',
              lambda ctx: ctx.entry_end_offset - ctx.entry_offset))

    # Entries are read up to (excluding) the end-of-list entry.
    self.Dwarf_loclists_entries = RepeatUntilExcluding(
        lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list',
        entry)

    self.Dwarf_locview_pair = Struct('locview_pair',
        StreamOffset('entry_offset'), uleb('begin'), uleb('end'))
def _create_rnglists_parsers(self):
    """ Create the structs for range lists: Dwarf_rnglists_CU_header and
        Dwarf_rnglists_entries.
    """
    uleb = self.Dwarf_uleb128
    addr = self.Dwarf_target_addr

    self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header',
        StreamOffset('cu_offset'),
        self.Dwarf_initial_length('unit_length'),
        # Copies the is64 value found in the context into the result.
        Value('is64', lambda ctx: ctx.is64),
        StreamOffset('offset_after_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_selector_size'),
        self.Dwarf_uint32('offset_count'),
        StreamOffset('offset_table_offset'))

    # Entry payload layout, selected by the parsed entry_type.
    payload_by_type = {
        'DW_RLE_end_of_list': Struct('end_of_list'),
        'DW_RLE_base_addressx': Struct('base_addressx', uleb('index')),
        'DW_RLE_startx_endx': Struct('startx_endx',
            uleb('start_index'), uleb('end_index')),
        'DW_RLE_startx_length': Struct('startx_endx',
            uleb('start_index'), uleb('length')),
        'DW_RLE_offset_pair': Struct('startx_endx',
            uleb('start_offset'), uleb('end_offset')),
        'DW_RLE_base_address': Struct('base_address', addr('address')),
        'DW_RLE_start_end': Struct('start_end',
            addr('start_address'), addr('end_address')),
        'DW_RLE_start_length': Struct('start_length',
            addr('start_address'), uleb('length')),
    }

    entry = Struct('entry',
        StreamOffset('entry_offset'),
        Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_RLE),
        Embed(Switch('', lambda ctx: ctx.entry_type, payload_by_type)),
        StreamOffset('entry_end_offset'),
        Value('entry_length',
              lambda ctx: ctx.entry_end_offset - ctx.entry_offset))

    # Entries are read up to (excluding) the end-of-list entry.
    self.Dwarf_rnglists_entries = RepeatUntilExcluding(
        lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list',
        entry)
509 class _InitialLengthAdapter(Adapter
):
510 """ A standard Construct adapter that expects a sub-construct
511 as a struct with one or two values (first, second).
513 def _decode(self
, obj
, context
):
514 if obj
.first
< 0xFFFFFF00:
515 context
['is64'] = False
518 if obj
.first
== 0xFFFFFFFF:
519 context
['is64'] = True
522 raise ConstructError("Failed decoding initial length for %X" % (