1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from logging
.config
import valid_ident
11 from ..construct
import (
12 UBInt8
, UBInt16
, UBInt32
, UBInt64
, ULInt8
, ULInt16
, ULInt32
, ULInt64
,
13 SBInt8
, SBInt16
, SBInt32
, SBInt64
, SLInt8
, SLInt16
, SLInt32
, SLInt64
,
14 Adapter
, Struct
, ConstructError
, If
, Enum
, Array
, PrefixedArray
,
15 CString
, Embed
, StaticField
, IfThenElse
, Construct
, Rename
, Sequence
,
18 from ..common
.construct_utils
import (RepeatUntilExcluding
, ULEB128
, SLEB128
,
23 class DWARFStructs(object):
24 """ Exposes Construct structs suitable for parsing information from DWARF
25 sections. Each compile unit in DWARF info can have its own structs
26 object. Keep in mind that these structs have to be given a name (by
27 calling them with a name) before being used for parsing (like other
28 Construct structs). Those that should be used without a name are marked
31 Accessible attributes (mostly as described in chapter 7 of the DWARF
34 Dwarf_[u]int{8,16,32,64):
35 Data chunks of the common sizes
38 32-bit or 64-bit word, depending on dwarf_format
41 32-bit or 64-bit word, depending on dwarf_format
44 32-bit or 64-bit word, depending on address size
47 "Initial length field" encoding
51 ULEB128 and SLEB128 variable-length encoding
54 Compilation unit header
56 Dwarf_abbrev_declaration (+):
57 Abbreviation table declaration - doesn't include the initial
58 code, only the contents.
61 A dictionary mapping 'DW_FORM_*' keys into construct Structs
62 that parse such forms. These Structs have already been given
65 Dwarf_lineprog_header (+):
68 Dwarf_lineprog_file_entry (+):
69 A single file entry in a line program header or instruction
77 See also the documentation of public methods.
def __new__(cls, little_endian, dwarf_format, address_size, dwarf_version=2):
    """ Return the structs object for the given parameters, creating and
        caching it the first time a combination is requested.

        little_endian:
            True if the file is little endian, False if big

        dwarf_format:
            DWARF Format: 32 or 64-bit (see spec section 7.4)

        address_size:
            Target machine address size, in bytes (4 or 8)

        dwarf_version:
            Numeric DWARF version
    """
    cache_key = (little_endian, dwarf_format, address_size, dwarf_version)

    if cache_key not in cls.__StructsCache:
        instance = super().__new__(cls)
        # Validation only runs on a cache miss, i.e. before the first build.
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4, str(address_size)
        instance.little_endian = little_endian
        instance.dwarf_format = dwarf_format
        instance.address_size = address_size
        instance.dwarf_version = dwarf_version
        instance._create_structs()
        cls.__StructsCache[cache_key] = instance

    return cls.__StructsCache[cache_key]
def initial_length_field_size(self):
    """ Size of an initial length field: 4 when dwarf_format is 32,
        otherwise 12.
    """
    if self.dwarf_format == 32:
        return 4
    return 12
def _create_structs(self):
    """ Bind the integer constructs matching this object's endianness,
        format and address size, then build every composite struct.
    """
    # Pick the unsigned/signed families once, by endianness.
    if self.little_endian:
        u8, u16, u32, u64 = ULInt8, ULInt16, ULInt32, ULInt64
        s8, s16, s32, s64 = SLInt8, SLInt16, SLInt32, SLInt64
    else:
        u8, u16, u32, u64 = UBInt8, UBInt16, UBInt32, UBInt64
        s8, s16, s32, s64 = SBInt8, SBInt16, SBInt32, SBInt64

    self.Dwarf_uint8 = u8
    self.Dwarf_uint16 = u16
    self.Dwarf_uint32 = u32
    self.Dwarf_uint64 = u64

    # Offsets and lengths follow the DWARF format; addresses follow the
    # target address size.
    format_word = u32 if self.dwarf_format == 32 else u64
    self.Dwarf_offset = format_word
    self.Dwarf_length = format_word
    self.Dwarf_target_addr = u32 if self.address_size == 4 else u64

    self.Dwarf_int8 = s8
    self.Dwarf_int16 = s16
    self.Dwarf_int32 = s32
    self.Dwarf_int64 = s64

    self._create_initial_length()
    self._create_leb128()
    self._create_cu_header()
    self._create_abbrev_declaration()
    self._create_dw_form()
    self._create_lineprog_header()
    self._create_callframe_entry_headers()
    self._create_aranges_header()
    self._create_nameLUT_header()
    self._create_string_offsets_table_header()
    self._create_address_table_header()
    self._create_loclists_parsers()
    self._create_rnglists_parsers()

    self._create_debugsup()
    self._create_gnu_debugaltlink()
def _create_initial_length(self):
    """ Install Dwarf_initial_length, a factory that takes a field name and
        returns the adapted construct for an initial length field.
    """
    def _InitialLength(name):
        # The first 32-bit word is always read. The 64-bit second word is
        # read from the stream only when the first word holds the
        # continuation value.
        first_word = self.Dwarf_uint32('first')
        second_word = If(lambda ctx: ctx.first == 0xFFFFFFFF,
                         self.Dwarf_uint64('second'),
                         elsevalue=None)
        return _InitialLengthAdapter(Struct(name, first_word, second_word))

    self.Dwarf_initial_length = _InitialLength
def _create_leb128(self):
    """ Expose the ULEB128 and SLEB128 constructs as Dwarf_uleb128 and
        Dwarf_sleb128.
    """
    self.Dwarf_uleb128, self.Dwarf_sleb128 = ULEB128, SLEB128
def _create_cu_header(self):
    """ Build Dwarf_CU_header. The fields following 'version' depend on the
        parsed version value.
    """
    # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
    v5_fields = Embed(Struct('',
        self.Dwarf_uint8('unit_type'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_offset('debug_abbrev_offset')))
    legacy_fields = Embed(Struct('',
        self.Dwarf_offset('debug_abbrev_offset'),
        self.Dwarf_uint8('address_size')))

    self.Dwarf_CU_header = Struct('Dwarf_CU_header',
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        IfThenElse('', lambda ctx: ctx['version'] >= 5,
            v5_fields,
            legacy_fields))
def _create_abbrev_declaration(self):
    """ Build Dwarf_abbrev_declaration: tag, children flag, then attribute
        specs repeated up to (and excluding) the null name/form pair.
    """
    def is_terminator(obj, ctx):
        return obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null'

    attr_spec = Struct('attr_spec',
        Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
        # Only DW_FORM_implicit_const carries a value in the declaration.
        If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
            self.Dwarf_sleb128('value')))

    self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
        Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
        Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
        RepeatUntilExcluding(is_terminator, attr_spec))
def _create_debugsup(self):
    """ Build Dwarf_debugsup: version, is_supplementary flag and the
        supplementary file name.
    """
    # We don't care about checksums, for now.
    fields = (
        self.Dwarf_int16('version'),
        self.Dwarf_uint8('is_supplementary'),
        CString('sup_filename'),
    )
    self.Dwarf_debugsup = Struct('Elf_debugsup', *fields)
def _create_gnu_debugaltlink(self):
    """ Build Dwarf_debugaltlink: a file name followed by a 20-byte
        checksum string.
    """
    filename = CString("sup_filename")
    checksum = String("sup_checksum", length=20)
    self.Dwarf_debugaltlink = Struct('Elf_debugaltlink', filename, checksum)
def _create_dw_form(self):
    """ Build Dwarf_dw_form: a dictionary mapping form names to the
        construct that parses a value of that form. Every construct is
        created with an empty name.
    """
    self.Dwarf_dw_form = dict(
        DW_FORM_addr=self.Dwarf_target_addr(''),
        DW_FORM_addrx=self.Dwarf_uleb128(''),
        DW_FORM_addrx1=self.Dwarf_uint8(''),
        DW_FORM_addrx2=self.Dwarf_uint16(''),
        # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
        DW_FORM_addrx4=self.Dwarf_uint32(''),

        DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
        DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
        DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
        DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

        # All DW_FORM_data<n> forms are assumed to be unsigned
        DW_FORM_data1=self.Dwarf_uint8(''),
        DW_FORM_data2=self.Dwarf_uint16(''),
        DW_FORM_data4=self.Dwarf_uint32(''),
        DW_FORM_data8=self.Dwarf_uint64(''),
        DW_FORM_data16=Array(16, self.Dwarf_uint8('')),  # Used for hashes and such, not for integers
        DW_FORM_sdata=self.Dwarf_sleb128(''),
        DW_FORM_udata=self.Dwarf_uleb128(''),

        DW_FORM_string=CString(''),
        DW_FORM_strp=self.Dwarf_offset(''),
        DW_FORM_strp_sup=self.Dwarf_offset(''),
        DW_FORM_line_strp=self.Dwarf_offset(''),
        DW_FORM_strx1=self.Dwarf_uint8(''),
        DW_FORM_strx2=self.Dwarf_uint16(''),
        # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
        # strx4 is a 4-byte index, like addrx4 above. Reading 8 bytes here
        # would consume part of whatever follows the attribute.
        DW_FORM_strx4=self.Dwarf_uint32(''),
        DW_FORM_flag=self.Dwarf_uint8(''),

        DW_FORM_ref=self.Dwarf_uint32(''),
        DW_FORM_ref1=self.Dwarf_uint8(''),
        DW_FORM_ref2=self.Dwarf_uint16(''),
        DW_FORM_ref4=self.Dwarf_uint32(''),
        DW_FORM_ref_sup4=self.Dwarf_uint32(''),
        DW_FORM_ref8=self.Dwarf_uint64(''),
        DW_FORM_ref_sup8=self.Dwarf_uint64(''),
        DW_FORM_ref_udata=self.Dwarf_uleb128(''),
        # Address-sized for DWARF version 2, offset-sized for any other
        # version.
        DW_FORM_ref_addr=self.Dwarf_target_addr('') if self.dwarf_version == 2 else self.Dwarf_offset(''),

        DW_FORM_indirect=self.Dwarf_uleb128(''),

        # New forms in DWARFv4
        DW_FORM_flag_present=StaticField('', 0),
        DW_FORM_sec_offset=self.Dwarf_offset(''),
        DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
        DW_FORM_ref_sig8=self.Dwarf_uint64(''),

        DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
        DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
        DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),

        # New forms in DWARFv5
        DW_FORM_loclistx=self.Dwarf_uleb128(''),
        DW_FORM_rnglistx=self.Dwarf_uleb128('')
    )
def _create_aranges_header(self):
    """ Build Dwarf_aranges_header from its five fixed fields.
    """
    fields = [
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_offset('debug_info_offset'),  # a little tbd
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_size'),
    ]
    self.Dwarf_aranges_header = Struct("Dwarf_aranges_header", *fields)
def _create_nameLUT_header(self):
    """ Build Dwarf_nameLUT_header: unit length, version, and the offset
        and length of the associated debug info.
    """
    fields = [
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_offset('debug_info_offset'),
        self.Dwarf_length('debug_info_length'),
    ]
    self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header", *fields)
def _create_string_offsets_table_header(self):
    """ Build Dwarf_string_offsets_table_header: unit length, version and
        a 16-bit padding field.
    """
    fields = [
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint16('padding'),
    ]
    self.Dwarf_string_offsets_table_header = Struct(
        "Dwarf_string_offets_table_header", *fields)
def _create_address_table_header(self):
    """ Build Dwarf_address_table_header: unit length, version, address
        size and segment selector size.
    """
    fields = [
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_selector_size'),
    ]
    self.Dwarf_address_table_header = Struct(
        "Dwarf_address_table_header", *fields)
def _create_lineprog_header(self):
    """ Build Dwarf_lineprog_file_entry and Dwarf_lineprog_header. The
        header carries both the version >= 5 directory/file tables and the
        legacy (version < 5) lists, each guarded by the parsed version.
    """
    # A file entry is terminated by a NULL byte, so we don't want to parse
    # past it. Therefore an If is used.
    file_entry_details = Embed(Struct('',
        self.Dwarf_uleb128('dir_index'),
        self.Dwarf_uleb128('mtime'),
        self.Dwarf_uleb128('length')))
    self.Dwarf_lineprog_file_entry = Struct('file_entry',
        CString('name'),
        If(lambda ctx: len(ctx.name) != 0, file_entry_details))

    class FormattedEntry(Construct):
        # Generates a parser based on a previously parsed piece,
        # similar to deprecated Dynamic.
        # Strings are resolved later, since it potentially requires
        # looking at another section.
        def __init__(self, name, structs, format_field):
            Construct.__init__(self, name)
            self.structs = structs
            self.format_field = format_field

        def _parse(self, stream, context):
            # Somewhat tricky technique here, explicitly writing back to
            # the context: the parser is built once per context and stored
            # under "<format_field>_parser".
            parser_key = self.format_field + "_parser"
            if parser_key not in context:
                forms = self.structs.Dwarf_dw_form
                fields = tuple(
                    Rename(fmt.content_type, forms[fmt.form])
                    for fmt in context[self.format_field])
                context[parser_key] = Struct('formatted_entry', *fields)
            return context[parser_key]._parse(stream, context)

    def ver5(ctx):
        return ctx.version >= 5

    def legacy(ctx):
        return ctx.version < 5

    def entry_format_table(entry_name, count_name):
        # Count-prefixed array of (content_type, form) descriptors.
        return PrefixedArray(
            Struct(entry_name,
                Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
            self.Dwarf_uint8(count_name))

    self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
        self.Dwarf_initial_length('unit_length'),
        self.Dwarf_uint16('version'),
        If(ver5,
            self.Dwarf_uint8("address_size"),
            None),
        If(ver5,
            self.Dwarf_uint8("segment_selector_size"),
            None),
        self.Dwarf_offset('header_length'),
        self.Dwarf_uint8('minimum_instruction_length'),
        If(lambda ctx: ctx.version >= 4,
            self.Dwarf_uint8("maximum_operations_per_instruction"),
            1),
        self.Dwarf_uint8('default_is_stmt'),
        self.Dwarf_int8('line_base'),
        self.Dwarf_uint8('line_range'),
        self.Dwarf_uint8('opcode_base'),
        Array(lambda ctx: ctx.opcode_base - 1,
              self.Dwarf_uint8('standard_opcode_lengths')),
        If(ver5,
            entry_format_table('directory_entry_format',
                               "directory_entry_format_count")),
        # Name deliberately doesn't match the legacy object, since the
        # format can't be made compatible
        If(ver5,
            PrefixedArray(
                FormattedEntry('directories', self, "directory_entry_format"),
                self.Dwarf_uleb128('directories_count'))),
        If(ver5,
            entry_format_table('file_name_entry_format',
                               "file_name_entry_format_count")),
        If(ver5,
            PrefixedArray(
                FormattedEntry('file_names', self, "file_name_entry_format"),
                self.Dwarf_uleb128('file_names_count'))),
        # Legacy directories/files - DWARF < 5 only
        If(legacy,
            RepeatUntilExcluding(
                lambda obj, ctx: obj == b'',
                CString('include_directory'))),
        If(legacy,
            RepeatUntilExcluding(
                lambda obj, ctx: len(obj.name) == 0,
                self.Dwarf_lineprog_file_entry))  # array name is file_entry
        )
def _create_callframe_entry_headers(self):
    """ Build the call-frame headers: EH_CIE_header, Dwarf_CIE_header and
        Dwarf_FDE_header.
    """
    def cie_header(with_v4_fields):
        # Fresh field constructs are created on every call so the two
        # header variants never share sub-constructs.
        fields = [
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
        ]
        if with_v4_fields:
            fields.append(self.Dwarf_uint8('address_size'))
            fields.append(self.Dwarf_uint8('segment_size'))
        fields.extend([
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'),
        ])
        return Struct('Dwarf_CIE_header', *fields)

    # EH_CIE_header always uses the layout without the two extra fields.
    self.EH_CIE_header = cie_header(False)

    # The CIE header was modified in DWARFv4.
    if self.dwarf_version == 4:
        self.Dwarf_CIE_header = cie_header(True)
    else:
        self.Dwarf_CIE_header = self.EH_CIE_header

    self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
        self.Dwarf_initial_length('length'),
        self.Dwarf_offset('CIE_pointer'),
        self.Dwarf_target_addr('initial_location'),
        self.Dwarf_target_addr('address_range'))
def _make_block_struct(self, length_field):
    """ Create a struct for DW_FORM_block<size>: an array of bytes prefixed
        by a count read with the construct that length_field('') returns.
    """
    byte_elem = self.Dwarf_uint8('elem')
    count_prefix = length_field('')
    return PrefixedArray(subcon=byte_elem, length_field=count_prefix)
def _create_loclists_parsers(self):
    """ Create the structs for debug_loclists: the CU header, the counted
        location description, the entry list and the locview pair.
    """
    uleb = self.Dwarf_uleb128
    addr = self.Dwarf_target_addr

    self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
        StreamOffset('cu_offset'),
        self.Dwarf_initial_length('unit_length'),
        Value('is64', lambda ctx: ctx.is64),
        StreamOffset('offset_after_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_selector_size'),
        self.Dwarf_uint32('offset_count'),
        StreamOffset('offset_table_offset'))

    cld = PrefixedArray(self.Dwarf_uint8('loc_expr'), uleb(''))
    self.Dwarf_loclists_counted_location_description = cld

    # Payload layout per entry type. Several entries reuse the
    # 'startx_endx' struct name.
    payload_by_type = {
        'DW_LLE_end_of_list': Struct('end_of_list'),
        'DW_LLE_base_addressx': Struct('base_addressx', uleb('index')),
        'DW_LLE_startx_endx': Struct('startx_endx', uleb('start_index'), uleb('end_index'), cld),
        'DW_LLE_startx_length': Struct('startx_endx', uleb('start_index'), uleb('length'), cld),
        'DW_LLE_offset_pair': Struct('startx_endx', uleb('start_offset'), uleb('end_offset'), cld),
        'DW_LLE_default_location': Struct('default_location', cld),
        'DW_LLE_base_address': Struct('base_address', addr('address')),
        'DW_LLE_start_end': Struct('start_end', addr('start_address'), addr('end_address'), cld),
        'DW_LLE_start_length': Struct('start_length', addr('start_address'), uleb('length'), cld),
    }

    entry = Struct('entry',
        StreamOffset('entry_offset'),
        Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE),
        Embed(Switch('', lambda ctx: ctx.entry_type, payload_by_type)),
        StreamOffset('entry_end_offset'),
        Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset))

    self.Dwarf_loclists_entries = RepeatUntilExcluding(
        lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list',
        entry)

    self.Dwarf_locview_pair = Struct('locview_pair',
        StreamOffset('entry_offset'), uleb('begin'), uleb('end'))
def _create_rnglists_parsers(self):
    """ Create the structs for debug_rnglists: the CU header and the
        entry list.
    """
    uleb = self.Dwarf_uleb128
    addr = self.Dwarf_target_addr

    self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header',
        StreamOffset('cu_offset'),
        self.Dwarf_initial_length('unit_length'),
        Value('is64', lambda ctx: ctx.is64),
        StreamOffset('offset_after_length'),
        self.Dwarf_uint16('version'),
        self.Dwarf_uint8('address_size'),
        self.Dwarf_uint8('segment_selector_size'),
        self.Dwarf_uint32('offset_count'),
        StreamOffset('offset_table_offset'))

    # Payload layout per entry type. Several entries reuse the
    # 'startx_endx' struct name.
    payload_by_type = {
        'DW_RLE_end_of_list': Struct('end_of_list'),
        'DW_RLE_base_addressx': Struct('base_addressx', uleb('index')),
        'DW_RLE_startx_endx': Struct('startx_endx', uleb('start_index'), uleb('end_index')),
        'DW_RLE_startx_length': Struct('startx_endx', uleb('start_index'), uleb('length')),
        'DW_RLE_offset_pair': Struct('startx_endx', uleb('start_offset'), uleb('end_offset')),
        'DW_RLE_base_address': Struct('base_address', addr('address')),
        'DW_RLE_start_end': Struct('start_end', addr('start_address'), addr('end_address')),
        'DW_RLE_start_length': Struct('start_length', addr('start_address'), uleb('length')),
    }

    entry = Struct('entry',
        StreamOffset('entry_offset'),
        Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_RLE),
        Embed(Switch('', lambda ctx: ctx.entry_type, payload_by_type)),
        StreamOffset('entry_end_offset'),
        Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset))

    self.Dwarf_rnglists_entries = RepeatUntilExcluding(
        lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list',
        entry)
506 class _InitialLengthAdapter(Adapter
):
507 """ A standard Construct adapter that expects a sub-construct
508 as a struct with one or two values (first, second).
510 def _decode(self
, obj
, context
):
511 if obj
.first
< 0xFFFFFF00:
512 context
['is64'] = False
515 if obj
.first
== 0xFFFFFFFF:
516 context
['is64'] = True
519 raise ConstructError("Failed decoding initial length for %X" % (