1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from logging
.config
import valid_ident
11 from ..construct
import (
12 UBInt8
, UBInt16
, UBInt32
, UBInt64
, ULInt8
, ULInt16
, ULInt32
, ULInt64
,
13 SBInt8
, SBInt16
, SBInt32
, SBInt64
, SLInt8
, SLInt16
, SLInt32
, SLInt64
,
14 Adapter
, Struct
, ConstructError
, If
, Enum
, Array
, PrefixedArray
,
15 CString
, Embed
, StaticField
, IfThenElse
, Construct
, Rename
, Sequence
,
18 from ..common
.construct_utils
import (RepeatUntilExcluding
, ULEB128
, SLEB128
,
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_length:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+), EH_CIE_header (+), Dwarf_FDE_header (+):
                Call-frame CIE and FDE headers

            Dwarf_aranges_header (+), Dwarf_nameLUT_header (+),
            Dwarf_string_offsets_table_header (+),
            Dwarf_address_table_header (+):
                Headers of the corresponding lookup/offset tables

            Dwarf_loclists_CU_header (+), Dwarf_loclists_entries (+),
            Dwarf_loclists_counted_location_description (+),
            Dwarf_locview_pair (+),
            Dwarf_rnglists_CU_header (+), Dwarf_rnglists_entries (+):
                Location list and range list parsers

        See also the documentation of public methods.
    """
    def __init__(self,
                 little_endian, dwarf_format, address_size, dwarf_version=2):
        """ little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8)

            dwarf_version:
                Numeric DWARF version

            All the structs are created eagerly, at construction time.
        """
        # Kept as assertions (rather than raising ValueError) so that callers
        # keep seeing the same exception type on bad arguments.
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4, str(address_size)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field, in bytes: 4 for the 32-bit DWARF
            format, 12 otherwise (a 32-bit escape word followed by a 64-bit
            length, see _create_initial_length).
        """
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Populate this object with all the Dwarf_* parsers. The primitive
            integer parsers are selected first, since every other struct is
            built out of them.
        """
        # Pick the integer families once, according to endianness; everything
        # below is then independent of byte order.
        if self.little_endian:
            uint8, uint16, uint32, uint64 = ULInt8, ULInt16, ULInt32, ULInt64
            int8, int16, int32, int64 = SLInt8, SLInt16, SLInt32, SLInt64
        else:
            uint8, uint16, uint32, uint64 = UBInt8, UBInt16, UBInt32, UBInt64
            int8, int16, int32, int64 = SBInt8, SBInt16, SBInt32, SBInt64

        self.Dwarf_uint8 = uint8
        self.Dwarf_uint16 = uint16
        self.Dwarf_uint32 = uint32
        self.Dwarf_uint64 = uint64
        # Offsets and lengths follow the DWARF format (32/64), while target
        # addresses follow the machine address size.
        self.Dwarf_offset = uint32 if self.dwarf_format == 32 else uint64
        self.Dwarf_length = uint32 if self.dwarf_format == 32 else uint64
        self.Dwarf_target_addr = uint32 if self.address_size == 4 else uint64
        self.Dwarf_int8 = int8
        self.Dwarf_int16 = int16
        self.Dwarf_int32 = int32
        self.Dwarf_int64 = int64

        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()
        self._create_nameLUT_header()
        self._create_string_offsets_table_header()
        self._create_address_table_header()
        self._create_loclists_parsers()
        self._create_rnglists_parsers()

    def _create_initial_length(self):
        """ Create Dwarf_initial_length: a factory that, given a name, returns
            a parser for the "initial length" field.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Expose the LEB128 parsers under the Dwarf_* naming scheme.
        """
        self.Dwarf_uleb128 = ULEB128
        self.Dwarf_sleb128 = SLEB128

    def _create_cu_header(self):
        """ Create Dwarf_CU_header. The fields following 'version' depend on
            the parsed version value.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
            IfThenElse('', lambda ctx: ctx['version'] >= 5,
                Embed(Struct('',
                    self.Dwarf_uint8('unit_type'),
                    self.Dwarf_uint8('address_size'),
                    self.Dwarf_offset('debug_abbrev_offset'))),
                Embed(Struct('',
                    self.Dwarf_offset('debug_abbrev_offset'),
                    self.Dwarf_uint8('address_size'))),
            ))

    def _create_abbrev_declaration(self):
        """ Create Dwarf_abbrev_declaration: tag, children flag and the list
            of attribute specs, which ends at (and excludes) the
            DW_AT_null/DW_FORM_null pair.
        """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
                    # Only DW_FORM_implicit_const carries its value in the
                    # abbreviation declaration itself.
                    If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
                        self.Dwarf_sleb128('value')))))

    def _create_dw_form(self):
        """ Create Dwarf_dw_form: a dictionary mapping form names to parsers
            (already given an empty dummy name) for values of that form.
        """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),
            DW_FORM_addrx=self.Dwarf_uleb128(''),
            DW_FORM_addrx1=self.Dwarf_uint8(''),
            DW_FORM_addrx2=self.Dwarf_uint16(''),
            # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
            DW_FORM_addrx4=self.Dwarf_uint32(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_line_strp=self.Dwarf_offset(''),
            # Index forms: the numeric suffix is the size of the index in
            # bytes, the unsuffixed form is a ULEB128.
            DW_FORM_strx=self.Dwarf_uleb128(''),
            DW_FORM_strx1=self.Dwarf_uint8(''),
            DW_FORM_strx2=self.Dwarf_uint16(''),
            # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
            # A 4-byte index, like DW_FORM_addrx4. Reading 8 bytes here would
            # swallow the beginning of the next attribute.
            DW_FORM_strx4=self.Dwarf_uint32(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref=self.Dwarf_uint32(''),
            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            # Address-sized in DWARFv2 only, offset-sized in any other version
            DW_FORM_ref_addr=(
                self.Dwarf_target_addr('') if self.dwarf_version == 2
                else self.Dwarf_offset('')),

            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            # The flag is implied by the attribute being present, so nothing
            # is read from the stream (a zero-length field).
            DW_FORM_flag_present=StaticField('', 0),
            DW_FORM_sec_offset=self.Dwarf_offset(''),
            DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
            DW_FORM_ref_sig8=self.Dwarf_uint64(''),

            # DWARFv5 indices into the location/range list offset tables
            DW_FORM_loclistx=self.Dwarf_uleb128(''),
            DW_FORM_rnglistx=self.Dwarf_uleb128(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            # NOTE(review): a DW_AT_* key in a table of DW_FORM_* keys looks
            # out of place; kept as is for backward compatibility.
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),
        )

    def _create_aranges_header(self):
        """ Create Dwarf_aranges_header.
        """
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'), # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
            )

    def _create_nameLUT_header(self):
        """ Create Dwarf_nameLUT_header.
        """
        self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),
            self.Dwarf_length('debug_info_length')
            )

    def _create_string_offsets_table_header(self):
        """ Create Dwarf_string_offsets_table_header.
        """
        self.Dwarf_string_offsets_table_header = Struct(
            "Dwarf_string_offets_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint16('padding'),
            )

    def _create_address_table_header(self):
        """ Create Dwarf_address_table_header.
        """
        self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            )

    def _create_lineprog_header(self):
        """ Create Dwarf_lineprog_file_entry and Dwarf_lineprog_header. The
            header layout depends on the parsed 'version' field: version 5
            describes directories and file names through format tables, older
            versions use NULL-terminated lists.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        class FormattedEntry(Construct):
            # Generates a parser based on a previously parsed piece,
            # similar to deprecated Dynamic.
            # Strings are resolved later, since it potentially requires
            # looking at another section.
            def __init__(self, name, structs, format_field):
                Construct.__init__(self, name)
                self.structs = structs
                self.format_field = format_field

            def _parse(self, stream, context):
                # Somewhat tricky technique here, explicitly writing back to
                # the context: the parser generated from the format table is
                # stored there, so it is built once and reused afterwards.
                parser_key = self.format_field + "_parser"
                if parser_key in context:
                    parser = context[parser_key]
                else:
                    fields = tuple(
                        Rename(f.content_type,
                               self.structs.Dwarf_dw_form[f.form])
                        for f in context[self.format_field])
                    parser = Struct('formatted_entry', *fields)
                    context[parser_key] = parser
                return parser._parse(stream, context)

        def ver5(ctx):
            return ctx.version >= 5

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            If(ver5,
                self.Dwarf_uint8("address_size"),
                None),
            If(ver5,
                self.Dwarf_uint8("segment_selector_size"),
                None),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # Not present in the stream before version 4; 1 is used instead.
            If(lambda ctx: ctx.version >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx.opcode_base - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            If(ver5,
                PrefixedArray(
                    Struct('directory_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("directory_entry_format_count"))),
            If(ver5, # Name deliberately doesn't match the legacy object, since the format can't be made compatible
                PrefixedArray(
                    FormattedEntry('directories', self, "directory_entry_format"),
                    self.Dwarf_uleb128('directories_count'))),
            If(ver5,
                PrefixedArray(
                    Struct('file_name_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("file_name_entry_format_count"))),
            If(ver5,
                PrefixedArray(
                    FormattedEntry('file_names', self, "file_name_entry_format"),
                    self.Dwarf_uleb128('file_names_count'))),
            # Legacy directories/files - DWARF < 5 only
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: obj == b'',
                    CString('include_directory'))),
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: len(obj.name) == 0,
                    self.Dwarf_lineprog_file_entry)) # array name is file_entry
            )

    def _create_callframe_entry_headers(self):
        """ Create Dwarf_CIE_header, EH_CIE_header and Dwarf_FDE_header.
        """
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'))
        # Bound before the DWARFv4 override below, so EH_CIE_header always
        # keeps the layout without address_size/segment_size.
        self.EH_CIE_header = self.Dwarf_CIE_header

        # The CIE header was modified in DWARFv4.
        # NOTE(review): only a dwarf_version of exactly 4 selects this layout;
        # confirm whether later versions should select it as well.
        if self.dwarf_version == 4:
            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
                self.Dwarf_initial_length('length'),
                self.Dwarf_offset('CIE_id'),
                self.Dwarf_uint8('version'),
                CString('augmentation'),
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
                self.Dwarf_uleb128('code_alignment_factor'),
                self.Dwarf_sleb128('data_alignment_factor'),
                self.Dwarf_uleb128('return_address_register'))

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>: an array of bytes
            prefixed by its length.

            length_field:
                One of the (not yet named) Dwarf_* parsers, used for parsing
                the length prefix.
        """
        return PrefixedArray(
                    subcon=self.Dwarf_uint8('elem'),
                    length_field=length_field(''))

    def _create_loclists_parsers(self):
        """ Create a struct for debug_loclists CU header, DWARFv5, 7.29, as
            well as the parsers for location list entries and location view
            pairs.
        """
        self.Dwarf_loclists_CU_header = Struct('Dwarf_loclists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # 'is64' is written into the context while the initial length is
            # being decoded; this copies it into the parsed header.
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        # A location expression prefixed by its ULEB128 byte count
        cld = self.Dwarf_loclists_counted_location_description = PrefixedArray(
            self.Dwarf_uint8('loc_expr'), self.Dwarf_uleb128(''))

        # The case structs are embedded into the entry, so their names only
        # serve as documentation.
        self.Dwarf_loclists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_LLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_LLE),
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_LLE_end_of_list'      : Struct('end_of_list'),
                    'DW_LLE_base_addressx'    : Struct('base_addressx', self.Dwarf_uleb128('index')),
                    'DW_LLE_startx_endx'      : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index'), cld),
                    'DW_LLE_startx_length'    : Struct('startx_length', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length'), cld),
                    'DW_LLE_offset_pair'      : Struct('offset_pair', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset'), cld),
                    'DW_LLE_default_location' : Struct('default_location', cld),
                    'DW_LLE_base_address'     : Struct('base_address', self.Dwarf_target_addr('address')),
                    'DW_LLE_start_end'        : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address'), cld),
                    'DW_LLE_start_length'     : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'), cld),
                })),
                StreamOffset('entry_end_offset'),
                # Same derived field as in the range list entries
                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))

        self.Dwarf_locview_pair = Struct('locview_pair',
            StreamOffset('entry_offset'), self.Dwarf_uleb128('begin'), self.Dwarf_uleb128('end'))

    def _create_rnglists_parsers(self):
        """ Create a struct for debug_rnglists CU header, as well as the
            parser for range list entries.
        """
        self.Dwarf_rnglists_CU_header = Struct('Dwarf_rnglists_CU_header',
            StreamOffset('cu_offset'),
            self.Dwarf_initial_length('unit_length'),
            # 'is64' is written into the context while the initial length is
            # being decoded; this copies it into the parsed header.
            Value('is64', lambda ctx: ctx.is64),
            StreamOffset('offset_after_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            self.Dwarf_uint32('offset_count'),
            StreamOffset('offset_table_offset'))

        # The case structs are embedded into the entry, so their names only
        # serve as documentation.
        self.Dwarf_rnglists_entries = RepeatUntilExcluding(
            lambda obj, ctx: obj.entry_type == 'DW_RLE_end_of_list',
            Struct('entry',
                StreamOffset('entry_offset'),
                Enum(self.Dwarf_uint8('entry_type'), **ENUM_DW_RLE),
                Embed(Switch('', lambda ctx: ctx.entry_type,
                {
                    'DW_RLE_end_of_list'   : Struct('end_of_list'),
                    'DW_RLE_base_addressx' : Struct('base_addressx', self.Dwarf_uleb128('index')),
                    'DW_RLE_startx_endx'   : Struct('startx_endx', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('end_index')),
                    'DW_RLE_startx_length' : Struct('startx_length', self.Dwarf_uleb128('start_index'), self.Dwarf_uleb128('length')),
                    'DW_RLE_offset_pair'   : Struct('offset_pair', self.Dwarf_uleb128('start_offset'), self.Dwarf_uleb128('end_offset')),
                    'DW_RLE_base_address'  : Struct('base_address', self.Dwarf_target_addr('address')),
                    'DW_RLE_start_end'     : Struct('start_end', self.Dwarf_target_addr('start_address'), self.Dwarf_target_addr('end_address')),
                    'DW_RLE_start_length'  : Struct('start_length', self.Dwarf_target_addr('start_address'), self.Dwarf_uleb128('length'))
                })),
                StreamOffset('entry_end_offset'),
                Value('entry_length', lambda ctx: ctx.entry_end_offset - ctx.entry_offset)))
472 class _InitialLengthAdapter(Adapter
):
473 """ A standard Construct adapter that expects a sub-construct
474 as a struct with one or two values (first, second).
476 def _decode(self
, obj
, context
):
477 if obj
.first
< 0xFFFFFF00:
478 context
['is64'] = False
481 if obj
.first
== 0xFFFFFFFF:
482 context
['is64'] = True
485 raise ConstructError("Failed decoding initial length for %X" % (