1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from elftools
.construct
.core
import Subconstruct
11 from elftools
.construct
.macros
import Embedded
12 from ..construct
import (
13 UBInt8
, UBInt16
, UBInt32
, UBInt64
, ULInt8
, ULInt16
, ULInt32
, ULInt64
,
14 SBInt8
, SBInt16
, SBInt32
, SBInt64
, SLInt8
, SLInt16
, SLInt32
, SLInt64
,
15 Adapter
, Struct
, ConstructError
, If
, Enum
, Array
, PrefixedArray
,
16 CString
, Embed
, StaticField
, IfThenElse
, Construct
, Rename
, Sequence
18 from ..common
.construct_utils
import RepeatUntilExcluding
, ULEB128
, SLEB128
22 class DWARFStructs(object):
23 """ Exposes Construct structs suitable for parsing information from DWARF
24 sections. Each compile unit in DWARF info can have its own structs
25 object. Keep in mind that these structs have to be given a name (by
26 calling them with a name) before being used for parsing (like other
27 Construct structs). Those that should be used without a name are marked
30 Accessible attributes (mostly as described in chapter 7 of the DWARF
            Dwarf_[u]int{8,16,32,64}:
34 Data chunks of the common sizes
37 32-bit or 64-bit word, depending on dwarf_format
40 32-bit or 64-bit word, depending on dwarf_format
43 32-bit or 64-bit word, depending on address size
46 "Initial length field" encoding
50 ULEB128 and SLEB128 variable-length encoding
53 Compilation unit header
55 Dwarf_abbrev_declaration (+):
56 Abbreviation table declaration - doesn't include the initial
57 code, only the contents.
60 A dictionary mapping 'DW_FORM_*' keys into construct Structs
61 that parse such forms. These Structs have already been given
64 Dwarf_lineprog_header (+):
67 Dwarf_lineprog_file_entry (+):
68 A single file entry in a line program header or instruction
76 See also the documentation of public methods.
79 little_endian
, dwarf_format
, address_size
, dwarf_version
=2):
84 True if the file is little endian, False if big
87 DWARF Format: 32 or 64-bit (see spec section 7.4)
90 Target machine address size, in bytes (4 or 8). (See spec
93 assert dwarf_format
== 32 or dwarf_format
== 64
94 assert address_size
== 8 or address_size
== 4, str(address_size
)
95 self
.little_endian
= little_endian
96 self
.dwarf_format
= dwarf_format
97 self
.address_size
= address_size
98 self
.dwarf_version
= dwarf_version
99 self
._create
_structs
()
101 def initial_length_field_size(self
):
102 """ Size of an initial length field.
104 return 4 if self
.dwarf_format
== 32 else 12
106 def _create_structs(self
):
107 if self
.little_endian
:
108 self
.Dwarf_uint8
= ULInt8
109 self
.Dwarf_uint16
= ULInt16
110 self
.Dwarf_uint32
= ULInt32
111 self
.Dwarf_uint64
= ULInt64
112 self
.Dwarf_offset
= ULInt32
if self
.dwarf_format
== 32 else ULInt64
113 self
.Dwarf_length
= ULInt32
if self
.dwarf_format
== 32 else ULInt64
114 self
.Dwarf_target_addr
= (
115 ULInt32
if self
.address_size
== 4 else ULInt64
)
116 self
.Dwarf_int8
= SLInt8
117 self
.Dwarf_int16
= SLInt16
118 self
.Dwarf_int32
= SLInt32
119 self
.Dwarf_int64
= SLInt64
121 self
.Dwarf_uint8
= UBInt8
122 self
.Dwarf_uint16
= UBInt16
123 self
.Dwarf_uint32
= UBInt32
124 self
.Dwarf_uint64
= UBInt64
125 self
.Dwarf_offset
= UBInt32
if self
.dwarf_format
== 32 else UBInt64
126 self
.Dwarf_length
= UBInt32
if self
.dwarf_format
== 32 else UBInt64
127 self
.Dwarf_target_addr
= (
128 UBInt32
if self
.address_size
== 4 else UBInt64
)
129 self
.Dwarf_int8
= SBInt8
130 self
.Dwarf_int16
= SBInt16
131 self
.Dwarf_int32
= SBInt32
132 self
.Dwarf_int64
= SBInt64
134 self
._create
_initial
_length
()
135 self
._create
_leb
128()
136 self
._create
_cu
_header
()
137 self
._create
_abbrev
_declaration
()
138 self
._create
_dw
_form
()
139 self
._create
_lineprog
_header
()
140 self
._create
_callframe
_entry
_headers
()
141 self
._create
_aranges
_header
()
142 self
._create
_nameLUT
_header
()
143 self
._create
_string
_offsets
_table
_header
()
144 self
._create
_address
_table
_header
()
    def _create_initial_length(self):
        """ Define Dwarf_initial_length: a callable that takes a name and
            returns the construct parsing an "initial length" field under
            that name.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength
159 def _create_leb128(self
):
160 self
.Dwarf_uleb128
= ULEB128
161 self
.Dwarf_sleb128
= SLEB128
    def _create_cu_header(self):
        """ Build Dwarf_CU_header, the compilation unit header struct.

            The fields following 'version' depend on the parsed version:
            for version >= 5 they are unit_type, address_size and
            debug_abbrev_offset; otherwise debug_abbrev_offset and
            address_size.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
            IfThenElse('', lambda ctx: ctx['version'] >= 5,
                Embed(Struct('',
                    self.Dwarf_uint8('unit_type'),
                    self.Dwarf_uint8('address_size'),
                    self.Dwarf_offset('debug_abbrev_offset'))),
                Embed(Struct('',
                    self.Dwarf_offset('debug_abbrev_offset'),
                    self.Dwarf_uint8('address_size'))),
            ))
    def _create_abbrev_declaration(self):
        """ Build Dwarf_abbrev_declaration: the tag, the children flag and
            the list of attribute specifications of one abbreviation.

            Attribute specifications are read until one with name
            'DW_AT_null' and form 'DW_FORM_null' is met; that terminator is
            not included in the result. A spec whose form is
            'DW_FORM_implicit_const' carries an extra SLEB128 'value'.
        """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
                    If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
                        self.Dwarf_sleb128('value')))))
191 def _create_dw_form(self
):
192 self
.Dwarf_dw_form
= dict(
193 DW_FORM_addr
=self
.Dwarf_target_addr(''),
194 DW_FORM_addrx
=self
.Dwarf_uleb128(''),
195 DW_FORM_addrx1
=self
.Dwarf_uint8(''),
196 DW_FORM_addrx2
=self
.Dwarf_uint16(''),
197 # DW_FORM_addrx3=self.Dwarf_uint24(''), # TODO
198 DW_FORM_addrx4
=self
.Dwarf_uint32(''),
200 DW_FORM_block1
=self
._make
_block
_struct
(self
.Dwarf_uint8
),
201 DW_FORM_block2
=self
._make
_block
_struct
(self
.Dwarf_uint16
),
202 DW_FORM_block4
=self
._make
_block
_struct
(self
.Dwarf_uint32
),
203 DW_FORM_block
=self
._make
_block
_struct
(self
.Dwarf_uleb128
),
205 # All DW_FORM_data<n> forms are assumed to be unsigned
206 DW_FORM_data1
=self
.Dwarf_uint8(''),
207 DW_FORM_data2
=self
.Dwarf_uint16(''),
208 DW_FORM_data4
=self
.Dwarf_uint32(''),
209 DW_FORM_data8
=self
.Dwarf_uint64(''),
210 DW_FORM_sdata
=self
.Dwarf_sleb128(''),
211 DW_FORM_udata
=self
.Dwarf_uleb128(''),
213 DW_FORM_string
=CString(''),
214 DW_FORM_strp
=self
.Dwarf_offset(''),
215 DW_FORM_line_strp
=self
.Dwarf_offset(''),
216 DW_FORM_strx1
=self
.Dwarf_uint8(''),
217 DW_FORM_strx2
=self
.Dwarf_uint16(''),
218 # DW_FORM_strx3=self.Dwarf_uint24(''), # TODO
219 DW_FORM_strx4
=self
.Dwarf_uint64(''),
220 DW_FORM_flag
=self
.Dwarf_uint8(''),
222 DW_FORM_ref
=self
.Dwarf_uint32(''),
223 DW_FORM_ref1
=self
.Dwarf_uint8(''),
224 DW_FORM_ref2
=self
.Dwarf_uint16(''),
225 DW_FORM_ref4
=self
.Dwarf_uint32(''),
226 DW_FORM_ref8
=self
.Dwarf_uint64(''),
227 DW_FORM_ref_udata
=self
.Dwarf_uleb128(''),
228 DW_FORM_ref_addr
=self
.Dwarf_target_addr('') if self
.dwarf_version
== 2 else self
.Dwarf_offset(''),
230 DW_FORM_indirect
=self
.Dwarf_uleb128(''),
232 # New forms in DWARFv4
233 DW_FORM_flag_present
= StaticField('', 0),
234 DW_FORM_sec_offset
= self
.Dwarf_offset(''),
235 DW_FORM_exprloc
= self
._make
_block
_struct
(self
.Dwarf_uleb128
),
236 DW_FORM_ref_sig8
= self
.Dwarf_uint64(''),
238 DW_FORM_GNU_strp_alt
=self
.Dwarf_offset(''),
239 DW_FORM_GNU_ref_alt
=self
.Dwarf_offset(''),
240 DW_AT_GNU_all_call_sites
=self
.Dwarf_uleb128(''),
243 def _create_aranges_header(self
):
244 self
.Dwarf_aranges_header
= Struct("Dwarf_aranges_header",
245 self
.Dwarf_initial_length('unit_length'),
246 self
.Dwarf_uint16('version'),
247 self
.Dwarf_offset('debug_info_offset'), # a little tbd
248 self
.Dwarf_uint8('address_size'),
249 self
.Dwarf_uint8('segment_size')
252 def _create_nameLUT_header(self
):
253 self
.Dwarf_nameLUT_header
= Struct("Dwarf_nameLUT_header",
254 self
.Dwarf_initial_length('unit_length'),
255 self
.Dwarf_uint16('version'),
256 self
.Dwarf_offset('debug_info_offset'),
257 self
.Dwarf_length('debug_info_length')
260 def _create_string_offsets_table_header(self
):
261 self
.Dwarf_string_offsets_table_header
= Struct(
262 "Dwarf_string_offets_table_header",
263 self
.Dwarf_initial_length('unit_length'),
264 self
.Dwarf_uint16('version'),
265 self
.Dwarf_uint16('padding'),
268 def _create_address_table_header(self
):
269 self
.Dwarf_address_table_header
= Struct("Dwarf_address_table_header",
270 self
.Dwarf_initial_length('unit_length'),
271 self
.Dwarf_uint16('version'),
272 self
.Dwarf_uint8('address_size'),
273 self
.Dwarf_uint8('segment_selector_size'),
    def _create_lineprog_header(self):
        """ Build Dwarf_lineprog_file_entry and Dwarf_lineprog_header.

            The header layout depends on the parsed 'version' field:
            version >= 5 adds address_size and segment_selector_size and
            describes directories and files through entry-format tables,
            while version < 5 uses the legacy include_directory and
            file_entry lists.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        class FormattedEntry(Construct):
            # Generates a parser based on a previously parsed piece,
            # similar to deprecated Dynamic.
            # Strings are resolved later, since it potentially requires
            # looking at another section.
            def __init__(self, name, structs, format_field):
                Construct.__init__(self, name)
                self.structs = structs
                self.format_field = format_field

            def _parse(self, stream, context):
                # Somewhat tricky technique here, explicitly writing back to the context
                # The parser built from the format table is stored in the
                # context under "<format_field>_parser" and reused whenever
                # that key is already present.
                if self.format_field + "_parser" in context:
                    parser = context[self.format_field + "_parser"]
                else:
                    # One field per (content_type, form) pair of the format
                    # table, named after the content type.
                    fields = tuple(
                        Rename(f.content_type, self.structs.Dwarf_dw_form[f.form])
                        for f in context[self.format_field])
                    parser = Struct('formatted_entry', *fields)
                    context[self.format_field + "_parser"] = parser
                return parser._parse(stream, context)

        ver5 = lambda ctx: ctx.version >= 5

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            If(ver5,
                self.Dwarf_uint8("address_size"),
                None),
            If(ver5,
                self.Dwarf_uint8("segment_selector_size"),
                None),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # Only read from the stream for version >= 4; 1 otherwise.
            If(lambda ctx: ctx.version >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx.opcode_base - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            If(ver5,
                PrefixedArray(
                    Struct('directory_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("directory_entry_format_count"))),
            If(ver5, # Name deliberately doesn't match the legacy object, since the format can't be made compatible
                PrefixedArray(
                    FormattedEntry('directories', self, "directory_entry_format"),
                    self.Dwarf_uleb128('directories_count'))),
            If(ver5,
                PrefixedArray(
                    Struct('file_name_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("file_name_entry_format_count"))),
            If(ver5,
                PrefixedArray(
                    FormattedEntry('file_names', self, "file_name_entry_format"),
                    self.Dwarf_uleb128('file_names_count'))),
            # Legacy directories/files - DWARF < 5 only
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: obj == b'',
                    CString('include_directory'))),
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: len(obj.name) == 0,
                    self.Dwarf_lineprog_file_entry)) # array name is file_entry
            )
362 def _create_callframe_entry_headers(self
):
363 self
.Dwarf_CIE_header
= Struct('Dwarf_CIE_header',
364 self
.Dwarf_initial_length('length'),
365 self
.Dwarf_offset('CIE_id'),
366 self
.Dwarf_uint8('version'),
367 CString('augmentation'),
368 self
.Dwarf_uleb128('code_alignment_factor'),
369 self
.Dwarf_sleb128('data_alignment_factor'),
370 self
.Dwarf_uleb128('return_address_register'))
371 self
.EH_CIE_header
= self
.Dwarf_CIE_header
373 # The CIE header was modified in DWARFv4.
374 if self
.dwarf_version
== 4:
375 self
.Dwarf_CIE_header
= Struct('Dwarf_CIE_header',
376 self
.Dwarf_initial_length('length'),
377 self
.Dwarf_offset('CIE_id'),
378 self
.Dwarf_uint8('version'),
379 CString('augmentation'),
380 self
.Dwarf_uint8('address_size'),
381 self
.Dwarf_uint8('segment_size'),
382 self
.Dwarf_uleb128('code_alignment_factor'),
383 self
.Dwarf_sleb128('data_alignment_factor'),
384 self
.Dwarf_uleb128('return_address_register'))
386 self
.Dwarf_FDE_header
= Struct('Dwarf_FDE_header',
387 self
.Dwarf_initial_length('length'),
388 self
.Dwarf_offset('CIE_pointer'),
389 self
.Dwarf_target_addr('initial_location'),
390 self
.Dwarf_target_addr('address_range'))
392 def _make_block_struct(self
, length_field
):
393 """ Create a struct for DW_FORM_block<size>
395 return PrefixedArray(
396 subcon
=self
.Dwarf_uint8('elem'),
397 length_field
=length_field(''))
400 class _InitialLengthAdapter(Adapter
):
401 """ A standard Construct adapter that expects a sub-construct
402 as a struct with one or two values (first, second).
404 def _decode(self
, obj
, context
):
405 if obj
.first
< 0xFFFFFF00:
408 if obj
.first
== 0xFFFFFFFF:
411 raise ConstructError("Failed decoding initial length for %X" % (