Whitespace cleanups
[pyelftools.git] / elftools / dwarf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from elftools.construct.core import Subconstruct
11 from elftools.construct.macros import Embedded
12 from ..construct import (
13 UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
14 SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
15 Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
16 CString, Embed, StaticField, IfThenElse, Construct, Rename, Sequence
17 )
18 from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
19 from .enums import *
20
21
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_length:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding
                section 7.4

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                A call-frame CIE

            EH_CIE_header (+):
                The CIE header layout without the address_size/segment_size
                fields, regardless of dwarf_version

            Dwarf_FDE_header (+):
                A call-frame FDE

            Dwarf_aranges_header (+):
                Header with unit_length, version, debug_info_offset,
                address_size and segment_size fields

            Dwarf_nameLUT_header (+):
                Header with unit_length, version, debug_info_offset and
                debug_info_length fields

            Dwarf_string_offsets_table_header (+):
                Header with unit_length, version and padding fields

            Dwarf_address_table_header (+):
                Header with unit_length, version, address_size and
                segment_selector_size fields

        See also the documentation of public methods.
    """
    def __init__(self,
                 little_endian, dwarf_format, address_size, dwarf_version=2):
        """ dwarf_version:
                Numeric DWARF version

            little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)
        """
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4, str(address_size)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field: 4 when dwarf_format is 32,
            12 otherwise.
        """
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Bind the integer primitives matching the configured endianness,
            DWARF format and address size, then build every composite struct
            on top of them.
        """
        if self.little_endian:
            self.Dwarf_uint8 = ULInt8
            self.Dwarf_uint16 = ULInt16
            self.Dwarf_uint32 = ULInt32
            self.Dwarf_uint64 = ULInt64
            self.Dwarf_offset = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_length = ULInt32 if self.dwarf_format == 32 else ULInt64
            self.Dwarf_target_addr = (
                ULInt32 if self.address_size == 4 else ULInt64)
            self.Dwarf_int8 = SLInt8
            self.Dwarf_int16 = SLInt16
            self.Dwarf_int32 = SLInt32
            self.Dwarf_int64 = SLInt64
        else:
            self.Dwarf_uint8 = UBInt8
            self.Dwarf_uint16 = UBInt16
            self.Dwarf_uint32 = UBInt32
            self.Dwarf_uint64 = UBInt64
            self.Dwarf_offset = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_length = UBInt32 if self.dwarf_format == 32 else UBInt64
            self.Dwarf_target_addr = (
                UBInt32 if self.address_size == 4 else UBInt64)
            self.Dwarf_int8 = SBInt8
            self.Dwarf_int16 = SBInt16
            self.Dwarf_int32 = SBInt32
            self.Dwarf_int64 = SBInt64

        # The composite structs below are built from the primitives bound
        # above, so the primitives must exist first.
        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()
        self._create_nameLUT_header()
        self._create_string_offsets_table_header()
        self._create_address_table_header()

    def _create_initial_length(self):
        """ Create Dwarf_initial_length: a factory that, given a name,
            returns the adapted initial length struct.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Expose the ULEB128/SLEB128 constructs under Dwarf_* names.
        """
        self.Dwarf_uleb128 = ULEB128
        self.Dwarf_sleb128 = SLEB128

    def _create_cu_header(self):
        """ Create Dwarf_CU_header. The fields following 'version' depend on
            the parsed version value.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            # DWARFv5 reverses the order of address_size and debug_abbrev_offset.
            IfThenElse('', lambda ctx: ctx['version'] >= 5,
                Embed(Struct('',
                    self.Dwarf_uint8('unit_type'),
                    self.Dwarf_uint8('address_size'),
                    self.Dwarf_offset('debug_abbrev_offset'))),
                Embed(Struct('',
                    self.Dwarf_offset('debug_abbrev_offset'),
                    self.Dwarf_uint8('address_size'))),
            ))

    def _create_abbrev_declaration(self):
        """ Create Dwarf_abbrev_declaration: tag, children flag and the list
            of attribute specifications, which stops at (and excludes) the
            DW_AT_null/DW_FORM_null pair.
        """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
                    # Only implicit_const carries its value in the
                    # abbreviation itself.
                    If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
                        self.Dwarf_sleb128('value')))))

    def _create_dw_form(self):
        """ Create Dwarf_dw_form: a dictionary mapping form names to
            constructs (already given an empty name) that parse them.
        """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),
            DW_FORM_addrx=self.Dwarf_uleb128(''),
            DW_FORM_addrx1=self.Dwarf_uint8(''),
            DW_FORM_addrx2=self.Dwarf_uint16(''),
            # DW_FORM_addrx3=self.Dwarf_uint24(''), # TODO
            DW_FORM_addrx4=self.Dwarf_uint32(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_line_strp=self.Dwarf_offset(''),
            DW_FORM_strx1=self.Dwarf_uint8(''),
            DW_FORM_strx2=self.Dwarf_uint16(''),
            # DW_FORM_strx3=self.Dwarf_uint24(''), # TODO
            # The <n> suffix of strx<n> is the index size in bytes, same as
            # for addrx<n> above, so strx4 is a 4-byte value. Reading 8 bytes
            # here would swallow the start of the following attribute.
            DW_FORM_strx4=self.Dwarf_uint32(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref=self.Dwarf_uint32(''),
            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            # Address-sized for DWARF version 2, offset-sized otherwise.
            DW_FORM_ref_addr=self.Dwarf_target_addr('') if self.dwarf_version == 2 else self.Dwarf_offset(''),

            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            DW_FORM_flag_present=StaticField('', 0),
            DW_FORM_sec_offset=self.Dwarf_offset(''),
            DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
            DW_FORM_ref_sig8=self.Dwarf_uint64(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            # NOTE(review): this key is named like an attribute (DW_AT_*),
            # not a form; kept as-is since callers may look it up - confirm
            # whether it belongs in this table.
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),
        )

    def _create_aranges_header(self):
        """ Create Dwarf_aranges_header.
        """
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'), # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
            )

    def _create_nameLUT_header(self):
        """ Create Dwarf_nameLUT_header.
        """
        self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),
            self.Dwarf_length('debug_info_length')
            )

    def _create_string_offsets_table_header(self):
        """ Create Dwarf_string_offsets_table_header.
        """
        self.Dwarf_string_offsets_table_header = Struct(
            "Dwarf_string_offets_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint16('padding'),
            )

    def _create_address_table_header(self):
        """ Create Dwarf_address_table_header.
        """
        self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            )

    def _create_lineprog_header(self):
        """ Create Dwarf_lineprog_file_entry and Dwarf_lineprog_header.
            The header contains both the version >= 5 directory/file tables
            and the legacy (version < 5) ones; each is guarded by a check on
            the parsed version.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        class FormattedEntry(Construct):
            # Generates a parser based on a previously parsed piece,
            # similar to deprecated Dynamic.
            # Strings are resolved later, since it potentially requires
            # looking at another section.
            def __init__(self, name, structs, format_field):
                Construct.__init__(self, name)
                self.structs = structs
                self.format_field = format_field

            def _parse(self, stream, context):
                # Somewhat tricky technique here, explicitly writing back to the context
                # so the parser built from the format description is reused
                # for the following entries instead of being rebuilt.
                if self.format_field + "_parser" in context:
                    parser = context[self.format_field + "_parser"]
                else:
                    fields = tuple(
                        Rename(f.content_type, self.structs.Dwarf_dw_form[f.form])
                        for f in context[self.format_field])
                    parser = Struct('formatted_entry', *fields)
                    context[self.format_field + "_parser"] = parser
                return parser._parse(stream, context)

        ver5 = lambda ctx: ctx.version >= 5

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            If(ver5,
                self.Dwarf_uint8("address_size"),
                None),
            If(ver5,
                self.Dwarf_uint8("segment_selector_size"),
                None),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # Field is parsed only for version >= 4; otherwise the value 1
            # is used.
            If(lambda ctx: ctx.version >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx.opcode_base - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            If(ver5,
                PrefixedArray(
                    Struct('directory_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("directory_entry_format_count"))),
            If(ver5, # Name deliberately doesn't match the legacy object, since the format can't be made compatible
                PrefixedArray(
                    FormattedEntry('directories', self, "directory_entry_format"),
                    self.Dwarf_uleb128('directories_count'))),
            If(ver5,
                PrefixedArray(
                    Struct('file_name_entry_format',
                        Enum(self.Dwarf_uleb128('content_type'), **ENUM_DW_LNCT),
                        Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM)),
                    self.Dwarf_uint8("file_name_entry_format_count"))),
            If(ver5,
                PrefixedArray(
                    FormattedEntry('file_names', self, "file_name_entry_format"),
                    self.Dwarf_uleb128('file_names_count'))),
            # Legacy directories/files - DWARF < 5 only
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: obj == b'',
                    CString('include_directory'))),
            If(lambda ctx: ctx.version < 5,
                RepeatUntilExcluding(
                    lambda obj, ctx: len(obj.name) == 0,
                    self.Dwarf_lineprog_file_entry)) # array name is file_entry
            )

    def _create_callframe_entry_headers(self):
        """ Create Dwarf_CIE_header, EH_CIE_header and Dwarf_FDE_header.
        """
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'))
        # EH_CIE_header always keeps the layout above, even when
        # Dwarf_CIE_header is replaced below.
        self.EH_CIE_header = self.Dwarf_CIE_header

        # The CIE header was modified in DWARFv4.
        # NOTE(review): only dwarf_version == 4 exactly selects the extended
        # layout - confirm this is intended for later versions.
        if self.dwarf_version == 4:
            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
                self.Dwarf_initial_length('length'),
                self.Dwarf_offset('CIE_id'),
                self.Dwarf_uint8('version'),
                CString('augmentation'),
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
                self.Dwarf_uleb128('code_alignment_factor'),
                self.Dwarf_sleb128('data_alignment_factor'),
                self.Dwarf_uleb128('return_address_register'))

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>: an array of uint8
            elements prefixed by a length parsed with length_field.

            length_field:
                One of the Dwarf_* integer factories; it is called here with
                an empty name.
        """
        return PrefixedArray(
            subcon=self.Dwarf_uint8('elem'),
            length_field=length_field(''))
398
399
class _InitialLengthAdapter(Adapter):
    """ Construct adapter collapsing a parsed (first, second) struct into a
        single length value.

        The wrapped sub-construct is expected to yield an object with a
        'first' attribute and, when 'first' is 0xFFFFFFFF, a 'second' one.
    """
    def _decode(self, obj, context):
        first_word = obj.first
        # 0xFFFFFFFF is the continuation value: the real length is in
        # the second word.
        if first_word == 0xFFFFFFFF:
            return obj.second
        # Any other value from 0xFFFFFF00 upwards is rejected.
        if first_word >= 0xFFFFFF00:
            raise ConstructError("Failed decoding initial length for %X" % (
                first_word))
        return first_word