Improve DWARF 5 compatibility. (#400)
[pyelftools.git] / elftools / dwarf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: dwarf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing DWARF, adjusted for correct
5 # endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from ..construct import (
11 UBInt8, UBInt16, UBInt32, UBInt64, ULInt8, ULInt16, ULInt32, ULInt64,
12 SBInt8, SBInt16, SBInt32, SBInt64, SLInt8, SLInt16, SLInt32, SLInt64,
13 Adapter, Struct, ConstructError, If, Enum, Array, PrefixedArray,
14 CString, Embed, StaticField, IfThenElse
15 )
16 from ..common.construct_utils import RepeatUntilExcluding, ULEB128, SLEB128
17 from .enums import *
18
19
class DWARFStructs(object):
    """ Exposes Construct structs suitable for parsing information from DWARF
        sections. Each compile unit in DWARF info can have its own structs
        object. Keep in mind that these structs have to be given a name (by
        calling them with a name) before being used for parsing (like other
        Construct structs). Those that should be used without a name are marked
        by (+).

        Accessible attributes (mostly as described in chapter 7 of the DWARF
        spec v3):

            Dwarf_[u]int{8,16,32,64}:
                Data chunks of the common sizes

            Dwarf_offset:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_length:
                32-bit or 64-bit word, depending on dwarf_format

            Dwarf_target_addr:
                32-bit or 64-bit word, depending on address size

            Dwarf_initial_length:
                "Initial length field" encoding
                section 7.4

            Dwarf_{u,s}leb128:
                ULEB128 and SLEB128 variable-length encoding

            Dwarf_CU_header (+):
                Compilation unit header

            Dwarf_abbrev_declaration (+):
                Abbreviation table declaration - doesn't include the initial
                code, only the contents.

            Dwarf_dw_form (+):
                A dictionary mapping 'DW_FORM_*' keys into construct Structs
                that parse such forms. These Structs have already been given
                dummy names.

            Dwarf_lineprog_header (+):
                Line program header

            Dwarf_lineprog_file_entry (+):
                A single file entry in a line program header or instruction

            Dwarf_CIE_header (+):
                A call-frame CIE

            EH_CIE_header (+):
                A call-frame CIE in the pre-DWARFv4 layout (without the
                address_size and segment_size fields), regardless of
                dwarf_version

            Dwarf_FDE_header (+):
                A call-frame FDE

            Dwarf_aranges_header (+):
                Header of an address ranges table

            Dwarf_nameLUT_header (+):
                Header of a name lookup table

            Dwarf_string_offsets_table_header (+):
                Header of a string offsets table

            Dwarf_address_table_header (+):
                Header of an address table

        See also the documentation of public methods.
    """
    def __init__(self,
                 little_endian, dwarf_format, address_size, dwarf_version=2):
        """ dwarf_version:
                Numeric DWARF version

            little_endian:
                True if the file is little endian, False if big

            dwarf_format:
                DWARF Format: 32 or 64-bit (see spec section 7.4)

            address_size:
                Target machine address size, in bytes (4 or 8). (See spec
                section 7.5.1)
        """
        assert dwarf_format == 32 or dwarf_format == 64
        assert address_size == 8 or address_size == 4, str(address_size)
        self.little_endian = little_endian
        self.dwarf_format = dwarf_format
        self.address_size = address_size
        self.dwarf_version = dwarf_version
        self._create_structs()

    def initial_length_field_size(self):
        """ Size of an initial length field.
        """
        return 4 if self.dwarf_format == 32 else 12

    def _create_structs(self):
        """ Create all the struct attributes of this object, choosing the
            integer constructs that match the configured endianness, DWARF
            format and address size.
        """
        # Pick the fixed-size integer constructs once, by endianness, and
        # derive every other primitive from them.
        if self.little_endian:
            uint8, uint16, uint32, uint64 = ULInt8, ULInt16, ULInt32, ULInt64
            int8, int16, int32, int64 = SLInt8, SLInt16, SLInt32, SLInt64
        else:
            uint8, uint16, uint32, uint64 = UBInt8, UBInt16, UBInt32, UBInt64
            int8, int16, int32, int64 = SBInt8, SBInt16, SBInt32, SBInt64

        self.Dwarf_uint8 = uint8
        self.Dwarf_uint16 = uint16
        self.Dwarf_uint32 = uint32
        self.Dwarf_uint64 = uint64
        self.Dwarf_int8 = int8
        self.Dwarf_int16 = int16
        self.Dwarf_int32 = int32
        self.Dwarf_int64 = int64

        # Offsets and lengths follow the DWARF format (32/64-bit), while
        # target addresses follow the address size.
        self.Dwarf_offset = uint32 if self.dwarf_format == 32 else uint64
        self.Dwarf_length = uint32 if self.dwarf_format == 32 else uint64
        self.Dwarf_target_addr = uint32 if self.address_size == 4 else uint64

        self._create_initial_length()
        self._create_leb128()
        self._create_cu_header()
        self._create_abbrev_declaration()
        self._create_dw_form()
        self._create_lineprog_header()
        self._create_callframe_entry_headers()
        self._create_aranges_header()
        self._create_nameLUT_header()
        self._create_string_offsets_table_header()
        self._create_address_table_header()

    def _create_initial_length(self):
        """ Create Dwarf_initial_length: a factory that, given a name, returns
            a construct parsing a full initial length field.
        """
        def _InitialLength(name):
            # Adapts a Struct that parses forward a full initial length field.
            # Only if the first word is the continuation value, the second
            # word is parsed from the stream.
            return _InitialLengthAdapter(
                Struct(name,
                    self.Dwarf_uint32('first'),
                    If(lambda ctx: ctx.first == 0xFFFFFFFF,
                        self.Dwarf_uint64('second'),
                        elsevalue=None)))
        self.Dwarf_initial_length = _InitialLength

    def _create_leb128(self):
        """ Create Dwarf_uleb128 and Dwarf_sleb128.
        """
        self.Dwarf_uleb128 = ULEB128
        self.Dwarf_sleb128 = SLEB128

    def _create_cu_header(self):
        """ Create Dwarf_CU_header. The fields following 'version' depend on
            the version value parsed from the stream.
        """
        self.Dwarf_CU_header = Struct('Dwarf_CU_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            # DWARFv5 reverses the order of address_size and
            # debug_abbrev_offset, and adds a unit_type field before them.
            IfThenElse('', lambda ctx: ctx['version'] >= 5,
                Embed(Struct('',
                    self.Dwarf_uint8('unit_type'),
                    self.Dwarf_uint8('address_size'),
                    self.Dwarf_offset('debug_abbrev_offset'))),
                Embed(Struct('',
                    self.Dwarf_offset('debug_abbrev_offset'),
                    self.Dwarf_uint8('address_size'))),
            ))

    def _create_abbrev_declaration(self):
        """ Create Dwarf_abbrev_declaration: tag, children flag and the list
            of attribute specifications, up to (and excluding) the
            DW_AT_null/DW_FORM_null terminator.
        """
        self.Dwarf_abbrev_declaration = Struct('Dwarf_abbrev_entry',
            Enum(self.Dwarf_uleb128('tag'), **ENUM_DW_TAG),
            Enum(self.Dwarf_uint8('children_flag'), **ENUM_DW_CHILDREN),
            RepeatUntilExcluding(
                lambda obj, ctx:
                    obj.name == 'DW_AT_null' and obj.form == 'DW_FORM_null',
                Struct('attr_spec',
                    Enum(self.Dwarf_uleb128('name'), **ENUM_DW_AT),
                    Enum(self.Dwarf_uleb128('form'), **ENUM_DW_FORM),
                    # Only DW_FORM_implicit_const carries its value inside
                    # the abbreviation declaration itself.
                    If(lambda ctx: ctx['form'] == 'DW_FORM_implicit_const',
                        self.Dwarf_sleb128('value')))))

    def _create_dw_form(self):
        """ Create Dwarf_dw_form: a dictionary mapping form names to
            constructs (already given an empty dummy name) that parse the
            attribute value encoded with that form.
        """
        self.Dwarf_dw_form = dict(
            DW_FORM_addr=self.Dwarf_target_addr(''),
            DW_FORM_addrx=self.Dwarf_uleb128(''),
            DW_FORM_addrx1=self.Dwarf_uint8(''),
            DW_FORM_addrx2=self.Dwarf_uint16(''),
            # DW_FORM_addrx3=self.Dwarf_uint24(''),  # TODO
            DW_FORM_addrx4=self.Dwarf_uint32(''),

            DW_FORM_block1=self._make_block_struct(self.Dwarf_uint8),
            DW_FORM_block2=self._make_block_struct(self.Dwarf_uint16),
            DW_FORM_block4=self._make_block_struct(self.Dwarf_uint32),
            DW_FORM_block=self._make_block_struct(self.Dwarf_uleb128),

            # All DW_FORM_data<n> forms are assumed to be unsigned
            DW_FORM_data1=self.Dwarf_uint8(''),
            DW_FORM_data2=self.Dwarf_uint16(''),
            DW_FORM_data4=self.Dwarf_uint32(''),
            DW_FORM_data8=self.Dwarf_uint64(''),
            DW_FORM_sdata=self.Dwarf_sleb128(''),
            DW_FORM_udata=self.Dwarf_uleb128(''),

            DW_FORM_string=CString(''),
            DW_FORM_strp=self.Dwarf_offset(''),
            DW_FORM_line_strp=self.Dwarf_offset(''),
            DW_FORM_strx1=self.Dwarf_uint8(''),
            DW_FORM_strx2=self.Dwarf_uint16(''),
            # DW_FORM_strx3=self.Dwarf_uint24(''),  # TODO
            # The <n> suffix is the index size in bytes, so strx4 is a 4-byte
            # value (like addrx4 above), not an 8-byte one.
            DW_FORM_strx4=self.Dwarf_uint32(''),
            DW_FORM_flag=self.Dwarf_uint8(''),

            DW_FORM_ref=self.Dwarf_uint32(''),
            DW_FORM_ref1=self.Dwarf_uint8(''),
            DW_FORM_ref2=self.Dwarf_uint16(''),
            DW_FORM_ref4=self.Dwarf_uint32(''),
            DW_FORM_ref8=self.Dwarf_uint64(''),
            DW_FORM_ref_udata=self.Dwarf_uleb128(''),
            # In DWARFv2 a ref_addr is address-sized; in other versions it
            # is an offset whose size depends on the DWARF format.
            DW_FORM_ref_addr=(
                self.Dwarf_target_addr('') if self.dwarf_version == 2
                else self.Dwarf_offset('')),

            DW_FORM_indirect=self.Dwarf_uleb128(''),

            # New forms in DWARFv4
            DW_FORM_flag_present=StaticField('', 0),
            DW_FORM_sec_offset=self.Dwarf_offset(''),
            DW_FORM_exprloc=self._make_block_struct(self.Dwarf_uleb128),
            DW_FORM_ref_sig8=self.Dwarf_uint64(''),

            DW_FORM_GNU_strp_alt=self.Dwarf_offset(''),
            DW_FORM_GNU_ref_alt=self.Dwarf_offset(''),
            # NOTE(review): this key is named like an attribute (DW_AT_*),
            # not a form; kept as-is because other code may look it up.
            DW_AT_GNU_all_call_sites=self.Dwarf_uleb128(''),
        )

    def _create_aranges_header(self):
        """ Create Dwarf_aranges_header.
        """
        self.Dwarf_aranges_header = Struct("Dwarf_aranges_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),  # a little tbd
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_size')
            )

    def _create_nameLUT_header(self):
        """ Create Dwarf_nameLUT_header.
        """
        self.Dwarf_nameLUT_header = Struct("Dwarf_nameLUT_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('debug_info_offset'),
            self.Dwarf_length('debug_info_length')
            )

    def _create_string_offsets_table_header(self):
        """ Create Dwarf_string_offsets_table_header.
        """
        self.Dwarf_string_offsets_table_header = Struct(
            "Dwarf_string_offets_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint16('padding'),
            )

    def _create_address_table_header(self):
        """ Create Dwarf_address_table_header.
        """
        self.Dwarf_address_table_header = Struct("Dwarf_address_table_header",
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_uint8('address_size'),
            self.Dwarf_uint8('segment_selector_size'),
            )

    def _create_lineprog_header(self):
        """ Create Dwarf_lineprog_file_entry and Dwarf_lineprog_header.
        """
        # A file entry is terminated by a NULL byte, so we don't want to parse
        # past it. Therefore an If is used.
        self.Dwarf_lineprog_file_entry = Struct('file_entry',
            CString('name'),
            If(lambda ctx: len(ctx.name) != 0,
                Embed(Struct('',
                    self.Dwarf_uleb128('dir_index'),
                    self.Dwarf_uleb128('mtime'),
                    self.Dwarf_uleb128('length')))))

        self.Dwarf_lineprog_header = Struct('Dwarf_lineprog_header',
            self.Dwarf_initial_length('unit_length'),
            self.Dwarf_uint16('version'),
            self.Dwarf_offset('header_length'),
            self.Dwarf_uint8('minimum_instruction_length'),
            # This field is only read from the stream for version >= 4;
            # otherwise the value 1 is used.
            If(lambda ctx: ctx['version'] >= 4,
                self.Dwarf_uint8("maximum_operations_per_instruction"),
                1),
            self.Dwarf_uint8('default_is_stmt'),
            self.Dwarf_int8('line_base'),
            self.Dwarf_uint8('line_range'),
            self.Dwarf_uint8('opcode_base'),
            Array(lambda ctx: ctx['opcode_base'] - 1,
                  self.Dwarf_uint8('standard_opcode_lengths')),
            # Both lists end at an empty string / empty file name, which is
            # consumed but not included in the result.
            RepeatUntilExcluding(
                lambda obj, ctx: obj == b'',
                CString('include_directory')),
            RepeatUntilExcluding(
                lambda obj, ctx: len(obj.name) == 0,
                self.Dwarf_lineprog_file_entry),
            )

    def _create_callframe_entry_headers(self):
        """ Create Dwarf_CIE_header, EH_CIE_header and Dwarf_FDE_header.

            EH_CIE_header always uses the layout without address_size and
            segment_size. Dwarf_CIE_header uses that same layout unless
            dwarf_version is 4 or later.
        """
        self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_id'),
            self.Dwarf_uint8('version'),
            CString('augmentation'),
            self.Dwarf_uleb128('code_alignment_factor'),
            self.Dwarf_sleb128('data_alignment_factor'),
            self.Dwarf_uleb128('return_address_register'))
        self.EH_CIE_header = self.Dwarf_CIE_header

        # The CIE header was modified in DWARFv4, and later versions keep
        # that layout - so test for "4 or later" rather than exactly 4, or
        # DWARFv5 would fall back to the old layout.
        if self.dwarf_version >= 4:
            self.Dwarf_CIE_header = Struct('Dwarf_CIE_header',
                self.Dwarf_initial_length('length'),
                self.Dwarf_offset('CIE_id'),
                self.Dwarf_uint8('version'),
                CString('augmentation'),
                self.Dwarf_uint8('address_size'),
                self.Dwarf_uint8('segment_size'),
                self.Dwarf_uleb128('code_alignment_factor'),
                self.Dwarf_sleb128('data_alignment_factor'),
                self.Dwarf_uleb128('return_address_register'))

        self.Dwarf_FDE_header = Struct('Dwarf_FDE_header',
            self.Dwarf_initial_length('length'),
            self.Dwarf_offset('CIE_pointer'),
            self.Dwarf_target_addr('initial_location'),
            self.Dwarf_target_addr('address_range'))

    def _make_block_struct(self, length_field):
        """ Create a struct for DW_FORM_block<size>

            length_field:
                The (unnamed) construct used to parse the length prefix;
                the block itself is parsed as that many 'elem' bytes.
        """
        return PrefixedArray(
                    subcon=self.Dwarf_uint8('elem'),
                    length_field=length_field(''))
343
344
class _InitialLengthAdapter(Adapter):
    """ Construct adapter that collapses a parsed (first, second) struct
        into a single length value.

        The wrapped sub-construct must produce an object with a 'first'
        attribute and, when 'first' is the 0xFFFFFFFF escape value, a
        'second' attribute holding the actual length.
    """
    def _decode(self, obj, context):
        first_word = obj.first

        # Escape value: the real length is in the second word.
        if first_word == 0xFFFFFFFF:
            return obj.second

        # Any other value at or above 0xFFFFFF00 is rejected.
        if first_word >= 0xFFFFFF00:
            raise ConstructError("Failed decoding initial length for %X" % (
                first_word))

        # Plain case: the first word is the length itself.
        return first_word