Add support for RISC-V attributes (#459)
[pyelftools.git] / elftools / elf / structs.py
1 #-------------------------------------------------------------------------------
2 # elftools: elf/structs.py
3 #
4 # Encapsulation of Construct structs for parsing an ELF file, adjusted for
5 # correct endianness and word-size.
6 #
7 # Eli Bendersky (eliben@gmail.com)
8 # This code is in the public domain
9 #-------------------------------------------------------------------------------
10 from ..construct import (
11 UBInt8, UBInt16, UBInt32, UBInt64,
12 ULInt8, ULInt16, ULInt32, ULInt64,
13 SBInt32, SLInt32, SBInt64, SLInt64,
14 Struct, Array, Enum, Padding, BitStruct, BitField, Value, String, CString,
15 Switch, Field
16 )
17 from ..common.construct_utils import ULEB128
18 from ..common.utils import roundup
19 from .enums import *
20
21
class ELFStructs(object):
    """ Accessible attributes:

        Elf_{byte|half|word|word64|addr|offset|sword|xword|sxword}:
            Data chunks, as specified by the ELF standard, adjusted for
            correct endianness and word-size.

        Elf_Ehdr:
            ELF file header

        Elf_Phdr:
            Program header

        Elf_Shdr:
            Section header

        Elf_Sym:
            Symbol table entry

        Elf_Rel, Elf_Rela:
            Entries in relocation sections

        The data chunks and Elf_Ehdr are created by create_basic_structs();
        all other structs are created by create_advanced_structs().
    """
    def __init__(self, little_endian=True, elfclass=32):
        """ little_endian:
                True to build little-endian structs, False for big-endian.

            elfclass:
                Word size of the ELF file; must be 32 or 64.

            No structs are created here; call create_basic_structs() and
            then create_advanced_structs().
        """
        assert elfclass == 32 or elfclass == 64
        self.little_endian = little_endian
        self.elfclass = elfclass
        self.e_type = None
        self.e_machine = None
        self.e_ident_osabi = None

    def __getstate__(self):
        """ Pickle only the parameters the structs are derived from; the
            structs themselves are rebuilt in __setstate__.
        """
        return self.little_endian, self.elfclass, self.e_type, self.e_machine, self.e_ident_osabi

    def __setstate__(self, state):
        """ Restore the parameters saved by __getstate__ and rebuild all
            structs from them.
        """
        self.little_endian, self.elfclass, e_type, e_machine, e_osabi = state
        self.create_basic_structs()
        self.create_advanced_structs(e_type, e_machine, e_osabi)

    def create_basic_structs(self):
        """ Create word-size related structs and ehdr struct needed for
            initial determining of ELF type.
        """
        if self.little_endian:
            self.Elf_byte = ULInt8
            self.Elf_half = ULInt16
            self.Elf_word = ULInt32
            self.Elf_word64 = ULInt64
            self.Elf_addr = ULInt32 if self.elfclass == 32 else ULInt64
            self.Elf_offset = self.Elf_addr
            self.Elf_sword = SLInt32
            self.Elf_xword = ULInt32 if self.elfclass == 32 else ULInt64
            self.Elf_sxword = SLInt32 if self.elfclass == 32 else SLInt64
        else:
            self.Elf_byte = UBInt8
            self.Elf_half = UBInt16
            self.Elf_word = UBInt32
            self.Elf_word64 = UBInt64
            self.Elf_addr = UBInt32 if self.elfclass == 32 else UBInt64
            self.Elf_offset = self.Elf_addr
            self.Elf_sword = SBInt32
            self.Elf_xword = UBInt32 if self.elfclass == 32 else UBInt64
            self.Elf_sxword = SBInt32 if self.elfclass == 32 else SBInt64
        self._create_ehdr()
        self._create_leb128()
        self._create_ntbs()

    def create_advanced_structs(self, e_type=None, e_machine=None, e_ident_osabi=None):
        """ Create all ELF structs except the ehdr. They may possibly depend
            on provided e_type and/or e_machine parsed from ehdr.

            The arguments are stored on the instance (self.e_type,
            self.e_machine, self.e_ident_osabi) before the structs are built.
            create_basic_structs() must have been called first, since the
            structs are assembled from the Elf_* data chunks it defines.
        """
        self.e_type = e_type
        self.e_machine = e_machine
        self.e_ident_osabi = e_ident_osabi

        self._create_phdr()
        self._create_shdr()
        self._create_chdr()
        self._create_sym()
        self._create_rel()
        self._create_dyn()
        self._create_sunw_syminfo()
        self._create_gnu_verneed()
        self._create_gnu_verdef()
        self._create_gnu_versym()
        self._create_gnu_abi()
        # Previously defined but never invoked, which left Elf_debugaltlink
        # undefined on every instance.
        self._create_gnu_debugaltlink()
        self._create_gnu_property()
        self._create_note(e_type)
        self._create_stabs()
        self._create_attributes_subsection()
        self._create_arm_attributes()
        self._create_riscv_attributes()
        self._create_elf_hash()
        self._create_gnu_hash()

    #-------------------------------- PRIVATE --------------------------------#

    def _create_ehdr(self):
        """ ELF file header (Elf_Ehdr), including the nested e_ident struct.
        """
        self.Elf_Ehdr = Struct('Elf_Ehdr',
            Struct('e_ident',
                Array(4, self.Elf_byte('EI_MAG')),
                Enum(self.Elf_byte('EI_CLASS'), **ENUM_EI_CLASS),
                Enum(self.Elf_byte('EI_DATA'), **ENUM_EI_DATA),
                Enum(self.Elf_byte('EI_VERSION'), **ENUM_E_VERSION),
                Enum(self.Elf_byte('EI_OSABI'), **ENUM_EI_OSABI),
                self.Elf_byte('EI_ABIVERSION'),
                Padding(7)
            ),
            Enum(self.Elf_half('e_type'), **ENUM_E_TYPE),
            Enum(self.Elf_half('e_machine'), **ENUM_E_MACHINE),
            Enum(self.Elf_word('e_version'), **ENUM_E_VERSION),
            self.Elf_addr('e_entry'),
            self.Elf_offset('e_phoff'),
            self.Elf_offset('e_shoff'),
            self.Elf_word('e_flags'),
            self.Elf_half('e_ehsize'),
            self.Elf_half('e_phentsize'),
            self.Elf_half('e_phnum'),
            self.Elf_half('e_shentsize'),
            self.Elf_half('e_shnum'),
            self.Elf_half('e_shstrndx'),
        )

    def _create_leb128(self):
        """ Expose the ULEB128 construct as Elf_uleb128.
        """
        self.Elf_uleb128 = ULEB128

    def _create_ntbs(self):
        """ Expose CString as Elf_ntbs (used below for the vendor name of an
            attributes subsection).
        """
        self.Elf_ntbs = CString

    def _create_phdr(self):
        """ Program header parsing.

            Depends on e_machine because of machine-specific values in p_type,
            and on elfclass because the field order differs between 32-bit
            and 64-bit files.
        """
        p_type_dict = ENUM_P_TYPE_BASE
        if self.e_machine == 'EM_ARM':
            p_type_dict = ENUM_P_TYPE_ARM
        elif self.e_machine == 'EM_AARCH64':
            p_type_dict = ENUM_P_TYPE_AARCH64
        elif self.e_machine == 'EM_MIPS':
            p_type_dict = ENUM_P_TYPE_MIPS
        elif self.e_machine == 'EM_RISCV':
            p_type_dict = ENUM_P_TYPE_RISCV

        if self.elfclass == 32:
            self.Elf_Phdr = Struct('Elf_Phdr',
                Enum(self.Elf_word('p_type'), **p_type_dict),
                self.Elf_offset('p_offset'),
                self.Elf_addr('p_vaddr'),
                self.Elf_addr('p_paddr'),
                self.Elf_word('p_filesz'),
                self.Elf_word('p_memsz'),
                self.Elf_word('p_flags'),
                self.Elf_word('p_align'),
            )
        else: # 64
            # Note: p_flags directly follows p_type in the 64-bit layout.
            self.Elf_Phdr = Struct('Elf_Phdr',
                Enum(self.Elf_word('p_type'), **p_type_dict),
                self.Elf_word('p_flags'),
                self.Elf_offset('p_offset'),
                self.Elf_addr('p_vaddr'),
                self.Elf_addr('p_paddr'),
                self.Elf_xword('p_filesz'),
                self.Elf_xword('p_memsz'),
                self.Elf_xword('p_align'),
            )

    def _create_shdr(self):
        """Section header parsing.

        Depends on e_machine because of machine-specific values in sh_type.
        """
        sh_type_dict = ENUM_SH_TYPE_BASE
        if self.e_machine == 'EM_ARM':
            sh_type_dict = ENUM_SH_TYPE_ARM
        elif self.e_machine == 'EM_X86_64':
            sh_type_dict = ENUM_SH_TYPE_AMD64
        elif self.e_machine == 'EM_MIPS':
            sh_type_dict = ENUM_SH_TYPE_MIPS
        elif self.e_machine == 'EM_RISCV':
            sh_type_dict = ENUM_SH_TYPE_RISCV

        self.Elf_Shdr = Struct('Elf_Shdr',
            self.Elf_word('sh_name'),
            Enum(self.Elf_word('sh_type'), **sh_type_dict),
            self.Elf_xword('sh_flags'),
            self.Elf_addr('sh_addr'),
            self.Elf_offset('sh_offset'),
            self.Elf_xword('sh_size'),
            self.Elf_word('sh_link'),
            self.Elf_word('sh_info'),
            self.Elf_xword('sh_addralign'),
            self.Elf_xword('sh_entsize'),
        )

    def _create_chdr(self):
        """ Compressed section header (Elf_Chdr).
        """
        # Structure of compressed sections header. It is documented in Oracle
        # "Linker and Libraries Guide", Part IV ELF Application Binary
        # Interface, Chapter 13 Object File Format, Section Compression:
        # https://docs.oracle.com/cd/E53394_01/html/E54813/section_compression.html
        fields = [
            Enum(self.Elf_word('ch_type'), **ENUM_ELFCOMPRESS_TYPE),
            self.Elf_xword('ch_size'),
            self.Elf_xword('ch_addralign'),
        ]
        if self.elfclass == 64:
            # The 64-bit header has an extra reserved word right after ch_type.
            fields.insert(1, self.Elf_word('ch_reserved'))
        self.Elf_Chdr = Struct('Elf_Chdr', *fields)

    def _create_rel(self):
        """ Relocation entries: Elf_Rel, Elf_Rela (Elf_Rel plus r_addend) and
            Elf_Relr.

            Every variant exposes r_info, r_info_sym and r_info_type; the
            ELF64 MIPS variant additionally exposes r_info_ssym,
            r_info_type2 and r_info_type3.
        """
        # r_info is also taken apart into r_info_sym and r_info_type. This is
        # done in Value to avoid endianness issues while parsing.
        if self.elfclass == 32:
            fields = [self.Elf_xword('r_info'),
                      Value('r_info_sym',
                            lambda ctx: (ctx['r_info'] >> 8) & 0xFFFFFF),
                      Value('r_info_type',
                            lambda ctx: ctx['r_info'] & 0xFF)]
        elif self.e_machine == 'EM_MIPS': # ELF64 MIPS
            fields = [
                # The MIPS ELF64 specification
                # (https://www.linux-mips.org/pub/linux/mips/doc/ABI/elf64-2.4.pdf)
                # provides a non-standard relocation structure definition.
                self.Elf_word('r_sym'),
                self.Elf_byte('r_ssym'),
                self.Elf_byte('r_type3'),
                self.Elf_byte('r_type2'),
                self.Elf_byte('r_type'),

                # Synthesize usual fields for compatibility with other
                # architectures. This allows relocation consumers (including
                # our readelf tests) to work without worrying about MIPS64
                # oddities.
                Value('r_info_sym', lambda ctx: ctx['r_sym']),
                Value('r_info_ssym', lambda ctx: ctx['r_ssym']),
                Value('r_info_type', lambda ctx: ctx['r_type']),
                Value('r_info_type2', lambda ctx: ctx['r_type2']),
                Value('r_info_type3', lambda ctx: ctx['r_type3']),
                Value('r_info',
                      lambda ctx: (ctx['r_sym'] << 32)
                                  | (ctx['r_ssym'] << 24)
                                  | (ctx['r_type3'] << 16)
                                  | (ctx['r_type2'] << 8)
                                  | ctx['r_type']),
            ]
        else: # Other 64 ELFs
            fields = [self.Elf_xword('r_info'),
                      Value('r_info_sym',
                            lambda ctx: (ctx['r_info'] >> 32) & 0xFFFFFFFF),
                      Value('r_info_type',
                            lambda ctx: ctx['r_info'] & 0xFFFFFFFF)]

        self.Elf_Rel = Struct('Elf_Rel',
                              self.Elf_addr('r_offset'),
                              *fields)

        fields_and_addend = fields + [self.Elf_sxword('r_addend')]
        self.Elf_Rela = Struct('Elf_Rela',
                               self.Elf_addr('r_offset'),
                               *fields_and_addend
        )

        # Elf32_Relr is typedef'd as Elf32_Word, Elf64_Relr as Elf64_Xword
        # (see the glibc patch, for example:
        # https://sourceware.org/pipermail/libc-alpha/2021-October/132029.html)
        # For us, this is the same as self.Elf_addr (or self.Elf_xword).
        self.Elf_Relr = Struct('Elf_Relr', self.Elf_addr('r_offset'))

    def _create_dyn(self):
        """ Dynamic section entry (Elf_Dyn).

            The d_tag enum starts from the common tags; machine-specific
            tags are added when e_machine has an entry in
            ENUMMAP_EXTRA_D_TAG_MACHINE, otherwise Solaris tags are added
            when the OS ABI is ELFOSABI_SOLARIS.
        """
        # Copy so that the shared ENUM_D_TAG_COMMON dict is not mutated.
        d_tag_dict = dict(ENUM_D_TAG_COMMON)
        if self.e_machine in ENUMMAP_EXTRA_D_TAG_MACHINE:
            d_tag_dict.update(ENUMMAP_EXTRA_D_TAG_MACHINE[self.e_machine])
        elif self.e_ident_osabi == 'ELFOSABI_SOLARIS':
            d_tag_dict.update(ENUM_D_TAG_SOLARIS)

        self.Elf_Dyn = Struct('Elf_Dyn',
            Enum(self.Elf_sxword('d_tag'), **d_tag_dict),
            self.Elf_xword('d_val'),
            # d_ptr is an alias of d_val; no extra bytes are consumed.
            Value('d_ptr', lambda ctx: ctx['d_val']),
        )

    def _create_sym(self):
        """ Symbol table entry (Elf_Sym). The field order depends on elfclass.
        """
        # st_info is hierarchical. To access the type, use
        # container['st_info']['type']
        st_info_struct = BitStruct('st_info',
            Enum(BitField('bind', 4), **ENUM_ST_INFO_BIND),
            Enum(BitField('type', 4), **ENUM_ST_INFO_TYPE))
        # st_other is hierarchical. To access the visibility,
        # use container['st_other']['visibility']
        st_other_struct = BitStruct('st_other',
            # https://openpowerfoundation.org/wp-content/uploads/2016/03/ABI64BitOpenPOWERv1.1_16July2015_pub4.pdf
            # See 3.4.1 Symbol Values.
            Enum(BitField('local', 3), **ENUM_ST_LOCAL),
            Padding(2),
            Enum(BitField('visibility', 3), **ENUM_ST_VISIBILITY))
        if self.elfclass == 32:
            self.Elf_Sym = Struct('Elf_Sym',
                self.Elf_word('st_name'),
                self.Elf_addr('st_value'),
                self.Elf_word('st_size'),
                st_info_struct,
                st_other_struct,
                Enum(self.Elf_half('st_shndx'), **ENUM_ST_SHNDX),
            )
        else:
            self.Elf_Sym = Struct('Elf_Sym',
                self.Elf_word('st_name'),
                st_info_struct,
                st_other_struct,
                Enum(self.Elf_half('st_shndx'), **ENUM_ST_SHNDX),
                self.Elf_addr('st_value'),
                self.Elf_xword('st_size'),
            )

    def _create_sunw_syminfo(self):
        """ Elf_Sunw_Syminfo entry: si_boundto and si_flags.
        """
        self.Elf_Sunw_Syminfo = Struct('Elf_Sunw_Syminfo',
            Enum(self.Elf_half('si_boundto'), **ENUM_SUNW_SYMINFO_BOUNDTO),
            self.Elf_half('si_flags'),
        )

    def _create_gnu_verneed(self):
        """ "Version needed" entries: Elf_Verneed and its auxiliary
            Elf_Vernaux.
        """
        # Structure of "version needed" entries is documented in
        # Oracle "Linker and Libraries Guide", Chapter 13 Object File Format
        self.Elf_Verneed = Struct('Elf_Verneed',
            self.Elf_half('vn_version'),
            self.Elf_half('vn_cnt'),
            self.Elf_word('vn_file'),
            self.Elf_word('vn_aux'),
            self.Elf_word('vn_next'),
        )
        self.Elf_Vernaux = Struct('Elf_Vernaux',
            self.Elf_word('vna_hash'),
            self.Elf_half('vna_flags'),
            self.Elf_half('vna_other'),
            self.Elf_word('vna_name'),
            self.Elf_word('vna_next'),
        )

    def _create_gnu_verdef(self):
        """ "Version definition" entries: Elf_Verdef and its auxiliary
            Elf_Verdaux.
        """
        # Structure of "version definition" entries is documented in
        # Oracle "Linker and Libraries Guide", Chapter 13 Object File Format
        self.Elf_Verdef = Struct('Elf_Verdef',
            self.Elf_half('vd_version'),
            self.Elf_half('vd_flags'),
            self.Elf_half('vd_ndx'),
            self.Elf_half('vd_cnt'),
            self.Elf_word('vd_hash'),
            self.Elf_word('vd_aux'),
            self.Elf_word('vd_next'),
        )
        self.Elf_Verdaux = Struct('Elf_Verdaux',
            self.Elf_word('vda_name'),
            self.Elf_word('vda_next'),
        )

    def _create_gnu_versym(self):
        """ "Version symbol" entry (Elf_Versym): a single ndx half-word.
        """
        # Structure of "version symbol" entries is documented in
        # Oracle "Linker and Libraries Guide", Chapter 13 Object File Format
        self.Elf_Versym = Struct('Elf_Versym',
            Enum(self.Elf_half('ndx'), **ENUM_VERSYM),
        )

    def _create_gnu_abi(self):
        """ Descriptor of a GNU ABI tag note (Elf_abi).
        """
        # Structure of GNU ABI notes is documented in
        # https://code.woboq.org/userspace/glibc/csu/abi-note.S.html
        self.Elf_abi = Struct('Elf_abi',
            Enum(self.Elf_word('abi_os'), **ENUM_NOTE_ABI_TAG_OS),
            self.Elf_word('abi_major'),
            self.Elf_word('abi_minor'),
            self.Elf_word('abi_tiny'),
        )

    def _create_gnu_debugaltlink(self):
        """ Elf_debugaltlink: a C string (sup_filename) followed by a
            20-byte string (sup_checksum).
        """
        self.Elf_debugaltlink = Struct('Elf_debugaltlink',
            CString("sup_filename"),
            String("sup_checksum", length=20))

    def _create_gnu_property(self):
        """ One entry of a GNU property note (Elf_Prop): pr_type, pr_datasz,
            the pr_data payload and trailing alignment padding.
        """
        # Structure of GNU property notes is documented in
        # https://github.com/hjl-tools/linux-abi/wiki/linux-abi-draft.pdf
        def roundup_padding(ctx):
            # Number of pad bytes following pr_data.
            # NOTE(review): presumably roundup(n, bits) rounds n up to a
            # multiple of 2**bits (4 bytes for ELF32, 8 for ELF64) - confirm
            # against common.utils.roundup.
            if self.elfclass == 32:
                return roundup(ctx.pr_datasz, 2) - ctx.pr_datasz
            return roundup(ctx.pr_datasz, 3) - ctx.pr_datasz

        def classify_pr_data(ctx):
            # Build the Switch key. A pr_type that is not a str yields None,
            # which matches no case and so falls through to the raw-bytes
            # default.
            if type(ctx.pr_type) is not str:
                return None
            if ctx.pr_type.startswith('GNU_PROPERTY_X86_'):
                return ('GNU_PROPERTY_X86_*', 4, 0)
            return (ctx.pr_type, ctx.pr_datasz, self.elfclass)

        self.Elf_Prop = Struct('Elf_Prop',
            Enum(self.Elf_word('pr_type'), **ENUM_NOTE_GNU_PROPERTY_TYPE),
            self.Elf_word('pr_datasz'),
            Switch('pr_data', classify_pr_data, {
                    ('GNU_PROPERTY_STACK_SIZE', 4, 32): self.Elf_word('pr_data'),
                    ('GNU_PROPERTY_STACK_SIZE', 8, 64): self.Elf_word64('pr_data'),
                    ('GNU_PROPERTY_X86_*', 4, 0): self.Elf_word('pr_data'),
                },
                # Anything unrecognized is kept as pr_datasz raw bytes.
                default=Field('pr_data', lambda ctx: ctx.pr_datasz)
            ),
            Padding(roundup_padding)
        )

    def _create_note(self, e_type=None):
        """ Note-related structs: Elf_Nhdr, Elf_Prpsinfo and Elf_Nt_File,
            plus the Elf_ugid data chunk used by Elf_Prpsinfo.

            e_type:
                When equal to "ET_CORE", n_type is decoded with the core
                note types; otherwise with the regular note types.
        """
        # Structure of "PT_NOTE" section

        # uid/gid fields are half-words on 32-bit files for the machines
        # listed here and words everywhere else.
        self.Elf_ugid = self.Elf_half if self.elfclass == 32 and self.e_machine in {
            'EM_MN10300',
            'EM_ARM',
            'EM_CRIS',
            'EM_CYGNUS_FRV',
            'EM_386',
            'EM_M32R',
            'EM_68K',
            'EM_S390',
            'EM_SH',
            'EM_SPARC',
        } else self.Elf_word

        self.Elf_Nhdr = Struct('Elf_Nhdr',
            self.Elf_word('n_namesz'),
            self.Elf_word('n_descsz'),
            Enum(self.Elf_word('n_type'),
                 **(ENUM_NOTE_N_TYPE if e_type != "ET_CORE"
                    else ENUM_CORE_NOTE_N_TYPE)),
        )

        # A process psinfo structure according to
        # http://elixir.free-electrons.com/linux/v2.6.35/source/include/linux/elfcore.h#L84
        if self.elfclass == 32:
            self.Elf_Prpsinfo = Struct('Elf_Prpsinfo',
                self.Elf_byte('pr_state'),
                String('pr_sname', 1),
                self.Elf_byte('pr_zomb'),
                self.Elf_byte('pr_nice'),
                self.Elf_xword('pr_flag'),
                self.Elf_ugid('pr_uid'),
                self.Elf_ugid('pr_gid'),
                self.Elf_word('pr_pid'),
                self.Elf_word('pr_ppid'),
                self.Elf_word('pr_pgrp'),
                self.Elf_word('pr_sid'),
                String('pr_fname', 16),
                String('pr_psargs', 80),
            )
        else: # 64
            self.Elf_Prpsinfo = Struct('Elf_Prpsinfo',
                self.Elf_byte('pr_state'),
                String('pr_sname', 1),
                self.Elf_byte('pr_zomb'),
                self.Elf_byte('pr_nice'),
                Padding(4),
                self.Elf_xword('pr_flag'),
                self.Elf_ugid('pr_uid'),
                self.Elf_ugid('pr_gid'),
                self.Elf_word('pr_pid'),
                self.Elf_word('pr_ppid'),
                self.Elf_word('pr_pgrp'),
                self.Elf_word('pr_sid'),
                String('pr_fname', 16),
                String('pr_psargs', 80),
            )

        # A PT_NOTE of type NT_FILE matching the definition in
        # https://chromium.googlesource.com/
        # native_client/nacl-binutils/+/upstream/master/binutils/readelf.c
        # Line 15121
        self.Elf_Nt_File = Struct('Elf_Nt_File',
                                  self.Elf_xword("num_map_entries"),
                                  self.Elf_xword("page_size"),
                                  Array(lambda ctx: ctx.num_map_entries,
                                        Struct('Elf_Nt_File_Entry',
                                               self.Elf_addr('vm_start'),
                                               self.Elf_addr('vm_end'),
                                               self.Elf_offset('page_offset'))),
                                  Array(lambda ctx: ctx.num_map_entries,
                                        CString('filename')))

    def _create_stabs(self):
        """ One stabs entry (Elf_Stabs).
        """
        # Structure of one stabs entry, see binutils/bfd/stabs.c
        # Names taken from https://sourceware.org/gdb/current/onlinedocs/stabs.html#Overview
        self.Elf_Stabs = Struct('Elf_Stabs',
            self.Elf_word('n_strx'),
            self.Elf_byte('n_type'),
            self.Elf_byte('n_other'),
            self.Elf_half('n_desc'),
            self.Elf_word('n_value'),
        )

    def _create_attributes_subsection(self):
        """ Header of a build attributes subsection
            (Elf_Attr_Subsection_Header): a length word followed by the
            UTF-8 vendor name.
        """
        # Structure of a build attributes subsection header. A subsection is
        # either public to all tools that process the ELF file or private to
        # the vendor's tools.
        self.Elf_Attr_Subsection_Header = Struct('Elf_Attr_Subsection',
                                                 self.Elf_word('length'),
                                                 self.Elf_ntbs('vendor_name',
                                                               encoding='utf-8')
        )

    def _create_arm_attributes(self):
        """ ARM build attribute tag (Elf_Arm_Attribute_Tag), a ULEB128 value
            decoded through ENUM_ATTR_TAG_ARM.
        """
        # Structure of an ARM build attribute tag.
        self.Elf_Arm_Attribute_Tag = Struct('Elf_Arm_Attribute_Tag',
                                            Enum(self.Elf_uleb128('tag'),
                                                 **ENUM_ATTR_TAG_ARM)
        )

    def _create_riscv_attributes(self):
        """ RISC-V build attribute tag (Elf_RiscV_Attribute_Tag), a ULEB128
            value decoded through ENUM_ATTR_TAG_RISCV.
        """
        # Structure of a RISC-V build attribute tag.
        self.Elf_RiscV_Attribute_Tag = Struct('Elf_RiscV_Attribute_Tag',
                                              Enum(self.Elf_uleb128('tag'),
                                                   **ENUM_ATTR_TAG_RISCV)
        )

    def _create_elf_hash(self):
        """ SYSV-style hash table (Elf_Hash): the two counts followed by the
            buckets and chains arrays they size.
        """
        # Structure of the old SYSV-style hash table header. It is documented
        # in the Oracle "Linker and Libraries Guide", Part IV ELF Application
        # Binary Interface, Chapter 14 Object File Format, Section Hash Table
        # Section:
        # https://docs.oracle.com/cd/E53394_01/html/E54813/chapter6-48031.html

        self.Elf_Hash = Struct('Elf_Hash',
                               self.Elf_word('nbuckets'),
                               self.Elf_word('nchains'),
                               Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets')),
                               Array(lambda ctx: ctx['nchains'], self.Elf_word('chains')))

    def _create_gnu_hash(self):
        """ GNU-style hash table (Gnu_Hash): the four header words followed
            by the bloom filter and buckets arrays. The chain values that
            follow the buckets are not part of this struct.
        """
        # Structure of the GNU-style hash table header. Documentation for this
        # table is mostly in the GLIBC source code, a good explanation of the
        # format can be found in this blog post:
        # https://flapenguin.me/2017/05/10/elf-lookup-dt-gnu-hash/
        self.Gnu_Hash = Struct('Gnu_Hash',
                               self.Elf_word('nbuckets'),
                               self.Elf_word('symoffset'),
                               self.Elf_word('bloom_size'),
                               self.Elf_word('bloom_shift'),
                               # Bloom words are native word size (xword).
                               Array(lambda ctx: ctx['bloom_size'], self.Elf_xword('bloom')),
                               Array(lambda ctx: ctx['nbuckets'], self.Elf_word('buckets')))