Fix the LoongArch support code and some more (#483)
[pyelftools.git] / elftools / elf / relocation.py
1 #-------------------------------------------------------------------------------
2 # elftools: elf/relocation.py
3 #
4 # ELF relocations
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from collections import namedtuple
10
11 from ..common.exceptions import ELFRelocationError
12 from ..common.utils import elf_assert, struct_parse
13 from .sections import Section
14 from .enums import (
15 ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS,
16 ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64,
17 ENUM_RELOC_TYPE_BPF, ENUM_RELOC_TYPE_LOONGARCH, ENUM_D_TAG)
18 from ..construct import Container
19
20
class Relocation(object):
    """ A single relocation entry, wrapping the parsed entry structure.

        Fields of the entry are reachable with dictionary-style indexing
        (for example reloc['r_offset']).

        The entry may be either a REL or a RELA relocation; is_RELA() tells
        the two apart.
    """
    def __init__(self, entry, elffile):
        self.elffile = elffile
        self.entry = entry

    def is_RELA(self):
        """ True if the entry carries an 'r_addend' field (RELA), False
            otherwise (REL).
        """
        has_addend = 'r_addend' in self.entry
        return has_addend

    def __getitem__(self, name):
        """ Return the field called 'name' from the underlying entry.
        """
        return self.entry[name]

    def __repr__(self):
        kind = 'REL'
        if self.is_RELA():
            kind = 'RELA'
        return '<Relocation (%s): %s>' % (kind, self.entry)

    def __str__(self):
        # The string form is deliberately the same as the repr.
        return self.__repr__()
48
49
class RelocationTable(object):
    """ Behaviour shared by relocation sections and relocation tables: a
        run of fixed-size REL or RELA entries located at a given offset of
        the ELF file's stream.
    """

    def __init__(self, elffile, offset, size, is_rela):
        self._stream = elffile.stream
        self._elffile = elffile
        self._elfstructs = elffile.structs
        self._offset = offset
        self._size = size
        self._is_rela = is_rela

        # The flag selects which entry layout is used to parse the table.
        structs = self._elfstructs
        self.entry_struct = structs.Elf_Rela if is_rela else structs.Elf_Rel
        self.entry_size = self.entry_struct.sizeof()

    def is_RELA(self):
        """ True if the table holds RELA entries, False if it holds REL
            entries.
        """
        return self._is_rela

    def num_relocations(self):
        """ Number of whole entries that fit in the table's size.
        """
        return self._size // self.entry_size

    def get_relocation(self, n):
        """ Parse the entry at index #n and return it as a Relocation
            object.
        """
        position = self._offset + n * self.entry_size
        parsed = struct_parse(
            self.entry_struct, self._stream, stream_pos=position)
        return Relocation(parsed, self._elffile)

    def iter_relocations(self):
        """ Yield every relocation of the table, in index order.
        """
        count = self.num_relocations()
        for index in range(count):
            yield self.get_relocation(index)
94
95
class RelocationSection(Section, RelocationTable):
    """ A section of type SHT_REL or SHT_RELA, exposed as a collection of
        Relocation entries.
    """
    def __init__(self, header, name, elffile):
        Section.__init__(self, header, name, elffile)
        RelocationTable.__init__(
            self,
            self.elffile,
            self['sh_offset'],
            self['sh_size'],
            header['sh_type'] == 'SHT_RELA')

        section_type = header['sh_type']
        elf_assert(
            section_type in ('SHT_REL', 'SHT_RELA'),
            'Unknown relocation type section')
        # The entry size recorded in the header must agree with the size of
        # the entry structure selected for this section type.
        entsize_message = 'Expected sh_entsize of %s section to be %s' % (
            section_type, self.entry_size)
        elf_assert(header['sh_entsize'] == self.entry_size, entsize_message)
109
class RelrRelocationSection(Section):
    """ Section holding relative relocations in the compressed RELR format.

        Entries come in two kinds, told apart by their lowest bit. An entry
        with an even value is an 'anchor': it is a relocation in its own
        right and defines the base address for what follows. An entry with
        an odd value is a bitmap covering the consecutive addresses after
        the anchor; a set bit means the corresponding relocation exists, a
        clear bit means that address is skipped. Addends are stored at the
        respective addresses (as in REL relocations).
    """
    def __init__(self, header, name, elffile):
        Section.__init__(self, header, name, elffile)
        self._offset = self['sh_offset']
        self._size = self['sh_size']
        self._relr_struct = self.elffile.structs.Elf_Relr
        self._entrysize = self._relr_struct.sizeof()
        # Populated on first use by num_relocations() / get_relocation().
        self._cached_relocations = None

    def iter_relocations(self):
        """ Yield all the relocations in the section
        """
        position = self._offset
        end = self._offset + self._size
        # Bitmap addresses are computed relative to a base that is set by
        # the most recent anchor entry.
        base = None
        while position < end:
            entry = struct_parse(
                self._relr_struct,
                self.elffile.stream,
                stream_pos=position)
            word = entry['r_offset']
            if word & 1:
                # Odd value: this entry is a bitmap.
                elf_assert(base is not None, 'RELR bitmap without base address')
                # Drop the least significant (marker) bit, then visit the
                # remaining bits from low to high until none are left set.
                bits = word >> 1
                slot = 0
                while bits != 0:
                    if bits & 1:
                        # A set bit stands for a relocation at the address
                        # of the matching slot after the base.
                        yield Relocation(
                            Container(r_offset=base + slot * self._entrysize),
                            self.elffile)
                    bits >>= 1
                    slot += 1
                # Advance 'base' past the current bitmap (8 == CHAR_BIT).
                # There are 63 (or 31 for 32-bit ELFs) entries in each
                # bitmap, and every bit corresponds to an ELF_addr-sized
                # relocation.
                base += (8 * self._entrysize - 1) * self.elffile.structs.Elf_addr('').sizeof()
            else:
                # Even value: this entry is an anchor. The following bitmap
                # starts one entry past the anchor's address.
                base = word + self._entrysize
                yield Relocation(entry, self.elffile)
            # Move on to the next entry of the section.
            position += self._entrysize

    def num_relocations(self):
        """ Number of relocations in the section
        """
        cached = self._cached_relocations
        if cached is None:
            cached = self._cached_relocations = list(self.iter_relocations())
        return len(cached)

    def get_relocation(self, n):
        """ Get the relocation at index #n from the section (Relocation object)
        """
        cached = self._cached_relocations
        if cached is None:
            cached = self._cached_relocations = list(self.iter_relocations())
        return cached[n]
184
class RelocationHandler(object):
    """ Implements the logic of applying relocations found in an ELF file.
    """
    def __init__(self, elffile):
        self.elffile = elffile

    def find_relocations_for_section(self, section):
        """ Look up the relocation section that targets the given section.
            Returns a RelocationSection object, or None when the file has
            no such section.
        """
        # Currently assume that either a .rel or a .rela section exists for
        # this section, but not both.
        wanted_names = ('.rel' + section.name, '.rela' + section.name)
        for candidate in self.elffile.iter_sections():
            if not isinstance(candidate, RelocationSection):
                continue
            if candidate.name in wanted_names:
                return candidate
        return None

    def apply_section_relocations(self, stream, reloc_section):
        """ Apply every relocation of reloc_section (a RelocationSection
            object) to stream, which holds the data of the section being
            relocated. The stream is modified in place.
        """
        # sh_link of the relocation section names its symbol table.
        symbol_table = self.elffile.get_section(reloc_section['sh_link'])
        for relocation in reloc_section.iter_relocations():
            self._do_apply_relocation(stream, relocation, symbol_table)

    def _do_apply_relocation(self, stream, reloc, symtab):
        """ Apply the single relocation 'reloc' to 'stream', resolving its
            symbol through 'symtab'. Raises ELFRelocationError for an
            out-of-range symbol index, a REL/RELA kind that is unexpected
            for the architecture, or a relocation type with no recipe.
        """
        # Preparation: fetch the value of the symbol the relocation refers
        # to, with a sanity check on the symbol index.
        sym_index = reloc['r_info_sym']
        if sym_index >= symtab.num_symbols():
            raise ELFRelocationError(
                'Invalid symbol reference in relocation: index %s' % (
                    sym_index))
        sym_value = symtab.get_symbol(sym_index)['st_value']

        # Pick the recipe that says how to perform this relocation type on
        # the file's architecture.
        reloc_type = reloc['r_info_type']
        recipe = None
        arch = self.elffile.get_machine_arch()

        if arch == 'x86':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for x86: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_X86.get(reloc_type, None)
        elif arch == 'x64':
            if not reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected REL relocation for x64: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_X64.get(reloc_type, None)
        elif arch == 'MIPS':
            if reloc.is_RELA():
                # An R_MIPS_64 entry is only handled when its extra type
                # and symbol fields are all zero.
                if (reloc_type == ENUM_RELOC_TYPE_MIPS['R_MIPS_64'] and
                        any(reloc[field] != 0
                            for field in ('r_type2', 'r_type3', 'r_ssym'))):
                    raise ELFRelocationError(
                        'Multiple relocations in R_MIPS_64 are not implemented: %s' % reloc)
                recipe = self._RELOCATION_RECIPES_MIPS_RELA.get(reloc_type, None)
            else:
                recipe = self._RELOCATION_RECIPES_MIPS_REL.get(reloc_type, None)
        elif arch == 'ARM':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for ARM: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_ARM.get(reloc_type, None)
        elif arch == 'AArch64':
            recipe = self._RELOCATION_RECIPES_AARCH64.get(reloc_type, None)
        elif arch == '64-bit PowerPC':
            recipe = self._RELOCATION_RECIPES_PPC64.get(reloc_type, None)
        elif arch == 'Linux BPF - in-kernel virtual machine':
            recipe = self._RELOCATION_RECIPES_EBPF.get(reloc_type, None)
        elif arch == 'LoongArch':
            if not reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected REL relocation for LoongArch: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_LOONGARCH.get(reloc_type, None)

        if recipe is None:
            raise ELFRelocationError(
                'Unsupported relocation type: %s' % reloc_type)

        # Everything needed to perform the relocation is now at hand.

        # 0. Choose the struct used to read the value from the stream and
        #    to write it back, according to the recipe's unit size.
        struct_factory_names = {
            4: 'Elf_word',
            8: 'Elf_word64',
            1: 'Elf_byte',
            2: 'Elf_half',
        }
        factory_name = struct_factory_names.get(recipe.bytesize)
        if factory_name is None:
            raise ELFRelocationError('Invalid bytesize %s for relocation' %
                                     recipe.bytesize)
        value_struct = getattr(self.elffile.structs, factory_name)('')

        # 1. Read the value from the stream (with correct size and
        #    endianness)
        target = reloc['r_offset']
        original_value = struct_parse(
            value_struct, stream, stream_pos=target)

        # 2. Apply the relocation to the value, acting according to the
        #    recipe
        addend = reloc['r_addend'] if recipe.has_addend else 0
        relocated_value = recipe.calc_func(
            value=original_value,
            sym_value=sym_value,
            offset=target,
            addend=addend)

        # 3. Write the relocated value back into the stream
        stream.seek(target)

        # Make sure the relocated value fits back by wrapping it around.
        # This looks like a problem, but it seems to be the way this is done
        # in binutils too.
        relocated_value = relocated_value % (2 ** (recipe.bytesize * 8))
        value_struct.build_stream(relocated_value, stream)

    # Relocations are represented by "recipes". Each recipe specifies:
    #  bytesize: The number of bytes to read (and write back) to the
    #            section. This is the unit of data on which relocation is
    #            performed.
    #  has_addend: Does this relocation have an extra addend?
    #  calc_func: A function that performs the relocation on an extracted
    #             value, and returns the updated value.
    #
    _RELOCATION_RECIPE_TYPE = namedtuple(
        '_RELOCATION_RECIPE_TYPE', 'bytesize has_addend calc_func')

    # The calculation functions below all share one signature so that any
    # of them can be plugged into a recipe:
    #  value: the value currently stored at the relocated location
    #  sym_value: the value of the symbol the relocation refers to
    #  offset: the offset of the relocated location
    #  addend: the explicit addend (0 when the recipe has none)

    # Leaves the stored value untouched.
    def _reloc_calc_identity(value, sym_value, offset, addend=0):
        return value

    # Symbol value plus stored value plus addend.
    def _reloc_calc_sym_plus_value(value, sym_value, offset, addend=0):
        return sym_value + value + addend

    # Symbol value plus stored value, relative to the relocated offset.
    def _reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0):
        return sym_value + value - offset

    # Symbol value plus addend; the stored value is ignored.
    def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0):
        return sym_value + addend

    # Symbol value plus addend, relative to the relocated offset.
    def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0):
        return sym_value + addend - offset

    # Stored value minus symbol value minus addend.
    def _reloc_calc_value_minus_sym_addend(value, sym_value, offset, addend=0):
        return value - sym_value - addend

    # Like the offset-relative variant above, with the symbol value and the
    # offset each divided by 4 first.
    def _arm_reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0):
        return sym_value // 4 + value - offset // 4

    # Symbol value plus addend, divided by 8, minus one.
    def _bpf_64_32_reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0):
        return (sym_value + addend) // 8 - 1

    _RELOCATION_RECIPES_ARM = {
        ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_ARM['R_ARM_CALL']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_arm_reloc_calc_sym_plus_value_pcrel),
    }

    _RELOCATION_RECIPES_AARCH64 = {
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_PREL32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
    }

    # https://dmz-portal.mips.com/wiki/MIPS_relocation_types
    _RELOCATION_RECIPES_MIPS_REL = {
        ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
    }
    _RELOCATION_RECIPES_MIPS_RELA = {
        ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
    }

    _RELOCATION_RECIPES_PPC64 = {
        ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_PPC64['R_PPC64_REL32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
    }

    _RELOCATION_RECIPES_X86 = {
        ENUM_RELOC_TYPE_i386['R_386_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_i386['R_386_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_i386['R_386_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_sym_plus_value_pcrel),
    }

    _RELOCATION_RECIPES_X64 = {
        ENUM_RELOC_TYPE_x64['R_X86_64_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_x64['R_X86_64_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_x64['R_X86_64_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_x64['R_X86_64_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_x64['R_X86_64_32S']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
    }

    # https://www.kernel.org/doc/html/latest/bpf/llvm_reloc.html#different-relocation-types
    _RELOCATION_RECIPES_EBPF = {
        ENUM_RELOC_TYPE_BPF['R_BPF_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=False,
            calc_func=_bpf_64_32_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_NODYLD32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_ABS64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_identity),
    }

    # https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc
    _RELOCATION_RECIPES_LOONGARCH = {
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD8']: _RELOCATION_RECIPE_TYPE(
            bytesize=1, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB8']: _RELOCATION_RECIPE_TYPE(
            bytesize=1, has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD16']: _RELOCATION_RECIPE_TYPE(
            bytesize=2, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB16']: _RELOCATION_RECIPE_TYPE(
            bytesize=2, has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32_PCREL']: _RELOCATION_RECIPE_TYPE(
            bytesize=4, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_64_PCREL']: _RELOCATION_RECIPE_TYPE(
            bytesize=8, has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
    }
472
473