ebd399ec3b290548e94544454fb0bf5de2e8e5fd
[pyelftools.git] / elftools / elf / relocation.py
1 #-------------------------------------------------------------------------------
2 # elftools: elf/relocation.py
3 #
4 # ELF relocations
5 #
6 # Eli Bendersky (eliben@gmail.com)
7 # This code is in the public domain
8 #-------------------------------------------------------------------------------
9 from collections import namedtuple
10
11 from ..common.exceptions import ELFRelocationError
12 from ..common.utils import elf_assert, struct_parse
13 from .sections import Section
14 from .enums import (
15 ENUM_RELOC_TYPE_i386, ENUM_RELOC_TYPE_x64, ENUM_RELOC_TYPE_MIPS,
16 ENUM_RELOC_TYPE_ARM, ENUM_RELOC_TYPE_AARCH64, ENUM_RELOC_TYPE_PPC64,
17 ENUM_RELOC_TYPE_BPF, ENUM_RELOC_TYPE_LOONGARCH, ENUM_D_TAG)
18 from ..construct import Container
19
20
class Relocation(object):
    """ A single relocation entry, of either the REL or the RELA flavour.

        The fields of the underlying entry can be read with dictionary-style
        indexing on the Relocation object itself.
    """
    def __init__(self, entry, elffile):
        self.elffile = elffile
        self.entry = entry

    def is_RELA(self):
        """ Return True for a RELA entry (one that carries an 'r_addend'
            field) and False for a REL entry.
        """
        return 'r_addend' in self.entry

    def __getitem__(self, name):
        """ Look up the field called name in the wrapped entry.
        """
        return self.entry[name]

    def __repr__(self):
        flavour = 'RELA' if self.is_RELA() else 'REL'
        return '<Relocation (%s): %s>' % (flavour, self.entry)

    def __str__(self):
        # The printable form is deliberately the same as the repr.
        return self.__repr__()
48
49
class RelocationTable(object):
    """ Behaviour common to relocation sections and relocation tables: a run
        of fixed-size REL or RELA entries located at a known offset of the
        ELF file's stream.
    """

    def __init__(self, elffile, offset, size, is_rela):
        """ elffile: the ELF file object; its stream and structs are used.
            offset: stream position of the first entry.
            size: total size of the table, in bytes.
            is_rela: True to parse entries as Elf_Rela, False for Elf_Rel.
        """
        self._elffile = elffile
        self._stream = elffile.stream
        self._elfstructs = elffile.structs
        self._offset = offset
        self._size = size
        self._is_rela = is_rela

        structs = self._elfstructs
        self.entry_struct = structs.Elf_Rela if is_rela else structs.Elf_Rel
        self.entry_size = self.entry_struct.sizeof()

    def is_RELA(self):
        """ True when the table holds RELA entries, False when it holds REL
            entries.
        """
        return self._is_rela

    def num_relocations(self):
        """ Count of whole entries that fit in the table's size.
        """
        return self._size // self.entry_size

    def get_relocation(self, n):
        """ Parse the entry at index #n and return it as a Relocation object.
        """
        position = self._offset + n * self.entry_size
        parsed = struct_parse(
            self.entry_struct, self._stream, stream_pos=position)
        return Relocation(parsed, self._elffile)

    def iter_relocations(self):
        """ Yield every relocation of the table, in index order.
        """
        for index in range(self.num_relocations()):
            yield self.get_relocation(index)
94
95
class RelocationSection(Section, RelocationTable):
    """ ELF relocation section: a Section that is also a table of Relocation
        entries.
    """
    def __init__(self, header, name, elffile):
        Section.__init__(self, header, name, elffile)
        section_type = header['sh_type']
        RelocationTable.__init__(
            self, self.elffile,
            offset=self['sh_offset'],
            size=self['sh_size'],
            is_rela=(section_type == 'SHT_RELA'))

        # Sanity checks: the section must really be REL/RELA, and the entry
        # size recorded in its header must agree with the parsing struct.
        elf_assert(section_type in ('SHT_REL', 'SHT_RELA'),
                   'Unknown relocation type section')
        elf_assert(header['sh_entsize'] == self.entry_size,
                   'Expected sh_entsize of %s section to be %s' % (
                       section_type, self.entry_size))
109
110
class RelrRelocationTable(object):
    """ RELR compressed relocation table. This stores relative relocations
        in a compressed format. An entry with an even value serves as an
        'anchor' that defines a base address. Following this entry are one or
        more bitmaps for consecutive addresses after the anchor which determine
        if the corresponding relocation exists (if the bit is 1) or if it is
        skipped. Addends are stored at the respective addresses (as in REL
        relocations).
    """

    def __init__(self, elffile, offset, size, entrysize):
        """ elffile: the ELF file object; its stream and structs are used.
            offset: stream position of the first RELR entry.
            size: total size of the table, in bytes.
            entrysize: entry size declared by the file; it is asserted to
                equal the size of the Elf_Relr struct.
        """
        self._elffile = elffile
        self._offset = offset
        self._size = size
        self._relr_struct = self._elffile.structs.Elf_Relr
        self._entrysize = self._relr_struct.sizeof()
        # Filled lazily by _decoded_relocations().
        self._cached_relocations = None

        elf_assert(self._entrysize == entrysize,
            'Expected RELR entry size to be %s, got %s' % (
                self._entrysize, entrysize))

    def iter_relocations(self):
        """ Yield all the relocations in the section, decoding anchors and
            bitmaps on the fly (the cache is not consulted).
        """
        # If DT_RELRSZ is zero, offset is meaningless and could be None.
        # This is a generator, so a bare return is what ends it with no items.
        if self._size == 0:
            return

        entry_size = self._entrysize
        # Distance by which one bitmap advances the base address
        # (8 == CHAR_BIT). There are 63 (or 31 for 32-bit ELFs) entries in
        # each bitmap, and every bit corresponds to an ELF_addr-sized
        # relocation. The value does not change, so compute it once.
        bitmap_span = (
            (8 * entry_size - 1) * self._elffile.structs.Elf_addr('').sizeof())

        limit = self._offset + self._size
        relr = self._offset
        # The addresses of relocations in a bitmap are calculated from a base
        # value provided in an initial 'anchor' relocation.
        base = None
        while relr < limit:
            entry = struct_parse(self._relr_struct,
                                 self._elffile.stream,
                                 stream_pos=relr)
            entry_offset = entry['r_offset']
            if (entry_offset & 1) == 0:
                # We found an anchor: it is a relocation itself, and the
                # address right after it becomes the base for the bitmaps
                # that follow.
                base = entry_offset + entry_size
                yield Relocation(entry, self._elffile)
            else:
                # We're processing a bitmap.
                elf_assert(base is not None, 'RELR bitmap without base address')
                i = 0
                while True:
                    # Iterate over all bits except the least significant one.
                    entry_offset = (entry_offset >> 1)
                    if entry_offset == 0:
                        break
                    # If the current LSB is set, we have a relocation at the
                    # corresponding address so generate a Relocation with the
                    # matching offset.
                    if (entry_offset & 1) != 0:
                        calc_offset = base + i * entry_size
                        yield Relocation(Container(r_offset = calc_offset),
                                         self._elffile)
                    i += 1
                # Advance 'base' past the current bitmap.
                base += bitmap_span
            # Advance to the next entry
            relr += entry_size

    def _decoded_relocations(self):
        """ Return the fully decoded list of relocations, decoding the table
            on first use and reusing the cached list afterwards.
        """
        if self._cached_relocations is None:
            self._cached_relocations = list(self.iter_relocations())
        return self._cached_relocations

    def num_relocations(self):
        """ Number of relocations in the section
        """
        return len(self._decoded_relocations())

    def get_relocation(self, n):
        """ Get the relocation at index #n from the section (Relocation object)
        """
        return self._decoded_relocations()[n]
195
196
class RelrRelocationSection(Section, RelrRelocationTable):
    """ ELF RELR relocation section: a Section that is also a table of RELR
        relocation entries.
    """
    def __init__(self, header, name, elffile):
        Section.__init__(self, header, name, elffile)
        # Location, size and declared entry size all come from the section
        # header fields.
        RelrRelocationTable.__init__(
            self, self.elffile,
            offset=self['sh_offset'],
            size=self['sh_size'],
            entrysize=self['sh_entsize'])
204
205
class RelocationHandler(object):
    """ Implements the logic of finding and applying relocations in an ELF
        file.
    """
    def __init__(self, elffile):
        self.elffile = elffile

    def find_relocations_for_section(self, section):
        """ Look up the relocation section that targets the given section.

            The match is made by name: a RelocationSection called '.rel' or
            '.rela' followed by section.name. Returns that RelocationSection
            object, or None if the file has no such section.
        """
        # It is assumed that at most one of the .rel/.rela variants exists
        # for a section, so the first match in iteration order is returned.
        wanted_names = ('.rel' + section.name, '.rela' + section.name)
        for candidate in self.elffile.iter_sections():
            if not isinstance(candidate, RelocationSection):
                continue
            if candidate.name in wanted_names:
                return candidate
        return None

    def apply_section_relocations(self, stream, reloc_section):
        """ Apply every relocation of reloc_section (a RelocationSection
            object) to stream, which holds the data of the section being
            relocated. The stream is modified in place.
        """
        # sh_link of the relocation section names its symbol table.
        symbol_table = self.elffile.get_section(reloc_section['sh_link'])
        for entry in reloc_section.iter_relocations():
            self._do_apply_relocation(stream, entry, symbol_table)

    def _do_apply_relocation(self, stream, reloc, symtab):
        """ Apply the single relocation reloc to stream, taking symbol values
            from symtab.

            Raises ELFRelocationError for an out-of-range symbol index, a
            REL/RELA flavour the architecture branch rejects, a relocation
            type without a recipe, or a recipe with an unexpected bytesize.
        """
        # Preparation: fetch the value of the symbol the relocation refers
        # to, with a sanity check on the symbol index.
        sym_index = reloc['r_info_sym']
        if sym_index >= symtab.num_symbols():
            raise ELFRelocationError(
                'Invalid symbol reference in relocation: index %s' % (
                    sym_index))
        sym_value = symtab.get_symbol(sym_index)['st_value']

        # Pick the recipe that says how this relocation type is performed on
        # the file's architecture.
        reloc_type = reloc['r_info_type']
        arch = self.elffile.get_machine_arch()
        recipe = None

        if arch == 'x86':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for x86: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_X86.get(reloc_type)
        elif arch == 'x64':
            if not reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected REL relocation for x64: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_X64.get(reloc_type)
        elif arch == 'MIPS':
            if reloc.is_RELA():
                if reloc_type == ENUM_RELOC_TYPE_MIPS['R_MIPS_64']:
                    # Only the plain form is handled: the extra type and
                    # special-symbol fields must all be zero.
                    extra_fields = ('r_type2', 'r_type3', 'r_ssym')
                    if any(reloc[field] != 0 for field in extra_fields):
                        raise ELFRelocationError(
                            'Multiple relocations in R_MIPS_64 are not implemented: %s' % reloc)
                recipe = self._RELOCATION_RECIPES_MIPS_RELA.get(reloc_type)
            else:
                recipe = self._RELOCATION_RECIPES_MIPS_REL.get(reloc_type)
        elif arch == 'ARM':
            if reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected RELA relocation for ARM: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_ARM.get(reloc_type)
        elif arch == 'AArch64':
            recipe = self._RELOCATION_RECIPES_AARCH64.get(reloc_type)
        elif arch == '64-bit PowerPC':
            recipe = self._RELOCATION_RECIPES_PPC64.get(reloc_type)
        elif arch == 'Linux BPF - in-kernel virtual machine':
            recipe = self._RELOCATION_RECIPES_EBPF.get(reloc_type)
        elif arch == 'LoongArch':
            if not reloc.is_RELA():
                raise ELFRelocationError(
                    'Unexpected REL relocation for LoongArch: %s' % reloc)
            recipe = self._RELOCATION_RECIPES_LOONGARCH.get(reloc_type)

        if recipe is None:
            raise ELFRelocationError(
                'Unsupported relocation type: %s' % reloc_type)

        # 0. Choose the struct used to read the value from the stream and to
        #    write it back, according to the recipe's unit size.
        struct_names = {
            1: 'Elf_byte',
            2: 'Elf_half',
            4: 'Elf_word',
            8: 'Elf_word64',
        }
        struct_name = struct_names.get(recipe.bytesize)
        if struct_name is None:
            raise ELFRelocationError('Invalid bytesize %s for relocation' %
                    recipe.bytesize)
        value_struct = getattr(self.elffile.structs, struct_name)('')

        # 1. Read the value from the stream (with correct size and endianness)
        target_offset = reloc['r_offset']
        original_value = struct_parse(
            value_struct, stream, stream_pos=target_offset)

        # 2. Apply the relocation to the value, acting according to the recipe
        addend = reloc['r_addend'] if recipe.has_addend else 0
        relocated_value = recipe.calc_func(
            value=original_value,
            sym_value=sym_value,
            offset=target_offset,
            addend=addend)

        # Make sure the relocated value fits back by wrapping it around. This
        # looks like a problem, but it seems to be the way this is done in
        # binutils too.
        relocated_value = relocated_value % (2 ** (recipe.bytesize * 8))

        # 3. Write the relocated value back into the stream
        stream.seek(target_offset)
        value_struct.build_stream(relocated_value, stream)

    # Relocations are represented by "recipes". Each recipe specifies:
    #  bytesize: The number of bytes to read (and write back) to the section.
    #            This is the unit of data on which relocation is performed.
    #  has_addend: Does this relocation have an extra addend?
    #  calc_func: A function that performs the relocation on an extracted
    #             value, and returns the updated value.
    #
    _RELOCATION_RECIPE_TYPE = namedtuple('_RELOCATION_RECIPE_TYPE',
        'bytesize has_addend calc_func')

    # The calc functions below share one signature and are always called with
    # keyword arguments: the value read from the stream, the symbol's value,
    # the relocation's offset and the addend (0 when the recipe has none).

    def _reloc_calc_identity(value, sym_value, offset, addend=0):
        # Leave the stored value untouched.
        return value

    def _reloc_calc_sym_plus_value(value, sym_value, offset, addend=0):
        # Symbol + stored value + addend.
        return sym_value + value + addend

    def _reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0):
        # Symbol + stored value, relative to the relocation's offset.
        return sym_value + value - offset

    def _reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0):
        # Symbol + addend; the stored value is ignored.
        return sym_value + addend

    def _reloc_calc_sym_plus_addend_pcrel(value, sym_value, offset, addend=0):
        # Symbol + addend, relative to the relocation's offset.
        return sym_value + addend - offset

    def _reloc_calc_value_minus_sym_addend(value, sym_value, offset, addend=0):
        # Stored value - symbol - addend.
        return value - sym_value - addend

    def _arm_reloc_calc_sym_plus_value_pcrel(value, sym_value, offset, addend=0):
        # Like the pcrel sym + value form, with the symbol and the offset
        # each floor-divided by 4.
        return sym_value // 4 + value - offset // 4

    def _bpf_64_32_reloc_calc_sym_plus_addend(value, sym_value, offset, addend=0):
        # (Symbol + addend) floor-divided by 8, minus one.
        return (sym_value + addend) // 8 - 1

    _RELOCATION_RECIPES_ARM = {
        ENUM_RELOC_TYPE_ARM['R_ARM_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_ARM['R_ARM_CALL']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=False,
            calc_func=_arm_reloc_calc_sym_plus_value_pcrel),
    }

    _RELOCATION_RECIPES_AARCH64 = {
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_AARCH64['R_AARCH64_PREL32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
    }

    # https://dmz-portal.mips.com/wiki/MIPS_relocation_types
    _RELOCATION_RECIPES_MIPS_REL = {
        ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
    }
    _RELOCATION_RECIPES_MIPS_RELA = {
        ENUM_RELOC_TYPE_MIPS['R_MIPS_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_MIPS['R_MIPS_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
    }

    _RELOCATION_RECIPES_PPC64 = {
        ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_PPC64['R_PPC64_REL32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_PPC64['R_PPC64_ADDR64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
    }

    _RELOCATION_RECIPES_X86 = {
        ENUM_RELOC_TYPE_i386['R_386_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_i386['R_386_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=False,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_i386['R_386_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=False,
            calc_func=_reloc_calc_sym_plus_value_pcrel),
    }

    _RELOCATION_RECIPES_X64 = {
        ENUM_RELOC_TYPE_x64['R_X86_64_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=True,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_x64['R_X86_64_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_x64['R_X86_64_PC32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_x64['R_X86_64_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_x64['R_X86_64_32S']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
    }

    # https://www.kernel.org/doc/html/latest/bpf/llvm_reloc.html#different-relocation-types
    _RELOCATION_RECIPES_EBPF = {
        ENUM_RELOC_TYPE_BPF['R_BPF_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=False,
            calc_func=_bpf_64_32_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_NODYLD32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_ABS64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_BPF['R_BPF_64_ABS32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=False,
            calc_func=_reloc_calc_identity),
    }

    # https://github.com/loongson/la-abi-specs/blob/release/laelf.adoc
    _RELOCATION_RECIPES_LOONGARCH = {
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_NONE']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=False,
            calc_func=_reloc_calc_identity),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD8']: _RELOCATION_RECIPE_TYPE(
            bytesize=1,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB8']: _RELOCATION_RECIPE_TYPE(
            bytesize=1,
            has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD16']: _RELOCATION_RECIPE_TYPE(
            bytesize=2,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB16']: _RELOCATION_RECIPE_TYPE(
            bytesize=2,
            has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB32']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_ADD64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_value),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_SUB64']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=True,
            calc_func=_reloc_calc_value_minus_sym_addend),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_32_PCREL']: _RELOCATION_RECIPE_TYPE(
            bytesize=4,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
        ENUM_RELOC_TYPE_LOONGARCH['R_LARCH_64_PCREL']: _RELOCATION_RECIPE_TYPE(
            bytesize=8,
            has_addend=True,
            calc_func=_reloc_calc_sym_plus_addend_pcrel),
    }
493
494