cavatools: initialize repository
[cavatools.git] / caveat / elf_loader.c
1 /*
2 Copyright (c) 2020 Peter Hsu. All Rights Reserved. See LICENCE file for details.
3 */
4
5 //#include "config.h"
6
7 #include <unistd.h>
8 #include <stdint.h>
9 #include <stdlib.h>
10 #include <stdio.h>
11 #include <string.h>
12 #include <assert.h>
13 #include <sys/mman.h>
14 #include <sys/types.h>
15 #include <sys/stat.h>
16 #include <fcntl.h>
17 #include <elf.h>
18
19 // BFD files pulled from riscv-gnu-toolchain
20 //#include "bfd.h"
21 //#include "ansidecl.h"
22 //#include "elf.h"
23
24 #include "caveat.h"
25 #include "opcodes.h"
26 #include "insn.h"
27
28
29
/* Address used as the initial top of the program's stack (see load_elf_binary). */
#define MEM_END 0x60000000L
/* Size in bytes of the stack mapping placed directly below MEM_END. */
#define STACK_SIZE 0x01000000L
/* Distance in bytes between the brk_min and brk_max values recorded for the program. */
#define BRK_SIZE 0x01000000L

/* Process information filled in by load_elf_binary() and initialize_stack(). */
struct pinfo_t current;


/* Static buffer whose address and size load_elf_binary() records in
   current.phdr and current.phdr_size before reading program headers. */
static long phdrs[128];

/* Contents of the ".strtab" section; NULL until load_elf_binary() loads it. */
static char* strtbl;
/* Contents of the ".symtab" section; NULL until load_elf_binary() loads it. */
static Elf64_Sym* symtbl;
/* Number of Elf64_Sym entries in symtbl. */
static long num_syms;


/* NOTE: these macros evaluate their arguments more than once, so do not
   pass expressions with side effects. */
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define CLAMP(a, lo, hi) MIN(MAX(a, lo), hi)
47
48
49
50 /**
51 * Get an annoymous memory segment using mmap() and load
52 * from file at offset. Return 0 if fail.
53 */
54 static void* load_elf_section(int file, ssize_t offset, ssize_t size)
55 {
56 void* where = mmap(0, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
57 if (where == 0)
58 return 0;
59 ssize_t ret = lseek(file, offset, SEEK_SET);
60 if (ret < 0)
61 return 0;
62 ret = read(file, where, size);
63 if (ret < size)
64 return 0;
65 return where;
66 }
67
/**
 * Translate the permission bits of an ELF program header (p_flags) into
 * the PROT_* bits mmap()/mprotect() take.  The two encodings do not use
 * the same bit positions, so each permission is mapped individually.
 */
static inline int get_prot(uint32_t p_flags)
{
  int prot = PROT_NONE;

  if (p_flags & PF_R)
    prot |= PROT_READ;
  if (p_flags & PF_W)
    prot |= PROT_WRITE;
  if (p_flags & PF_X)
    prot |= PROT_EXEC;

  return prot;
}
79
80
81 Addr_t load_elf_binary( const char* file_name, int include_data )
82 /* file_name - name of ELF binary, must be statically linked for now
83 include_data - 1=load DATA and BSS segments, 0=load TEXT only
84 returns entry point address */
85 {
86 current.phdr = (uint64_t)phdrs;
87 current.phdr_size = sizeof(phdrs);
88 struct pinfo_t* info = &current;
89 int flags = MAP_FIXED | MAP_PRIVATE;
90 ssize_t ehdr_size;
91 size_t phdr_size;
92 long number_of_insn;
93 Addr_t stack_lowest;
94 size_t tblsz;
95 char* shstrtbl;
96 ssize_t ret;
97
98 int file = open(file_name, O_RDONLY, 0);
99 quitif(file<0, "Unable to open binary file \"%s\"\n", file_name);
100
101 Elf64_Ehdr eh;
102 ehdr_size = read(file, &eh, sizeof(eh));
103 quitif(ehdr_size < (ssize_t)sizeof(eh) ||
104 !(eh.e_ident[0] == '\177' && eh.e_ident[1] == 'E' &&
105 eh.e_ident[2] == 'L' && eh.e_ident[3] == 'F'),
106 "Elf header not correct");
107 phdr_size = eh.e_phnum * sizeof(Elf64_Phdr);
108 quitif(phdr_size > info->phdr_size, "Phdr too big");
109
110 dieif(lseek(file, eh.e_shoff, SEEK_SET) < 0, "lseek failed");
111 dieif(read(file, (void*)info->phdr, phdr_size) != (ssize_t)phdr_size, "read(phdr) failed");
112 info->phnum = eh.e_phnum;
113 info->phent = sizeof(Elf64_Phdr);
114 Elf64_Phdr* ph = (Elf64_Phdr*)load_elf_section(file, eh.e_phoff, phdr_size);
115 dieif(ph==0, "cannot load phdr");
116 info->phdr = (size_t)ph;
117
118 // don't load dynamic linker at 0, else we can't catch NULL pointer derefs
119 uintptr_t bias = 0;
120 if (eh.e_type == ET_DYN)
121 bias = RISCV_PGSIZE;
122
123 info->entry = eh.e_entry + bias;
124 for (int i = eh.e_phnum - 1; i >= 0; i--) {
125 quitif(ph[i].p_type==PT_INTERP, "Not a statically linked ELF program");
126 if(ph[i].p_type == PT_LOAD && ph[i].p_memsz) {
127 uintptr_t prepad = ph[i].p_vaddr % RISCV_PGSIZE;
128 uintptr_t vaddr = ph[i].p_vaddr + bias;
129 if (vaddr + ph[i].p_memsz > info->brk_min)
130 info->brk_min = vaddr + ph[i].p_memsz;
131 int flags2 = flags | (prepad ? MAP_POPULATE : 0);
132 int prot = get_prot(ph[i].p_flags);
133 void* rc = mmap((void*)(vaddr-prepad), ph[i].p_filesz + prepad, prot | PROT_WRITE, flags2, file, ph[i].p_offset - prepad);
134 dieif(rc != (void*)(vaddr-prepad), "mmap(0x%ld) returned %p\n", (vaddr-prepad), rc);
135 memset((void*)(vaddr-prepad), 0, prepad);
136 if (!(prot & PROT_WRITE))
137 dieif(mprotect((void*)(vaddr-prepad), ph[i].p_filesz + prepad, prot), "Could not mprotect()\n");
138 size_t mapped = ROUNDUP(ph[i].p_filesz + prepad, RISCV_PGSIZE) - prepad;
139 if (ph[i].p_memsz > mapped)
140 dieif(mmap((void*)(vaddr+mapped), ph[i].p_memsz - mapped, prot, flags|MAP_ANONYMOUS, 0, 0) != (void*)(vaddr+mapped), "Could not mmap()\n");
141 }
142 info->brk_max = info->brk_min + BRK_SIZE;
143 }
144
145 /* Read section header string table. */
146 Elf64_Shdr header;
147 assert(lseek(file, eh.e_shoff + eh.e_shstrndx * sizeof(Elf64_Shdr), SEEK_SET) >= 0);
148 assert(read(file, &header, sizeof header) >= 0);
149 shstrtbl = (char*)load_elf_section(file, header.sh_offset, header.sh_size);
150 assert(shstrtbl);
151 /*
152 * Loop through section headers:
153 * 1. load string table and symbol table
154 * 2. zero out BSS and SBSS segments
155 * 3. find lower and upper bounds of executable instructions
156 */
157 uintptr_t low_bound = 0-1;
158 uintptr_t high_bound = 0;
159 for (int i=0; i<eh.e_shnum; i++) {
160 assert(lseek(file, eh.e_shoff + i * sizeof(Elf64_Shdr), SEEK_SET) >= 0);
161 assert(read(file, &header, sizeof header) >= 0);
162 if (strcmp(shstrtbl+header.sh_name, ".bss") == 0 ||
163 strcmp(shstrtbl+header.sh_name, ".sbss") == 0) {
164 memset((void*)header.sh_addr, 0, header.sh_size);
165 }
166 if (strcmp(shstrtbl+header.sh_name, ".strtab") == 0) {
167 strtbl = (char*)load_elf_section(file, header.sh_offset, header.sh_size);
168 dieif(strtbl==0, "could not load string table");
169 }
170 if (strcmp(shstrtbl+header.sh_name, ".symtab") == 0) {
171 symtbl = (Elf64_Sym*)load_elf_section(file, header.sh_offset, header.sh_size);
172 dieif(symtbl==0, "could not read symbol table");
173 num_syms = header.sh_size / sizeof(Elf64_Sym);
174 }
175 /* find bounds of instruction segment */
176 if (header.sh_flags & SHF_EXECINSTR) {
177 if (header.sh_addr < low_bound)
178 low_bound = header.sh_addr;
179 if (header.sh_addr+header.sh_size > high_bound)
180 high_bound = header.sh_addr+header.sh_size;
181 }
182 }
183 insnSpace.base = low_bound;
184 insnSpace.bound = high_bound;
185 // fprintf(stderr, "Text segment [0x%lx, 0x%lx)\n", low_bound, high_bound);
186 //insnSpace_init(low_bound, high_bound);
187 close(file);
188
189 // info->stack_top = MEM_END + 0x1000;
190 info->stack_top = MEM_END;
191 stack_lowest = (Addr_t)mmap((void*)(info->stack_top-STACK_SIZE), STACK_SIZE, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
192 dieif(stack_lowest != info->stack_top-STACK_SIZE, "Could not allocate stack\n");
193
194 return current.entry;
195 }
196
197
198 Addr_t initialize_stack(int argc, const char** argv, const char** envp)
199 {
200 // fprintf(stderr, "current.stack_top=%lx, phdr_size=%lx\n", current.stack_top, current.phdr_size);
201
202 // copy phdrs to user stack
203 size_t stack_top = current.stack_top - current.phdr_size;
204 memcpy((void*)stack_top, (void*)current.phdr, current.phdr_size);
205 current.phdr = stack_top;
206
207 // copy argv to user stack
208 for (size_t i = 0; i < argc; i++) {
209 size_t len = strlen((char*)(uintptr_t)argv[i])+1;
210 stack_top -= len;
211 memcpy((void*)stack_top, (void*)(uintptr_t)argv[i], len);
212 argv[i] = (char*)stack_top;
213 }
214
215 // copy envp to user stack
216 size_t envc = sizeof(envp) / sizeof(envp[0]);
217 for (size_t i = 0; i < envc; i++) {
218 size_t len = strlen(envp[i]) + 1;
219 stack_top -= len;
220 memcpy((void*)stack_top, envp[i], len);
221 envp[i] = (char*)stack_top;
222 }
223
224 // align stack
225 stack_top &= -sizeof(void*);
226
227 // fprintf(stderr, "AT_RANDOM = stack_top = 0x%016lx\n", stack_top);
228
229 struct {
230 long key;
231 size_t value;
232 } aux[] = {
233 {AT_ENTRY, current.entry},
234 {AT_PHNUM, (size_t)current.phnum},
235 {AT_PHENT, (size_t)current.phent},
236 {AT_PHDR, current.phdr},
237 {AT_PAGESZ, RISCV_PGSIZE},
238 {AT_SECURE, 0},
239 {AT_RANDOM, stack_top},
240 {AT_NULL, 0}
241 };
242
243 // place argc, argv, envp, auxp on stack
244 #define PUSH_ARG(type, value) do { \
245 *((type*)sp) = (type)value; \
246 sp += sizeof(type); \
247 } while (0)
248
249 unsigned naux = sizeof(aux)/sizeof(aux[0]);
250 stack_top -= (1 + argc + 1 + envc + 1 + 2*naux) * sizeof(uintptr_t);
251 stack_top &= -16;
252 long sp = stack_top;
253 PUSH_ARG(uintptr_t, argc);
254 for (unsigned i = 0; i < argc; i++)
255 PUSH_ARG(uintptr_t, argv[i]);
256 PUSH_ARG(uintptr_t, 0); /* argv[argc] = NULL */
257 for (unsigned i = 0; i < envc; i++)
258 PUSH_ARG(uintptr_t, envp[i]);
259 PUSH_ARG(uintptr_t, 0); /* envp[envc] = NULL */
260 for (unsigned i = 0; i < naux; i++) {
261 PUSH_ARG(uintptr_t, aux[i].key);
262 PUSH_ARG(uintptr_t, aux[i].value);
263 }
264
265 current.stack_top = stack_top;
266 return stack_top;
267 }
268
269
270 int find_symbol( const char* name, Addr_t* begin, Addr_t* end )
271 {
272 if (strtbl) {
273 for (int i=0; i<num_syms; i++) {
274 int n = strlen(name);
275 if (strncmp(strtbl+symtbl[i].st_name, name, n) == 0) {
276 *begin = symtbl[i].st_value;
277 if (end)
278 *end = *begin + symtbl[i].st_size;
279 return 1;
280 }
281 }
282 }
283 return 0;
284 }
285
286
287 int find_pc( long pc, const char** name, long* offset )
288 {
289 if (symtbl) {
290 for (int i=0; i<num_syms; i++) {
291 if (symtbl[i].st_value <= pc && pc < symtbl[i].st_value+symtbl[i].st_size) {
292 *name = strtbl + symtbl[i].st_name;
293 *offset = pc - symtbl[i].st_value;
294 return 1;
295 }
296 }
297 }
298 return 0;
299 }
300