hack offset into boot address as well
[ls2.git] / coldboot / coldboot.c
1 #include <stdint.h>
2 #include <stdbool.h>
3
4 #include "console.h"
5 #include "microwatt_soc.h"
6 #include "io.h"
7
8 #include <stdlib.h>
9 #include <stdint.h>
10 #include <gram.h>
11
12 #include "elf64.h"
13
/*
 * Write `val` into the special-purpose register numbered `sprnum`.
 *
 * `sprnum` is bound with the "i" (immediate) constraint, so it has to be a
 * compile-time constant at the point where this function is inlined.
 */
static inline void mtspr(int sprnum, unsigned long val)
{
	__asm__ volatile(
		"mtspr %0,%1"
		: /* no outputs */
		: "i" (sprnum), "r" (val)
	);
}
18
/*
 * Perform a single volatile 32-bit load from `addr` and return the value.
 *
 * The access goes through a `const volatile` pointer so that the `const`
 * qualifier of the parameter is preserved instead of being cast away.
 */
static inline uint32_t read32(const void *addr)
{
	return *(const volatile uint32_t *)addr;
}
23
/* Perform a single volatile 32-bit store of `value` to `addr`. */
static inline void write32(void *addr, uint32_t value)
{
	volatile uint32_t *target = (volatile uint32_t *)addr;

	*target = value;
}
28
/*
 * UART register layout: eleven consecutive 32-bit words (44 bytes).
 * The byte offset of each word from the start of the structure is noted
 * next to it.
 */
struct uart_regs {
	uint32_t divisor;     /* 0x00 */
	uint32_t rx_data;     /* 0x04 */
	uint32_t rx_rdy;      /* 0x08 */
	uint32_t rx_err;      /* 0x0c */
	uint32_t tx_data;     /* 0x10 */
	uint32_t tx_rdy;      /* 0x14 */
	uint32_t zero0;       /* 0x18, reserved */
	uint32_t zero1;       /* 0x1c, reserved */
	uint32_t ev_status;   /* 0x20 */
	uint32_t ev_pending;  /* 0x24 */
	uint32_t ev_enable;   /* 0x28 */
};
42
/*
 * Print `val` on the console as exactly eight upper-case hexadecimal digits,
 * most significant digit first, with no prefix and no trailing newline.
 *
 * Each digit is extracted with a shift instead of by indexing the bytes of
 * `val` in memory, so the output does not depend on the CPU's byte order.
 */
void uart_writeuint32(uint32_t val) {
	static const char lut[] = "0123456789ABCDEF";
	int shift;

	/* walk the eight nibbles from bits 31..28 down to bits 3..0 */
	for (shift = 28; shift >= 0; shift -= 4) {
		putchar(lut[(val >> shift) & 0xF]);
	}
}
54
/*
 * Copy `n` bytes from `src` to `dest`, one byte at a time, in ascending
 * address order.
 *
 * While the progress trace is enabled, a line "memcpy <offset>" is printed on
 * the console each time the offset is a multiple of 4096 (including offset 0),
 * so large copies show visible progress.
 *
 * The index is a size_t so that it has the same type as `n`; this avoids the
 * signed/unsigned comparison and the signed overflow a plain int would hit
 * for very large sizes.
 *
 * NOTE(review): the signature differs from ISO C memcpy (no return value,
 * non-const `src`). It is kept unchanged here because callers and any
 * prototype outside this file are not visible.
 */
void memcpy(void *dest, void *src, size_t n) {
	const char *src_char = (const char *)src;
	char *dest_char = (char *)dest;
	size_t i;

	for (i = 0; i < n; i++) {
#if 1
		/* progress trace, one line per 4096 bytes copied */
		if ((i % 4096) == 0) {
			puts("memcpy ");
			uart_writeuint32((uint32_t)i);
			puts("\r\n");
		}
#endif
		dest_char[i] = src_char[i];
	}
}
71
/*
 * Deliberately empty handler.
 * NOTE(review): presumably the interrupt entry point expected by the startup
 * code; its caller is not visible in this file.
 */
void isr(void)
{
}
75
// WARNING
// KESTREL SPECIFIC
//
// Tercel SPI controller definitions. The *_REG_* values are byte offsets that
// the register accessors add to SPI_FCTRL_BASE; each *_MASK / *_SHIFT pair
// describes one bit field inside the corresponding register.

// PHY configuration register 1: clock divisor field
#define TERCEL_SPI_REG_SYS_PHY_CFG1            0x10
#define TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK      0xff
#define TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT     0

// Flash configuration register 5: multi-cycle read enable bit
#define TERCEL_SPI_REG_SYS_FLASH_CFG5          0x24
#define TERCEL_SPI_FLASH_EN_MULTCYC_READ_MASK  0x1
#define TERCEL_SPI_FLASH_EN_MULTCYC_READ_SHIFT 0
84 static inline uint32_t read_tercel_register(uint8_t reg)
85 {
86 return readl((unsigned long)(SPI_FCTRL_BASE+reg));
87 }
88
89 static inline void write_tercel_register(uint8_t reg, uint32_t value)
90 {
91 writel(value, (unsigned long)(SPI_FCTRL_BASE+reg));
92 }
93
94 // TODO: need to use this
95 // https://gitlab.raptorengineering.com/kestrel-collaboration/kestrel-litex/litex/-/blob/master/litex/soc/software/bios/boot.c#L575
96 static bool fl_read(void *dst, uint32_t offset, uint32_t size)
97 {
98 uint8_t *d = dst;
99 memcpy(d, (void *)(unsigned long)(SPI_FLASH_BASE + offset), size);
100 return true;
101 }
102
103 static unsigned long copy_flash(unsigned int offset, unsigned int dst_offs)
104 {
105 Elf64_Ehdr ehdr;
106 Elf64_Phdr ph;
107 unsigned int i, poff, size, off;
108 void *addr;
109
110 // WARNING
111 // KESTREL SPECIFIC
112 // Set SPI clock cycle divider to 1
113 uint32_t dword;
114 dword = read_tercel_register(TERCEL_SPI_REG_SYS_PHY_CFG1);
115 dword &= ~(TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK <<
116 TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT);
117 dword |= ((1 & TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK) <<
118 TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT);
119 write_tercel_register(TERCEL_SPI_REG_SYS_PHY_CFG1, dword);
120 // Enable read merging
121 dword = read_tercel_register(TERCEL_SPI_REG_SYS_FLASH_CFG5);
122 dword |= (TERCEL_SPI_FLASH_EN_MULTCYC_READ_MASK <<
123 TERCEL_SPI_FLASH_EN_MULTCYC_READ_SHIFT);
124 write_tercel_register(TERCEL_SPI_REG_SYS_FLASH_CFG5, dword);
125
126 puts("Trying flash...\r\n");
127 if (!fl_read(&ehdr, offset, sizeof(ehdr)))
128 return -1ul;
129 if (!IS_ELF(ehdr) || ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
130 puts("Doesn't look like an elf64\r\n");
131 goto dump;
132 }
133 if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB ||
134 ehdr.e_machine != EM_PPC64) {
135 puts("Not a ppc64le binary\r\n");
136 goto dump;
137 }
138
139 poff = offset + ehdr.e_phoff;
140 for (i = 0; i < ehdr.e_phnum; i++) {
141 if (!fl_read(&ph, poff, sizeof(ph)))
142 goto dump;
143 if (ph.p_type != PT_LOAD)
144 continue;
145
146 /* XXX Add bound checking ! */
147 size = ph.p_filesz;
148 addr = (void *)ph.p_vaddr;
149 off = offset + ph.p_offset;
150 //printf("Copy segment %d (0x%x bytes) to %p\n", i, size, addr);
151 puts("Copy segment ");
152 uart_writeuint32(i);
153 puts(" size ");
154 uart_writeuint32(size);
155 puts(" addr ");
156 uart_writeuint32((uint32_t)(unsigned long)addr);
157 puts("\r\n");
158 fl_read(addr+dst_offs, off, size);
159 poff += ehdr.e_phentsize;
160 }
161
162 puts("Booting from DRAM at");
163 uart_writeuint32((unsigned int)(dst_offs+ehdr.e_entry));
164 puts("\r\n");
165
166 puts("Dump DRAM\r\n");
167 for (i = 0; i < 64; i++) {
168 uart_writeuint32(readl(dst_offs+ehdr.e_entry+(i*4)));
169 puts(" ");
170 if ((i & 7) == 7) puts("\r\n");
171 }
172 puts("\r\n");
173
174 //flush_cpu_icache();
175 return dst_offs+ehdr.e_entry;
176 dump:
177 puts("HDR: \r\n");
178 for (i = 0; i < 8; i++) {
179 uart_writeuint32(ehdr.e_ident[i]);
180 puts("\r\n");
181 }
182
183 return -1ul;
184 }
185
186
// XXX
// Defining gram_[read|write] allows a trace of all register
// accesses to be dumped to console for debugging purposes.
// To use, define GRAM_RW_FUNC in gram.h

/*
 * Tracing register read: prints "gram_read: <addr>: <value>" followed by a
 * newline on the console and returns the 32-bit value read from `addr`.
 * `ctx` is not used.
 */
uint32_t gram_read(const struct gramCtx *ctx, void *addr) {
	const unsigned long reg_addr = (unsigned long)addr;
	uint32_t value;

	puts("gram_read: ");
	uart_writeuint32(reg_addr);
	/* the read happens after the address is printed, before the value */
	value = readl(reg_addr);
	puts(": ");
	uart_writeuint32(value);
	puts("\n");

	return value;
}
203
/*
 * Tracing register write: prints "gram_write: <addr>: <value>", stores the
 * 32-bit `value` at `addr`, then prints a newline. `ctx` is not used.
 * Always returns 0.
 */
int gram_write(const struct gramCtx *ctx, void *addr, uint32_t value) {
	const unsigned long reg_addr = (unsigned long)addr;

	puts("gram_write: ");
	uart_writeuint32(reg_addr);
	puts(": ");
	uart_writeuint32(value);
	/* the store is issued only after both numbers have been printed */
	writel(value, reg_addr);
	puts("\n");

	return 0;
}
214
215 int main(void) {
216 const int kNumIterations = 14;
217 int res, failcnt = 0;
218 uint32_t tmp;
219 unsigned long ftr, spi_offs=0x0;
220 volatile uint32_t *ram = (uint32_t*)MEMORY_BASE;
221
222 console_init();
223 //puts("Firmware launched...\n");
224
225 #if 1
226 puts(" Soc signature: ");
227 tmp = readl(SYSCON_BASE + SYS_REG_SIGNATURE);
228 uart_writeuint32(tmp);
229 puts(" Soc features: ");
230 ftr = readl(SYSCON_BASE + SYS_REG_INFO);
231 if (ftr & SYS_REG_INFO_HAS_UART)
232 puts("UART ");
233 if (ftr & SYS_REG_INFO_HAS_DRAM)
234 puts("DRAM ");
235 if (ftr & SYS_REG_INFO_HAS_BRAM)
236 puts("BRAM ");
237 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH)
238 puts("SPIFLASH ");
239 if (ftr & SYS_REG_INFO_HAS_LITEETH)
240 puts("ETHERNET ");
241 puts("\r\n");
242
243 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
244 puts("SPI Offset: ");
245 spi_offs = readl(SYSCON_BASE + SYS_REG_SPI_INFO);
246 uart_writeuint32(spi_offs);
247 puts("\r\n");
248 }
249
250 #endif
251
252 #if 1
253 #if 1
254 // print out configuration parameters for QSPI
255 volatile uint32_t *qspi_cfg = (uint32_t*)SPI_FCTRL_BASE;
256 for (int k=0; k < 2; k++) {
257 tmp = readl((unsigned long)&(qspi_cfg[k]));
258 puts("cfg");
259 uart_writeuint32(k);
260 puts(" ");
261 uart_writeuint32(tmp);
262 puts("\n");
263 }
264 #endif
265 volatile uint32_t *qspi = (uint32_t*)SPI_FLASH_BASE+spi_offs;
266 //volatile uint8_t *qspi_bytes = (uint8_t*)spi_offs;
267 // let's not, eh? writel(0xDEAF0123, (unsigned long)&(qspi[0]));
268 // tmp = readl((unsigned long)&(qspi[0]));
269 for (int i=0;i<256;i++) {
270 tmp = readl((unsigned long)&(qspi[i]));
271 uart_writeuint32(tmp);
272 puts(" ");
273 if ((i & 0x7) == 0x7) puts("\r\n");
274 }
275 puts("\r\n");
276 /*
277 for (i=0;i<256;i++) {
278 tmp = readb((unsigned long)&(qspi_bytes[i]));
279 uart_writeuint32(tmp);
280 puts(" ");
281 }
282 */
283 #if 0
284 while (1) {
285 // quick read
286 tmp = readl((unsigned long)&(qspi[0x1000/4]));
287 puts("read 0x1000");
288 uart_writeuint32(tmp);
289 putchar(10);
290 }
291 while (1) {
292 unsigned char c = getchar();
293 putchar(c);
294 if (c == 13) { // if CR send LF
295
296 // quick read
297 tmp = readl((unsigned long)&(qspi[1<<i]));
298 puts("read ");
299 uart_writeuint32(1<<i);
300 puts(" ");
301 uart_writeuint32(tmp);
302 putchar(10);
303 i++;
304 }
305 }
306
307 return 0;
308 #endif
309 #endif
310 #if 0
311 volatile uint32_t *hyperram = (uint32_t*)0xa0000000;
312 writel(0xDEAF0123, (unsigned long)&(hyperram[0]));
313 tmp = readl((unsigned long)&(hyperram[0]));
314 while (1) {
315 unsigned char c = getchar();
316 putchar(c);
317 if (c == 13) { // if CR send LF
318
319 // quick write/read
320 writel(0xDEAF0123+i, (unsigned long)&(hyperram[1<<i]));
321 tmp = readl((unsigned long)&(hyperram[1<<i]));
322 puts("read ");
323 uart_writeuint32(1<<i);
324 puts(" ");
325 uart_writeuint32(tmp);
326 putchar(10);
327 i++;
328 }
329 }
330
331 return 0;
332 #endif
333
334 // init DRAM only if SYSCON says it exists (duh)
335 if (ftr & SYS_REG_INFO_HAS_DRAM)
336 {
337 puts("DRAM init... ");
338
339 struct gramCtx ctx;
340 #if 1
341 struct gramProfile profile = {
342 .mode_registers = {
343 0xb20, 0x806, 0x200, 0x0
344 },
345 .rdly_p0 = 2,
346 .rdly_p1 = 2,
347 };
348 #endif
349 #if 0
350 struct gramProfile profile = {
351 .mode_registers = {
352 0x0320, 0x0006, 0x0200, 0x0000
353 },
354 .rdly_p0 = 1,
355 .rdly_p1 = 1,
356 };
357 #endif
358 struct gramProfile profile2;
359 gram_init(&ctx, &profile, (void*)MEMORY_BASE,
360 (void*)DRAM_CTRL_BASE,
361 (void*)DRAM_INIT_BASE);
362 puts("done\n");
363
364 puts("MR profile: ");
365 uart_writeuint32(profile.mode_registers[0]);
366 puts(" ");
367 uart_writeuint32(profile.mode_registers[1]);
368 puts(" ");
369 uart_writeuint32(profile.mode_registers[2]);
370 puts(" ");
371 uart_writeuint32(profile.mode_registers[3]);
372 puts("\n");
373
374 // FIXME
375 // Early read test for WB access sim
376 //uart_writeuint32(*ram);
377
378 #if 1
379 puts("Rdly\np0: ");
380 for (size_t i = 0; i < 8; i++) {
381 profile2.rdly_p0 = i;
382 gram_load_calibration(&ctx, &profile2);
383 gram_reset_burstdet(&ctx);
384
385 for (size_t j = 0; j < 128; j++) {
386 tmp = readl((unsigned long)&(ram[i]));
387 }
388 if (gram_read_burstdet(&ctx, 0)) {
389 puts("1");
390 } else {
391 puts("0");
392 }
393 }
394 puts("\n");
395
396 puts("Rdly\np1: ");
397 for (size_t i = 0; i < 8; i++) {
398 profile2.rdly_p1 = i;
399 gram_load_calibration(&ctx, &profile2);
400 gram_reset_burstdet(&ctx);
401 for (size_t j = 0; j < 128; j++) {
402 tmp = readl((unsigned long)&(ram[i]));
403 }
404 if (gram_read_burstdet(&ctx, 1)) {
405 puts("1");
406 } else {
407 puts("0");
408 }
409 }
410 puts("\n");
411
412 puts("Auto calibrating... ");
413 res = gram_generate_calibration(&ctx, &profile2);
414 if (res != GRAM_ERR_NONE) {
415 puts("failed\n");
416 gram_load_calibration(&ctx, &profile);
417 } else {
418 gram_load_calibration(&ctx, &profile2);
419 }
420 puts("done\n");
421
422 puts("Auto calibration profile:");
423 puts("p0 rdly:");
424 uart_writeuint32(profile2.rdly_p0);
425 puts(" p1 rdly:");
426 uart_writeuint32(profile2.rdly_p1);
427 puts("\n");
428 #endif
429
430 puts("Reloading built-in calibration profile...");
431 gram_load_calibration(&ctx, &profile);
432
433 puts("DRAM test... \n");
434 for (size_t i = 0; i < kNumIterations; i++) {
435 writel(0xDEAF0000 | i*4, (unsigned long)&(ram[i]));
436 }
437
438 #if 0
439 for (int dly = 0; dly < 8; dly++) {
440 failcnt = 0;
441 profile2.rdly_p0 = dly;
442 profile2.rdly_p1 = dly;
443 puts("p0 rdly:");
444 uart_writeuint32(profile2.rdly_p0);
445 puts(" p1 rdly:");
446 uart_writeuint32(profile2.rdly_p1);
447 gram_load_calibration(&ctx, &profile2);
448 for (size_t i = 0; i < kNumIterations; i++) {
449 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
450 puts("fail : *(0x");
451 uart_writeuint32((unsigned long)(&ram[i]));
452 puts(") = ");
453 uart_writeuint32(readl((unsigned long)&(ram[i])));
454 puts("\n");
455 failcnt++;
456
457 if (failcnt > 10) {
458 puts("Test canceled (more than 10 errors)\n");
459 break;
460 }
461 }
462 }
463 }
464 #else
465 failcnt = 0;
466 for (size_t i = 0; i < kNumIterations; i++) {
467 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
468 puts("fail : *(0x");
469 uart_writeuint32((unsigned long)(&ram[i]));
470 puts(") = ");
471 uart_writeuint32(readl((unsigned long)&(ram[i])));
472 puts("\n");
473 failcnt++;
474
475 if (failcnt > 10) {
476 puts("Test canceled (more than 10 errors)\n");
477 break;
478 }
479 }
480 }
481 }
482 #endif
483 puts("done\n");
484
485 // memcpy from SPI Flash then boot
486 if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH) &&
487 (failcnt == 0))
488 {
489 // identify ELF, copy if present, and get the start address
490 unsigned long faddr = copy_flash(spi_offs,
491 0x600000); // hack!
492 if (faddr != -1ul) {
493 // jump to absolute address
494 mtspr(8, faddr); // move address to LR
495 __asm__ volatile("blr");
496
497 // works with head.S which copies r3 into ctr then does bctr
498 return faddr;
499 }
500 }
501
502 return 0;
503 }
504