ee7b74ae2bf36a15db0786116754118e84923bb5
[ls2.git] / coldboot / coldboot.c
1 #include <stdint.h>
2 #include <stdbool.h>
3
4 #include "console.h"
5 #include "microwatt_soc.h"
6 #include "io.h"
7
8 #include <stdlib.h>
9 #include <stdint.h>
10 #include <gram.h>
11
12 #include "elf64.h"
13
14 static inline void mtspr(int sprnum, unsigned long val)
15 {
16 __asm__ volatile("mtspr %0,%1" : : "i" (sprnum), "r" (val));
17 }
18
19 static inline uint32_t read32(const void *addr)
20 {
21 return *(volatile uint32_t *)addr;
22 }
23
24 static inline void write32(void *addr, uint32_t value)
25 {
26 *(volatile uint32_t *)addr = value;
27 }
28
29 struct uart_regs {
30 uint32_t divisor;
31 uint32_t rx_data;
32 uint32_t rx_rdy;
33 uint32_t rx_err;
34 uint32_t tx_data;
35 uint32_t tx_rdy;
36 uint32_t zero0; // reserved
37 uint32_t zero1; // reserved
38 uint32_t ev_status;
39 uint32_t ev_pending;
40 uint32_t ev_enable;
41 };
42
43 void uart_writeuint32(uint32_t val) {
44 const char lut[] = { '0', '1', '2', '3', '4', '5', '6', '7',
45 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
46 uint8_t *val_arr = (uint8_t*)(&val);
47 size_t i;
48
49 for (i = 0; i < 4; i++) {
50 putchar(lut[(val_arr[3-i] >> 4) & 0xF]);
51 putchar(lut[val_arr[3-i] & 0xF]);
52 }
53 }
54
55 void memcpy(void *dest, void *src, size_t n) {
56 int i;
57 //cast src and dest to char*
58 char *src_char = (char *)src;
59 char *dest_char = (char *)dest;
60 for (i=0; i<n; i++) {
61 #if 1
62 if ((i % 4096) == 0) {
63 puts("memcpy ");
64 uart_writeuint32(i);
65 puts("\r\n");
66 }
67 #endif
68 dest_char[i] = src_char[i]; //copy contents byte by byte
69 }
70 }
71
72 void isr(void) {
73
74 }
75
76 // WARNING
77 // KESTREL SPECIFIC
78 #define TERCEL_SPI_REG_SYS_PHY_CFG1 0x10
79 #define TERCEL_SPI_REG_SYS_FLASH_CFG5 0x24
80 #define TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK 0xff
81 #define TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT 0
82 #define TERCEL_SPI_FLASH_EN_MULTCYC_READ_MASK 0x1
83 #define TERCEL_SPI_FLASH_EN_MULTCYC_READ_SHIFT 0
84 static inline uint32_t read_tercel_register(uint8_t reg)
85 {
86 return readl((unsigned long)(SPI_FCTRL_BASE+reg));
87 }
88
89 static inline void write_tercel_register(uint8_t reg, uint32_t value)
90 {
91 writel(value, (unsigned long)(SPI_FCTRL_BASE+reg));
92 }
93
94 // TODO: need to use this
95 // https://gitlab.raptorengineering.com/kestrel-collaboration/kestrel-firmware/bare-metal-firmware/-/blob/master/main.c#L2328
96
97 /* this is a "level 1" speed-up, which gets an initial improvement of 10-50x
98 * over the default speed (which is a scant 100 bytes per second).
99 */
100 static void crank_up_qspi_level1(void)
101 {
102 // WARNING: KESTREL SPECIFIC
103 // Set SPI clock cycle divider to 1
104 uint32_t dword;
105 dword = read_tercel_register(TERCEL_SPI_REG_SYS_PHY_CFG1);
106 dword &= ~(TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK <<
107 TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT);
108 dword |= ((1 & TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK) <<
109 TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT);
110 write_tercel_register(TERCEL_SPI_REG_SYS_PHY_CFG1, dword);
111 // Enable read merging
112 dword = read_tercel_register(TERCEL_SPI_REG_SYS_FLASH_CFG5);
113 dword |= (TERCEL_SPI_FLASH_EN_MULTCYC_READ_MASK <<
114 TERCEL_SPI_FLASH_EN_MULTCYC_READ_SHIFT);
115 write_tercel_register(TERCEL_SPI_REG_SYS_FLASH_CFG5, dword);
116 }
117
118 static bool fl_read(void *dst, uint32_t offset, uint32_t size)
119 {
120 uint8_t *d = dst;
121 memcpy(d, (void *)(unsigned long)(SPI_FLASH_BASE + offset), size);
122 return true;
123 }
124
125 static unsigned long copy_flash(unsigned int offset, unsigned int dst_offs)
126 {
127 Elf64_Ehdr ehdr;
128 Elf64_Phdr ph;
129 unsigned int i, poff, size, off;
130 void *addr;
131
132 puts("Trying flash...\r\n");
133 if (!fl_read(&ehdr, offset, sizeof(ehdr)))
134 return -1ul;
135 if (!IS_ELF(ehdr) || ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
136 puts("Doesn't look like an elf64\r\n");
137 goto dump;
138 }
139 if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB ||
140 ehdr.e_machine != EM_PPC64) {
141 puts("Not a ppc64le binary\r\n");
142 goto dump;
143 }
144
145 poff = offset + ehdr.e_phoff;
146 for (i = 0; i < ehdr.e_phnum; i++) {
147 if (!fl_read(&ph, poff, sizeof(ph)))
148 goto dump;
149 if (ph.p_type != PT_LOAD)
150 continue;
151
152 /* XXX Add bound checking ! */
153 size = ph.p_filesz;
154 addr = (void *)ph.p_vaddr;
155 off = offset + ph.p_offset;
156 //printf("Copy segment %d (0x%x bytes) to %p\n", i, size, addr);
157 puts("Copy segment ");
158 uart_writeuint32(i);
159 puts(" size ");
160 uart_writeuint32(size);
161 puts(" addr ");
162 uart_writeuint32((uint32_t)(unsigned long)addr);
163 puts("\r\n");
164 fl_read(addr+dst_offs, off, size);
165 poff += ehdr.e_phentsize;
166 }
167
168 puts("Booting from DRAM at");
169 uart_writeuint32((unsigned int)(dst_offs+ehdr.e_entry));
170 puts("\r\n");
171
172 puts("Dump DRAM\r\n");
173 for (i = 0; i < 64; i++) {
174 uart_writeuint32(readl(dst_offs+ehdr.e_entry+(i*4)));
175 puts(" ");
176 if ((i & 7) == 7) puts("\r\n");
177 }
178 puts("\r\n");
179
180 //flush_cpu_icache();
181 return dst_offs+ehdr.e_entry;
182 dump:
183 puts("HDR: \r\n");
184 for (i = 0; i < 8; i++) {
185 uart_writeuint32(ehdr.e_ident[i]);
186 puts("\r\n");
187 }
188
189 return -1ul;
190 }
191
192
193 // XXX
194 // Defining gram_[read|write] allows a trace of all register
195 // accesses to be dumped to console for debugging purposes.
196 // To use, define GRAM_RW_FUNC in gram.h
197 uint32_t gram_read(const struct gramCtx *ctx, void *addr) {
198 uint32_t dword;
199
200 puts("gram_read: ");
201 uart_writeuint32((unsigned long)addr);
202 dword = readl((unsigned long)addr);
203 puts(": ");
204 uart_writeuint32((unsigned long)dword);
205 puts("\n");
206
207 return dword;
208 }
209
210 int gram_write(const struct gramCtx *ctx, void *addr, uint32_t value) {
211 puts("gram_write: ");
212 uart_writeuint32((unsigned long)addr);
213 puts(": ");
214 uart_writeuint32((unsigned long)value);
215 writel(value, (unsigned long)addr);
216 puts("\n");
217
218 return 0;
219 }
220
221 int main(void) {
222 const int kNumIterations = 14;
223 int res, failcnt = 0;
224 uint32_t tmp;
225 unsigned long ftr, spi_offs=0x0;
226 volatile uint32_t *ram = (uint32_t*)MEMORY_BASE;
227
228 console_init();
229 //puts("Firmware launched...\n");
230
231 #if 1
232 puts(" Soc signature: ");
233 tmp = readl(SYSCON_BASE + SYS_REG_SIGNATURE);
234 uart_writeuint32(tmp);
235 tmp = readl(SYSCON_BASE + SYS_REG_SIGNATURE+4);
236 uart_writeuint32(tmp);
237 puts(" Soc features: ");
238 ftr = readl(SYSCON_BASE + SYS_REG_INFO);
239 if (ftr & SYS_REG_INFO_HAS_UART)
240 puts("UART ");
241 if (ftr & SYS_REG_INFO_HAS_DRAM)
242 puts("DRAM ");
243 if (ftr & SYS_REG_INFO_HAS_BRAM)
244 puts("BRAM ");
245 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH)
246 puts("SPIFLASH ");
247 if (ftr & SYS_REG_INFO_HAS_LITEETH)
248 puts("ETHERNET ");
249 puts("\r\n");
250
251 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
252 // speed up the QSPI to at least a sane level
253 crank_up_qspi_level1();
254
255 puts("SPI Offset: ");
256 spi_offs = readl(SYSCON_BASE + SYS_REG_SPI_INFO);
257 uart_writeuint32(spi_offs);
258 puts("\r\n");
259 }
260
261 #endif
262
263 #if 1
264 #if 1
265 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
266 // print out configuration parameters for QSPI
267 volatile uint32_t *qspi_cfg = (uint32_t*)SPI_FCTRL_BASE;
268 for (int k=0; k < 2; k++) {
269 tmp = readl((unsigned long)&(qspi_cfg[k]));
270 puts("cfg");
271 uart_writeuint32(k);
272 puts(" ");
273 uart_writeuint32(tmp);
274 puts("\r\n");
275 }
276 }
277 #endif
278 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
279 volatile uint32_t *qspi = (uint32_t*)SPI_FLASH_BASE+0x600000;
280 //volatile uint8_t *qspi_bytes = (uint8_t*)spi_offs;
281 // let's not, eh? writel(0xDEAF0123, (unsigned long)&(qspi[0]));
282 // tmp = readl((unsigned long)&(qspi[0]));
283 for (int i=0;i<2;i++) {
284 tmp = readl((unsigned long)&(qspi[i]));
285 uart_writeuint32(tmp);
286 puts(" ");
287 if ((i & 0x7) == 0x7) puts("\r\n");
288 }
289 puts("\r\n");
290 /*
291 for (i=0;i<256;i++) {
292 tmp = readb((unsigned long)&(qspi_bytes[i]));
293 uart_writeuint32(tmp);
294 puts(" ");
295 }
296 */
297 #if 0
298 while (1) {
299 // quick read
300 tmp = readl((unsigned long)&(qspi[0x1000/4]));
301 puts("read 0x1000");
302 uart_writeuint32(tmp);
303 putchar(10);
304 }
305 while (1) {
306 unsigned char c = getchar();
307 putchar(c);
308 if (c == 13) { // if CR send LF
309
310 // quick read
311 tmp = readl((unsigned long)&(qspi[1<<i]));
312 puts("read ");
313 uart_writeuint32(1<<i);
314 puts(" ");
315 uart_writeuint32(tmp);
316 putchar(10);
317 i++;
318 }
319 }
320
321 return 0;
322 #endif
323 }
324 #endif
325 #if 0
326 volatile uint32_t *hyperram = (uint32_t*)0x00000000; // at 0x0 for arty
327 writel(0xDEAF0123, (unsigned long)&(hyperram[0]));
328 tmp = readl((unsigned long)&(hyperram[0]));
329 int i = 0;
330 while (1) {
331 unsigned char c = getchar();
332 putchar(c);
333 if (c == 13) { // if CR send LF
334
335 // quick write/read
336 writel(0xDEAF0123+i, (unsigned long)&(hyperram[1<<i]));
337 tmp = readl((unsigned long)&(hyperram[1<<i]));
338 puts("read ");
339 uart_writeuint32(1<<i);
340 puts(" ");
341 uart_writeuint32(tmp);
342 putchar(10);
343 i++;
344 }
345 }
346
347 return 0;
348 #endif
349
350 // init DRAM only if SYSCON says it exists (duh)
351 if (ftr & SYS_REG_INFO_HAS_DRAM)
352 {
353 puts("DRAM init... ");
354
355 struct gramCtx ctx;
356 #if 1
357 struct gramProfile profile = {
358 .mode_registers = {
359 0xb20, 0x806, 0x200, 0x0
360 },
361 .rdly_p0 = 2,
362 .rdly_p1 = 2,
363 };
364 #endif
365 #if 0
366 struct gramProfile profile = {
367 .mode_registers = {
368 0x0320, 0x0006, 0x0200, 0x0000
369 },
370 .rdly_p0 = 1,
371 .rdly_p1 = 1,
372 };
373 #endif
374 struct gramProfile profile2;
375 gram_init(&ctx, &profile, (void*)MEMORY_BASE,
376 (void*)DRAM_CTRL_BASE,
377 (void*)DRAM_INIT_BASE);
378 puts("done\n");
379
380 puts("MR profile: ");
381 uart_writeuint32(profile.mode_registers[0]);
382 puts(" ");
383 uart_writeuint32(profile.mode_registers[1]);
384 puts(" ");
385 uart_writeuint32(profile.mode_registers[2]);
386 puts(" ");
387 uart_writeuint32(profile.mode_registers[3]);
388 puts("\n");
389
390 // FIXME
391 // Early read test for WB access sim
392 //uart_writeuint32(*ram);
393
394 #if 1
395 puts("Rdly\np0: ");
396 for (size_t i = 0; i < 8; i++) {
397 profile2.rdly_p0 = i;
398 gram_load_calibration(&ctx, &profile2);
399 gram_reset_burstdet(&ctx);
400
401 for (size_t j = 0; j < 128; j++) {
402 tmp = readl((unsigned long)&(ram[i]));
403 }
404 if (gram_read_burstdet(&ctx, 0)) {
405 puts("1");
406 } else {
407 puts("0");
408 }
409 }
410 puts("\n");
411
412 puts("Rdly\np1: ");
413 for (size_t i = 0; i < 8; i++) {
414 profile2.rdly_p1 = i;
415 gram_load_calibration(&ctx, &profile2);
416 gram_reset_burstdet(&ctx);
417 for (size_t j = 0; j < 128; j++) {
418 tmp = readl((unsigned long)&(ram[i]));
419 }
420 if (gram_read_burstdet(&ctx, 1)) {
421 puts("1");
422 } else {
423 puts("0");
424 }
425 }
426 puts("\n");
427
428 puts("Auto calibrating... ");
429 res = gram_generate_calibration(&ctx, &profile2);
430 if (res != GRAM_ERR_NONE) {
431 puts("failed\n");
432 gram_load_calibration(&ctx, &profile);
433 } else {
434 gram_load_calibration(&ctx, &profile2);
435 }
436 puts("done\n");
437
438 puts("Auto calibration profile:");
439 puts("p0 rdly:");
440 uart_writeuint32(profile2.rdly_p0);
441 puts(" p1 rdly:");
442 uart_writeuint32(profile2.rdly_p1);
443 puts("\n");
444 #endif
445
446 puts("Reloading built-in calibration profile...");
447 gram_load_calibration(&ctx, &profile);
448
449 puts("DRAM test... \n");
450 for (size_t i = 0; i < kNumIterations; i++) {
451 writel(0xDEAF0000 | i*4, (unsigned long)&(ram[i]));
452 }
453
454 #if 0
455 for (int dly = 0; dly < 8; dly++) {
456 failcnt = 0;
457 profile2.rdly_p0 = dly;
458 profile2.rdly_p1 = dly;
459 puts("p0 rdly:");
460 uart_writeuint32(profile2.rdly_p0);
461 puts(" p1 rdly:");
462 uart_writeuint32(profile2.rdly_p1);
463 gram_load_calibration(&ctx, &profile2);
464 for (size_t i = 0; i < kNumIterations; i++) {
465 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
466 puts("fail : *(0x");
467 uart_writeuint32((unsigned long)(&ram[i]));
468 puts(") = ");
469 uart_writeuint32(readl((unsigned long)&(ram[i])));
470 puts("\n");
471 failcnt++;
472
473 if (failcnt > 10) {
474 puts("Test canceled (more than 10 errors)\n");
475 break;
476 }
477 }
478 }
479 }
480 #else
481 failcnt = 0;
482 for (size_t i = 0; i < kNumIterations; i++) {
483 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
484 puts("fail : *(0x");
485 uart_writeuint32((unsigned long)(&ram[i]));
486 puts(") = ");
487 uart_writeuint32(readl((unsigned long)&(ram[i])));
488 puts("\n");
489 failcnt++;
490
491 if (failcnt > 10) {
492 puts("Test canceled (more than 10 errors)\n");
493 break;
494 }
495 }
496 }
497 }
498 #endif
499 puts("done\n");
500
501 #if 0 // ooo, annoying: won't work. no idea why
502 // temporary hard-hack: boot directly from QSPI. really
503 // should do something like detect at least... something
504 if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH))
505 {
506 // jump to absolute address
507 mtspr(8, SPI_FLASH_BASE); // move address to LR
508 __asm__ volatile("blr");
509 return 0;
510 }
511 #endif
512
513 // memcpy from SPI Flash then boot
514 if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH) &&
515 (failcnt == 0))
516 {
517 /*
518 puts("ELF @ QSPI\n");
519 // identify ELF, copy if present, and get the start address
520 unsigned long faddr = copy_flash(spi_offs,
521 0x600000); // hack!
522 if (faddr != -1ul) {
523 // jump to absolute address
524 mtspr(8, faddr); // move address to LR
525 __asm__ volatile("blr");
526
527 // works with head.S which copies r3 into ctr then does bctr
528 return faddr;
529 }
530 puts("copy QSPI\n");
531 */
532 // another terrible hack: copy from flash at offset 0x600000
533 // a block of size 0x600000 into mem address 0x600000, then
534 // jump to it. this allows a dtb image to be executed
535 puts("copy QSPI\n");
536 volatile uint32_t *mem = (uint32_t*)0x600000;
537 fl_read(mem, 0x600000, 0x600000); // shorter (testing) 0x8000);
538 puts("dump mem\n");
539 for (int i=0;i<256;i++) {
540 tmp = readl((unsigned long)&(mem[i]));
541 uart_writeuint32(tmp);
542 puts(" ");
543 if ((i & 0x7) == 0x7) puts("\r\n");
544 }
545 puts("\r\n");
546 mtspr(8, 0x600000); // move address to LR
547 __asm__ volatile("blr");
548 }
549
550 return 0;
551 }
552