6c7b8cccd20db70dc318c8875906aebed78f2b55
[ls2.git] / coldboot / coldboot.c
1 #include <stdint.h>
2 #include <stdbool.h>
3
4 #include "console.h"
5 #include "microwatt_soc.h"
6 #include "io.h"
7
8 #include <stdlib.h>
9 #include <stdint.h>
10 #include <gram.h>
11
12 #include "elf64.h"
13
/*
 * Write `val` to the special-purpose register numbered `sprnum` with the
 * `mtspr` instruction.
 *
 * `sprnum` is bound with the "i" (immediate) constraint, so it must be a
 * compile-time constant at the point where this function is inlined.
 */
static inline void mtspr(int sprnum, unsigned long val)
{
	__asm__ volatile("mtspr %[spr],%[value]"
			 : /* no outputs */
			 : [spr] "i" (sprnum), [value] "r" (val));
}
18
/* Perform a single volatile 32-bit load from `addr` and return the value. */
static inline uint32_t read32(const void *addr)
{
	const volatile uint32_t *word = addr;

	return *word;
}
23
/* Perform a single volatile 32-bit store of `value` to `addr`. */
static inline void write32(void *addr, uint32_t value)
{
	volatile uint32_t *word = addr;

	*word = value;
}
28
/*
 * UART register block described as eleven consecutive 32-bit words.
 * The byte offset of each word from the start of the block is noted next to
 * it; the two `zero` words are reserved.
 */
struct uart_regs {
	uint32_t divisor;	/* 0x00 */
	uint32_t rx_data;	/* 0x04 */
	uint32_t rx_rdy;	/* 0x08 */
	uint32_t rx_err;	/* 0x0c */
	uint32_t tx_data;	/* 0x10 */
	uint32_t tx_rdy;	/* 0x14 */
	uint32_t zero0;		/* 0x18 - reserved */
	uint32_t zero1;		/* 0x1c - reserved */
	uint32_t ev_status;	/* 0x20 */
	uint32_t ev_pending;	/* 0x24 */
	uint32_t ev_enable;	/* 0x28 */
};
42
/*
 * Print `val` on the console as exactly eight upper-case hexadecimal digits,
 * most significant digit first, with no prefix and no line ending.
 *
 * The digits are extracted by shifting rather than by viewing `val` as an
 * array of bytes, so the digit order does not depend on the CPU byte order.
 */
void uart_writeuint32(uint32_t val) {
	static const char hex_digits[] = "0123456789ABCDEF";
	int shift;

	/* walk the eight nibbles from bits 31..28 down to bits 3..0 */
	for (shift = 28; shift >= 0; shift -= 4) {
		putchar(hex_digits[(val >> shift) & 0xF]);
	}
}
54
/*
 * Minimal byte-wise copy of `n` bytes from `src` to `dest`.
 *
 * Bytes are copied in ascending address order, so the regions must not
 * overlap with `dest` above `src`. Unlike the standard C memcpy() this
 * version returns nothing.
 *
 * While the trace below is enabled, a "memcpy <index>" line is printed on
 * the console at index 0 and then every 4096 bytes.
 */
void memcpy(void *dest, void *src, size_t n) {
	const char *from = src;
	char *to = dest;
	size_t i;	/* same type as n: no signed/unsigned comparison */

	for (i = 0; i < n; i++) {
#if 1
		if ((i % 4096) == 0) {
			puts("memcpy ");
			uart_writeuint32((uint32_t)i);
			puts("\r\n");
		}
#endif
		to[i] = from[i];
	}
}
71
#if 0
/*
 * Disabled variant of memcpy() that copies 32-bit words instead of bytes.
 *
 * Copies n/4 words from `src` to `dest` in ascending order; when `n` is not
 * a multiple of 4 the trailing n%4 bytes are NOT copied. While the trace is
 * enabled, a "memcpy4 <word index>" line is printed every 4096 words.
 */
void memcpy4(void *dest, void *src, size_t n) {
	const uint32_t *from = src;
	uint32_t *to = dest;
	size_t i;	/* word index, same type as n */

	for (i = 0; i < n/4; i++) {
#if 1
		if ((i % 4096) == 0) {
			puts("memcpy4 ");
			uart_writeuint32((uint32_t)i);
			puts("\r\n");
		}
#endif
		to[i] = from[i];	/* one 32-bit word per iteration */
	}
}
#endif
90
/* Interrupt service routine hook; currently an empty stub. */
void isr(void)
{
}
94
// WARNING
// KESTREL SPECIFIC

/* Tercel SPI controller register offsets (bytes from SPI_FCTRL_BASE) */
#define TERCEL_SPI_REG_SYS_PHY_CFG1		0x10
#define TERCEL_SPI_REG_SYS_FLASH_CFG5		0x24

/* clock divisor field, used with the PHY_CFG1 register */
#define TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK	0xff
#define TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT	0

/* multi-cycle read enable bit, used with the FLASH_CFG5 register */
#define TERCEL_SPI_FLASH_EN_MULTCYC_READ_MASK	0x1
#define TERCEL_SPI_FLASH_EN_MULTCYC_READ_SHIFT	0
103 static inline uint32_t read_tercel_register(uint8_t reg)
104 {
105 return readl((unsigned long)(SPI_FCTRL_BASE+reg));
106 }
107
108 static inline void write_tercel_register(uint8_t reg, uint32_t value)
109 {
110 writel(value, (unsigned long)(SPI_FCTRL_BASE+reg));
111 }
112
113 // TODO: need to use this
114 // https://gitlab.raptorengineering.com/kestrel-collaboration/kestrel-firmware/bare-metal-firmware/-/blob/master/main.c#L2328
115
116 /* this is a "level 1" speed-up, which gets an initial improvement of 10-50x
117 * over the default speed (which is a scant 100 bytes per second).
118 */
119 static void crank_up_qspi_level1(void)
120 {
121 // WARNING: KESTREL SPECIFIC
122 // Set SPI clock cycle divider to 1
123 uint32_t dword;
124 dword = read_tercel_register(TERCEL_SPI_REG_SYS_PHY_CFG1);
125 dword &= ~(TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK <<
126 TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT);
127 dword |= ((1 & TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK) <<
128 TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT);
129 write_tercel_register(TERCEL_SPI_REG_SYS_PHY_CFG1, dword);
130 // Enable read merging
131 dword = read_tercel_register(TERCEL_SPI_REG_SYS_FLASH_CFG5);
132 dword |= (TERCEL_SPI_FLASH_EN_MULTCYC_READ_MASK <<
133 TERCEL_SPI_FLASH_EN_MULTCYC_READ_SHIFT);
134 write_tercel_register(TERCEL_SPI_REG_SYS_FLASH_CFG5, dword);
135 }
136
137 static bool fl_read(void *dst, uint32_t offset, uint32_t size)
138 {
139 uint8_t *d = dst;
140 memcpy(d, (void *)(unsigned long)(SPI_FLASH_BASE + offset), size);
141 return true;
142 }
143
144 static unsigned long copy_flash(unsigned int offset, unsigned int dst_offs)
145 {
146 Elf64_Ehdr ehdr;
147 Elf64_Phdr ph;
148 unsigned int i, poff, size, off;
149 void *addr;
150
151 puts("Trying flash...\r\n");
152 if (!fl_read(&ehdr, offset, sizeof(ehdr)))
153 return -1ul;
154 if (!IS_ELF(ehdr) || ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
155 puts("Doesn't look like an elf64\r\n");
156 goto dump;
157 }
158 if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB ||
159 ehdr.e_machine != EM_PPC64) {
160 puts("Not a ppc64le binary\r\n");
161 goto dump;
162 }
163
164 poff = offset + ehdr.e_phoff;
165 for (i = 0; i < ehdr.e_phnum; i++) {
166 if (!fl_read(&ph, poff, sizeof(ph)))
167 goto dump;
168 if (ph.p_type != PT_LOAD)
169 continue;
170
171 /* XXX Add bound checking ! */
172 size = ph.p_filesz;
173 addr = (void *)ph.p_vaddr;
174 off = offset + ph.p_offset;
175 //printf("Copy segment %d (0x%x bytes) to %p\n", i, size, addr);
176 puts("Copy segment ");
177 uart_writeuint32(i);
178 puts(" size ");
179 uart_writeuint32(size);
180 puts(" addr ");
181 uart_writeuint32((uint32_t)(unsigned long)addr);
182 puts("\r\n");
183 fl_read(addr+dst_offs, off, size);
184 poff += ehdr.e_phentsize;
185 }
186
187 puts("Booting from DRAM at");
188 uart_writeuint32((unsigned int)(dst_offs+ehdr.e_entry));
189 puts("\r\n");
190
191 puts("Dump DRAM\r\n");
192 for (i = 0; i < 64; i++) {
193 uart_writeuint32(readl(dst_offs+ehdr.e_entry+(i*4)));
194 puts(" ");
195 if ((i & 7) == 7) puts("\r\n");
196 }
197 puts("\r\n");
198
199 //flush_cpu_icache();
200 return dst_offs+ehdr.e_entry;
201 dump:
202 puts("HDR: \r\n");
203 for (i = 0; i < 8; i++) {
204 uart_writeuint32(ehdr.e_ident[i]);
205 puts("\r\n");
206 }
207
208 return -1ul;
209 }
210
211
212 // XXX
213 // Defining gram_[read|write] allows a trace of all register
214 // accesses to be dumped to console for debugging purposes.
215 // To use, define GRAM_RW_FUNC in gram.h
/*
 * Tracing register read: prints "gram_read: <addr>: <value>\n" on the
 * console (both numbers as 8 hex digits) and returns the value obtained
 * from readl() at `addr`. `ctx` is not used.
 */
uint32_t gram_read(const struct gramCtx *ctx, void *addr) {
	const unsigned long reg_addr = (unsigned long)addr;
	uint32_t value;

	(void)ctx;

	puts("gram_read: ");
	uart_writeuint32(reg_addr);
	value = readl(reg_addr);
	puts(": ");
	uart_writeuint32(value);
	puts("\n");

	return value;
}
228
/*
 * Tracing register write: prints "gram_write: <addr>: <value>" on the
 * console (both numbers as 8 hex digits), performs the writel() to `addr`,
 * then prints the line ending. `ctx` is not used. Always returns 0.
 */
int gram_write(const struct gramCtx *ctx, void *addr, uint32_t value) {
	const unsigned long reg_addr = (unsigned long)addr;

	(void)ctx;

	puts("gram_write: ");
	uart_writeuint32(reg_addr);
	puts(": ");
	uart_writeuint32(value);
	writel(value, reg_addr);
	puts("\n");

	return 0;
}
239
240 int main(void) {
241 const int kNumIterations = 14;
242 int res, failcnt = 0;
243 uint32_t tmp;
244 unsigned long ftr, spi_offs=0x0;
245 volatile uint32_t *ram = (uint32_t*)MEMORY_BASE;
246
247 console_init();
248 //puts("Firmware launched...\n");
249
250 #if 1
251 puts(" Soc signature: ");
252 tmp = readl(SYSCON_BASE + SYS_REG_SIGNATURE);
253 uart_writeuint32(tmp);
254 tmp = readl(SYSCON_BASE + SYS_REG_SIGNATURE+4);
255 uart_writeuint32(tmp);
256 puts(" Soc features: ");
257 ftr = readl(SYSCON_BASE + SYS_REG_INFO);
258 if (ftr & SYS_REG_INFO_HAS_UART)
259 puts("UART ");
260 if (ftr & SYS_REG_INFO_HAS_DRAM)
261 puts("DRAM ");
262 if (ftr & SYS_REG_INFO_HAS_BRAM)
263 puts("BRAM ");
264 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH)
265 puts("SPIFLASH ");
266 if (ftr & SYS_REG_INFO_HAS_LITEETH)
267 puts("ETHERNET ");
268 puts("\r\n");
269
270 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
271 // speed up the QSPI to at least a sane level
272 crank_up_qspi_level1();
273
274 puts("SPI Offset: ");
275 spi_offs = readl(SYSCON_BASE + SYS_REG_SPI_INFO);
276 uart_writeuint32(spi_offs);
277 puts("\r\n");
278 }
279
280 #endif
281
282 #if 1
283 #if 1
284 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
285 // print out configuration parameters for QSPI
286 volatile uint32_t *qspi_cfg = (uint32_t*)SPI_FCTRL_BASE;
287 for (int k=0; k < 2; k++) {
288 tmp = readl((unsigned long)&(qspi_cfg[k]));
289 puts("cfg");
290 uart_writeuint32(k);
291 puts(" ");
292 uart_writeuint32(tmp);
293 puts("\r\n");
294 }
295 }
296 #endif
297 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
298 volatile uint32_t *qspi = (uint32_t*)SPI_FLASH_BASE+0x900000;
299 //volatile uint8_t *qspi_bytes = (uint8_t*)spi_offs;
300 // let's not, eh? writel(0xDEAF0123, (unsigned long)&(qspi[0]));
301 // tmp = readl((unsigned long)&(qspi[0]));
302 for (int i=0;i<2;i++) {
303 tmp = readl((unsigned long)&(qspi[i]));
304 uart_writeuint32(tmp);
305 puts(" ");
306 if ((i & 0x7) == 0x7) puts("\r\n");
307 }
308 puts("\r\n");
309 /*
310 for (i=0;i<256;i++) {
311 tmp = readb((unsigned long)&(qspi_bytes[i]));
312 uart_writeuint32(tmp);
313 puts(" ");
314 }
315 */
316 #if 0
317 while (1) {
318 // quick read
319 tmp = readl((unsigned long)&(qspi[0x1000/4]));
320 puts("read 0x1000");
321 uart_writeuint32(tmp);
322 putchar(10);
323 }
324 while (1) {
325 unsigned char c = getchar();
326 putchar(c);
327 if (c == 13) { // if CR send LF
328
329 // quick read
330 tmp = readl((unsigned long)&(qspi[1<<i]));
331 puts("read ");
332 uart_writeuint32(1<<i);
333 puts(" ");
334 uart_writeuint32(tmp);
335 putchar(10);
336 i++;
337 }
338 }
339
340 return 0;
341 #endif
342 }
343 #endif
344 #if 0
345 volatile uint32_t *hyperram = (uint32_t*)0x00000000; // at 0x0 for arty
346 writel(0xDEAF0123, (unsigned long)&(hyperram[0]));
347 tmp = readl((unsigned long)&(hyperram[0]));
348 int i = 0;
349 while (1) {
350 unsigned char c = getchar();
351 putchar(c);
352 if (c == 13) { // if CR send LF
353
354 // quick write/read
355 writel(0xDEAF0123+i, (unsigned long)&(hyperram[1<<i]));
356 tmp = readl((unsigned long)&(hyperram[1<<i]));
357 puts("read ");
358 uart_writeuint32(1<<i);
359 puts(" ");
360 uart_writeuint32(tmp);
361 putchar(10);
362 i++;
363 }
364 }
365
366 return 0;
367 #endif
368
369 // init DRAM only if SYSCON says it exists (duh)
370 if (ftr & SYS_REG_INFO_HAS_DRAM)
371 {
372 puts("DRAM init... ");
373
374 struct gramCtx ctx;
375 #if 1
376 struct gramProfile profile = {
377 .mode_registers = {
378 0xb20, 0x806, 0x200, 0x0
379 },
380 .rdly_p0 = 2,
381 .rdly_p1 = 2,
382 };
383 #endif
384 #if 0
385 struct gramProfile profile = {
386 .mode_registers = {
387 0x0320, 0x0006, 0x0200, 0x0000
388 },
389 .rdly_p0 = 1,
390 .rdly_p1 = 1,
391 };
392 #endif
393 struct gramProfile profile2;
394 gram_init(&ctx, &profile, (void*)MEMORY_BASE,
395 (void*)DRAM_CTRL_BASE,
396 (void*)DRAM_INIT_BASE);
397 puts("done\n");
398
399 puts("MR profile: ");
400 uart_writeuint32(profile.mode_registers[0]);
401 puts(" ");
402 uart_writeuint32(profile.mode_registers[1]);
403 puts(" ");
404 uart_writeuint32(profile.mode_registers[2]);
405 puts(" ");
406 uart_writeuint32(profile.mode_registers[3]);
407 puts("\n");
408
409 // FIXME
410 // Early read test for WB access sim
411 //uart_writeuint32(*ram);
412
413 #if 1
414 puts("Rdly\np0: ");
415 for (size_t i = 0; i < 8; i++) {
416 profile2.rdly_p0 = i;
417 gram_load_calibration(&ctx, &profile2);
418 gram_reset_burstdet(&ctx);
419
420 for (size_t j = 0; j < 128; j++) {
421 tmp = readl((unsigned long)&(ram[i]));
422 }
423 if (gram_read_burstdet(&ctx, 0)) {
424 puts("1");
425 } else {
426 puts("0");
427 }
428 }
429 puts("\n");
430
431 puts("Rdly\np1: ");
432 for (size_t i = 0; i < 8; i++) {
433 profile2.rdly_p1 = i;
434 gram_load_calibration(&ctx, &profile2);
435 gram_reset_burstdet(&ctx);
436 for (size_t j = 0; j < 128; j++) {
437 tmp = readl((unsigned long)&(ram[i]));
438 }
439 if (gram_read_burstdet(&ctx, 1)) {
440 puts("1");
441 } else {
442 puts("0");
443 }
444 }
445 puts("\n");
446
447 puts("Auto calibrating... ");
448 res = gram_generate_calibration(&ctx, &profile2);
449 if (res != GRAM_ERR_NONE) {
450 puts("failed\n");
451 gram_load_calibration(&ctx, &profile);
452 } else {
453 gram_load_calibration(&ctx, &profile2);
454 }
455 puts("done\n");
456
457 puts("Auto calibration profile:");
458 puts("p0 rdly:");
459 uart_writeuint32(profile2.rdly_p0);
460 puts(" p1 rdly:");
461 uart_writeuint32(profile2.rdly_p1);
462 puts("\n");
463 #endif
464
465 puts("Reloading built-in calibration profile...");
466 gram_load_calibration(&ctx, &profile);
467
468 puts("DRAM test... \n");
469 for (size_t i = 0; i < kNumIterations; i++) {
470 writel(0xDEAF0000 | i*4, (unsigned long)&(ram[i]));
471 }
472
473 #if 0
474 for (int dly = 0; dly < 8; dly++) {
475 failcnt = 0;
476 profile2.rdly_p0 = dly;
477 profile2.rdly_p1 = dly;
478 puts("p0 rdly:");
479 uart_writeuint32(profile2.rdly_p0);
480 puts(" p1 rdly:");
481 uart_writeuint32(profile2.rdly_p1);
482 gram_load_calibration(&ctx, &profile2);
483 for (size_t i = 0; i < kNumIterations; i++) {
484 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
485 puts("fail : *(0x");
486 uart_writeuint32((unsigned long)(&ram[i]));
487 puts(") = ");
488 uart_writeuint32(readl((unsigned long)&(ram[i])));
489 puts("\n");
490 failcnt++;
491
492 if (failcnt > 10) {
493 puts("Test canceled (more than 10 errors)\n");
494 break;
495 }
496 }
497 }
498 }
499 #else
500 failcnt = 0;
501 for (size_t i = 0; i < kNumIterations; i++) {
502 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
503 puts("fail : *(0x");
504 uart_writeuint32((unsigned long)(&ram[i]));
505 puts(") = ");
506 uart_writeuint32(readl((unsigned long)&(ram[i])));
507 puts("\n");
508 failcnt++;
509
510 if (failcnt > 10) {
511 puts("Test canceled (more than 10 errors)\n");
512 break;
513 }
514 }
515 }
516 }
517 #endif
518 puts("done\n");
519
520 #if 0 // ooo, annoying: won't work. no idea why
521 // temporary hard-hack: boot directly from QSPI. really
522 // should do something like detect at least... something
523 if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH))
524 {
525 // jump to absolute address
526 mtspr(8, SPI_FLASH_BASE); // move address to LR
527 __asm__ volatile("blr");
528 return 0;
529 }
530 #endif
531
532 // memcpy from SPI Flash then boot
533 if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH) &&
534 (failcnt == 0))
535 {
536 /*
537 puts("ELF @ QSPI\n");
538 // identify ELF, copy if present, and get the start address
539 unsigned long faddr = copy_flash(spi_offs,
540 0x600000); // hack!
541 if (faddr != -1ul) {
542 // jump to absolute address
543 mtspr(8, faddr); // move address to LR
544 __asm__ volatile("blr");
545
546 // works with head.S which copies r3 into ctr then does bctr
547 return faddr;
548 }
549 puts("copy QSPI\n");
550 */
551 // another terrible hack: copy from flash at offset 0x600000
552 // a block of size 0x600000 into mem address 0x600000, then
553 // jump to it. this allows a dtb image to be executed
554 puts("copy QSPI\n");
555 volatile uint32_t *mem = (uint32_t*)0x1000000;
556 fl_read(mem, // destination in RAM
557 0x600000, // offset into QSPI
558 0x1000000); // length - shorter (testing) 0x8000);
559 puts("dump mem\n");
560 for (int i=0;i<256;i++) {
561 tmp = readl((unsigned long)&(mem[i]));
562 uart_writeuint32(tmp);
563 puts(" ");
564 if ((i & 0x7) == 0x7) puts("\r\n");
565 }
566 puts("\r\n");
567 mtspr(8, 0x1000000); // move address to LR
568 __asm__ volatile("blr");
569 }
570
571 return 0;
572 }
573