e5d97a1e3a34ae8e331d5e0628c7057276f42cee
[ls2.git] / coldboot / coldboot.c
1 #include <stdint.h>
2 #include <stdbool.h>
3
4 #include "console.h"
5 #include "microwatt_soc.h"
6 #include "io.h"
7
8 #include <stdlib.h>
9 #include <stdint.h>
10 #include <gram.h>
11
12 #include "elf64.h"
13
/*
 * Emit a single `mtspr` instruction that moves `val` into the
 * special-purpose register numbered `sprnum`.
 *
 * `sprnum` is bound with the "i" (immediate) constraint, so it must be a
 * compile-time constant wherever this helper is expanded; `val` is passed
 * in a general-purpose register.
 */
static inline void mtspr(int sprnum, unsigned long val)
{
    __asm__ volatile("mtspr %0,%1"
                     : /* no outputs */
                     : "i" (sprnum), "r" (val));
}
18
/*
 * Perform one volatile 32-bit load from `addr` and return the value read.
 *
 * The access goes through a volatile-qualified pointer so the compiler
 * cannot elide or merge it. The cast keeps the `const` qualifier of the
 * parameter instead of silently discarding it.
 */
static inline uint32_t read32(const void *addr)
{
    return *(const volatile uint32_t *)addr;
}
23
/*
 * Perform one volatile 32-bit store of `value` to `addr`.
 *
 * The store goes through a volatile-qualified pointer so the compiler
 * cannot drop or combine it with neighbouring accesses.
 */
static inline void write32(void *addr, uint32_t value)
{
    volatile uint32_t *target = (volatile uint32_t *)addr;

    *target = value;
}
28
/*
 * UART register block: eleven consecutive 32-bit words.
 * The byte offset of each field from the start of the block is noted
 * alongside it.
 */
struct uart_regs {
    uint32_t divisor;     /* 0x00 */
    uint32_t rx_data;     /* 0x04 */
    uint32_t rx_rdy;      /* 0x08 */
    uint32_t rx_err;      /* 0x0c */
    uint32_t tx_data;     /* 0x10 */
    uint32_t tx_rdy;      /* 0x14 */
    uint32_t zero0;       /* 0x18, reserved */
    uint32_t zero1;       /* 0x1c, reserved */
    uint32_t ev_status;   /* 0x20 */
    uint32_t ev_pending;  /* 0x24 */
    uint32_t ev_enable;   /* 0x28 */
};
42
/*
 * Print `val` as exactly eight upper-case hexadecimal digits, most
 * significant nibble first, by calling putchar() once per digit.
 * No prefix, separator or line ending is emitted.
 *
 * The nibbles are extracted with shifts rather than by viewing `val` as a
 * byte array, so the digit order does not depend on the byte order of the
 * CPU the code is compiled for.
 */
void uart_writeuint32(uint32_t val) {
    /* Shared read-only table; no need to rebuild it on the stack per call. */
    static const char hex_digits[] = "0123456789ABCDEF";

    for (int shift = 28; shift >= 0; shift -= 4) {
        putchar(hex_digits[(val >> shift) & 0xF]);
    }
}
54
/*
 * Copy `n` bytes from `src` to `dest`, one byte at a time in ascending
 * address order.
 *
 * While the progress trace is enabled (the `#if 1` below), the line
 * "memcpy <offset>\r\n" is printed at offset 0 and again every 4096 bytes,
 * where <offset> is the low 32 bits of the byte index in hex.
 *
 * NOTE(review): the signature (void return, non-const `src`) differs from
 * the ISO C memcpy prototype; it is kept as-is so existing callers and any
 * declarations elsewhere stay valid.
 */
void memcpy(void *dest, void *src, size_t n) {
    const char *from = (const char *)src;
    char *to = (char *)dest;

    /*
     * The index is a size_t: an `int` counter would be compared against the
     * unsigned `n` and overflow once n exceeds INT_MAX.
     */
    for (size_t i = 0; i < n; i++) {
#if 1
        if ((i % 4096) == 0) {
            puts("memcpy ");
            uart_writeuint32((uint32_t)i);
            puts("\r\n");
        }
#endif
        to[i] = from[i];
    }
}
71
#if 0
/*
 * Disabled variant of memcpy that copies `n / 4` 32-bit words from `src`
 * to `dest` (any trailing n % 4 bytes are not copied).
 *
 * While the progress trace is enabled, "memcpy4 <index>\r\n" is printed at
 * word index 0 and every 4096 words; <index> is a word index, not a byte
 * offset.
 */
void memcpy4(void *dest, void *src, size_t n) {
    const uint32_t *from = (const uint32_t *)src;
    uint32_t *to = (uint32_t *)dest;
    const size_t words = n / 4;

    /* size_t index so the comparison with the word count is unsigned. */
    for (size_t i = 0; i < words; i++) {
#if 1
        if ((i % 4096) == 0) {
            puts("memcpy4 ");
            uart_writeuint32((uint32_t)i);
            puts("\r\n");
        }
#endif
        to[i] = from[i]; // copy contents one 32-bit word at a time
    }
}
#endif
90
/* Interrupt service routine entry point; the body is empty. */
void isr(void)
{
}
94
95 extern void crank_up_qspi_level1(void);
96 extern int host_spi_flash_init(void);
97
98 static bool fl_read(void *dst, uint32_t offset, uint32_t size)
99 {
100 uint8_t *d = dst;
101 memcpy(d, (void *)(unsigned long)(SPI_FLASH_BASE + offset), size);
102 return true;
103 }
104
105 static unsigned long copy_flash(unsigned int offset, unsigned int dst_offs)
106 {
107 Elf64_Ehdr ehdr;
108 Elf64_Phdr ph;
109 unsigned int i, poff, size, off;
110 void *addr;
111
112 puts("Trying flash...\r\n");
113 if (!fl_read(&ehdr, offset, sizeof(ehdr)))
114 return -1ul;
115 if (!IS_ELF(ehdr) || ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
116 puts("Doesn't look like an elf64\r\n");
117 goto dump;
118 }
119 if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB ||
120 ehdr.e_machine != EM_PPC64) {
121 puts("Not a ppc64le binary\r\n");
122 goto dump;
123 }
124
125 poff = offset + ehdr.e_phoff;
126 for (i = 0; i < ehdr.e_phnum; i++) {
127 if (!fl_read(&ph, poff, sizeof(ph)))
128 goto dump;
129 if (ph.p_type != PT_LOAD)
130 continue;
131
132 /* XXX Add bound checking ! */
133 size = ph.p_filesz;
134 addr = (void *)ph.p_vaddr;
135 off = offset + ph.p_offset;
136 //printf("Copy segment %d (0x%x bytes) to %p\n", i, size, addr);
137 puts("Copy segment ");
138 uart_writeuint32(i);
139 puts(" size ");
140 uart_writeuint32(size);
141 puts(" addr ");
142 uart_writeuint32((uint32_t)(unsigned long)addr);
143 puts("\r\n");
144 fl_read(addr+dst_offs, off, size);
145 poff += ehdr.e_phentsize;
146 }
147
148 puts("Booting from DRAM at");
149 uart_writeuint32((unsigned int)(dst_offs+ehdr.e_entry));
150 puts("\r\n");
151
152 puts("Dump DRAM\r\n");
153 for (i = 0; i < 64; i++) {
154 uart_writeuint32(readl(dst_offs+ehdr.e_entry+(i*4)));
155 puts(" ");
156 if ((i & 7) == 7) puts("\r\n");
157 }
158 puts("\r\n");
159
160 //flush_cpu_icache();
161 return dst_offs+ehdr.e_entry;
162 dump:
163 puts("HDR: \r\n");
164 for (i = 0; i < 8; i++) {
165 uart_writeuint32(ehdr.e_ident[i]);
166 puts("\r\n");
167 }
168
169 return -1ul;
170 }
171
172
// XXX
// Providing gram_[read|write] here lets every register access be traced
// on the console, which helps when debugging.
// To use, define GRAM_RW_FUNC in gram.h

/*
 * Traced 32-bit register read.
 *
 * Prints "gram_read: <addr>", performs the readl(), then prints
 * ": <value>\n" (both numbers as 8 hex digits; the address is truncated to
 * its low 32 bits for printing). `ctx` is not used.
 *
 * Returns the value read.
 */
uint32_t gram_read(const struct gramCtx *ctx, void *addr) {
    const unsigned long reg = (unsigned long)addr;
    uint32_t value;

    (void)ctx;

    puts("gram_read: ");
    uart_writeuint32(reg);
    value = readl(reg);
    puts(": ");
    uart_writeuint32(value);
    puts("\n");

    return value;
}
189
/*
 * Traced 32-bit register write.
 *
 * Prints "gram_write: <addr>: <value>" (8 hex digits each; the address is
 * truncated to its low 32 bits for printing), performs the writel(), then
 * prints "\n". `ctx` is not used.
 *
 * Always returns 0.
 */
int gram_write(const struct gramCtx *ctx, void *addr, uint32_t value) {
    const unsigned long reg = (unsigned long)addr;

    (void)ctx;

    puts("gram_write: ");
    uart_writeuint32(reg);
    puts(": ");
    uart_writeuint32(value);
    writel(value, reg);
    puts("\n");

    return 0;
}
200
201 int main(void) {
202 const int kNumIterations = 14;
203 int res, failcnt = 0;
204 uint32_t tmp;
205 unsigned long ftr, spi_offs=0x0;
206 volatile uint32_t *ram = (uint32_t*)MEMORY_BASE;
207
208 console_init();
209 //puts("Firmware launched...\n");
210
211 #if 1
212 puts(" Soc signature: ");
213 tmp = readl(SYSCON_BASE + SYS_REG_SIGNATURE);
214 uart_writeuint32(tmp);
215 tmp = readl(SYSCON_BASE + SYS_REG_SIGNATURE+4);
216 uart_writeuint32(tmp);
217 puts(" Soc features: ");
218 ftr = readl(SYSCON_BASE + SYS_REG_INFO);
219 if (ftr & SYS_REG_INFO_HAS_UART)
220 puts("UART ");
221 if (ftr & SYS_REG_INFO_HAS_DRAM)
222 puts("DRAM ");
223 if (ftr & SYS_REG_INFO_HAS_BRAM)
224 puts("BRAM ");
225 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH)
226 puts("SPIFLASH ");
227 if (ftr & SYS_REG_INFO_HAS_LITEETH)
228 puts("ETHERNET ");
229 puts("\r\n");
230
231 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
232 // speed up the QSPI to at least a sane level
233 crank_up_qspi_level1();
234 // run at saner level
235 host_spi_flash_init();
236
237 puts("SPI Offset: ");
238 spi_offs = readl(SYSCON_BASE + SYS_REG_SPI_INFO);
239 uart_writeuint32(spi_offs);
240 puts("\r\n");
241 }
242
243 #endif
244
245 #if 1
246 #if 1
247 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
248 // print out configuration parameters for QSPI
249 volatile uint32_t *qspi_cfg = (uint32_t*)SPI_FCTRL_BASE;
250 for (int k=0; k < 2; k++) {
251 tmp = readl((unsigned long)&(qspi_cfg[k]));
252 puts("cfg");
253 uart_writeuint32(k);
254 puts(" ");
255 uart_writeuint32(tmp);
256 puts("\r\n");
257 }
258 }
259 #endif
260 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
261 volatile uint32_t *qspi = (uint32_t*)SPI_FLASH_BASE+0x900000;
262 //volatile uint8_t *qspi_bytes = (uint8_t*)spi_offs;
263 // let's not, eh? writel(0xDEAF0123, (unsigned long)&(qspi[0]));
264 // tmp = readl((unsigned long)&(qspi[0]));
265 for (int i=0;i<10;i++) {
266 tmp = readl((unsigned long)&(qspi[i]));
267 uart_writeuint32(tmp);
268 puts(" ");
269 if ((i & 0x7) == 0x7) puts("\r\n");
270 }
271 puts("\r\n");
272 /*
273 for (i=0;i<256;i++) {
274 tmp = readb((unsigned long)&(qspi_bytes[i]));
275 uart_writeuint32(tmp);
276 puts(" ");
277 }
278 */
279 #if 0
280 while (1) {
281 // quick read
282 tmp = readl((unsigned long)&(qspi[0x1000/4]));
283 puts("read 0x1000");
284 uart_writeuint32(tmp);
285 putchar(10);
286 }
287 while (1) {
288 unsigned char c = getchar();
289 putchar(c);
290 if (c == 13) { // if CR send LF
291
292 // quick read
293 tmp = readl((unsigned long)&(qspi[1<<i]));
294 puts("read ");
295 uart_writeuint32(1<<i);
296 puts(" ");
297 uart_writeuint32(tmp);
298 putchar(10);
299 i++;
300 }
301 }
302
303 return 0;
304 #endif
305 }
306 #endif
307 #if 0
308 volatile uint32_t *hyperram = (uint32_t*)0x00000000; // at 0x0 for arty
309 writel(0xDEAF0123, (unsigned long)&(hyperram[0]));
310 tmp = readl((unsigned long)&(hyperram[0]));
311 int i = 0;
312 while (1) {
313 unsigned char c = getchar();
314 putchar(c);
315 if (c == 13) { // if CR send LF
316
317 // quick write/read
318 writel(0xDEAF0123+i, (unsigned long)&(hyperram[1<<i]));
319 tmp = readl((unsigned long)&(hyperram[1<<i]));
320 puts("read ");
321 uart_writeuint32(1<<i);
322 puts(" ");
323 uart_writeuint32(tmp);
324 putchar(10);
325 i++;
326 }
327 }
328
329 return 0;
330 #endif
331
332 // init DRAM only if SYSCON says it exists (duh)
333 if (ftr & SYS_REG_INFO_HAS_DRAM)
334 {
335 puts("DRAM init... ");
336
337 struct gramCtx ctx;
338 #if 1
339 struct gramProfile profile = {
340 .mode_registers = {
341 0xb20, 0x806, 0x200, 0x0
342 },
343 .rdly_p0 = 2,
344 .rdly_p1 = 2,
345 };
346 #endif
347 #if 0
348 struct gramProfile profile = {
349 .mode_registers = {
350 0x0320, 0x0006, 0x0200, 0x0000
351 },
352 .rdly_p0 = 1,
353 .rdly_p1 = 1,
354 };
355 #endif
356 struct gramProfile profile2;
357 gram_init(&ctx, &profile, (void*)MEMORY_BASE,
358 (void*)DRAM_CTRL_BASE,
359 (void*)DRAM_INIT_BASE);
360 puts("done\n");
361
362 puts("MR profile: ");
363 uart_writeuint32(profile.mode_registers[0]);
364 puts(" ");
365 uart_writeuint32(profile.mode_registers[1]);
366 puts(" ");
367 uart_writeuint32(profile.mode_registers[2]);
368 puts(" ");
369 uart_writeuint32(profile.mode_registers[3]);
370 puts("\n");
371
372 // FIXME
373 // Early read test for WB access sim
374 //uart_writeuint32(*ram);
375
376 #if 1
377 puts("Rdly\np0: ");
378 for (size_t i = 0; i < 8; i++) {
379 profile2.rdly_p0 = i;
380 gram_load_calibration(&ctx, &profile2);
381 gram_reset_burstdet(&ctx);
382
383 for (size_t j = 0; j < 128; j++) {
384 tmp = readl((unsigned long)&(ram[i]));
385 }
386 if (gram_read_burstdet(&ctx, 0)) {
387 puts("1");
388 } else {
389 puts("0");
390 }
391 }
392 puts("\n");
393
394 puts("Rdly\np1: ");
395 for (size_t i = 0; i < 8; i++) {
396 profile2.rdly_p1 = i;
397 gram_load_calibration(&ctx, &profile2);
398 gram_reset_burstdet(&ctx);
399 for (size_t j = 0; j < 128; j++) {
400 tmp = readl((unsigned long)&(ram[i]));
401 }
402 if (gram_read_burstdet(&ctx, 1)) {
403 puts("1");
404 } else {
405 puts("0");
406 }
407 }
408 puts("\n");
409
410 puts("Auto calibrating... ");
411 res = gram_generate_calibration(&ctx, &profile2);
412 if (res != GRAM_ERR_NONE) {
413 puts("failed\n");
414 gram_load_calibration(&ctx, &profile);
415 } else {
416 gram_load_calibration(&ctx, &profile2);
417 }
418 puts("done\n");
419
420 puts("Auto calibration profile:");
421 puts("p0 rdly:");
422 uart_writeuint32(profile2.rdly_p0);
423 puts(" p1 rdly:");
424 uart_writeuint32(profile2.rdly_p1);
425 puts("\n");
426 #endif
427
428 puts("Reloading built-in calibration profile...");
429 gram_load_calibration(&ctx, &profile);
430
431 puts("DRAM test... \n");
432 for (size_t i = 0; i < kNumIterations; i++) {
433 writel(0xDEAF0000 | i*4, (unsigned long)&(ram[i]));
434 }
435
436 #if 0
437 for (int dly = 0; dly < 8; dly++) {
438 failcnt = 0;
439 profile2.rdly_p0 = dly;
440 profile2.rdly_p1 = dly;
441 puts("p0 rdly:");
442 uart_writeuint32(profile2.rdly_p0);
443 puts(" p1 rdly:");
444 uart_writeuint32(profile2.rdly_p1);
445 gram_load_calibration(&ctx, &profile2);
446 for (size_t i = 0; i < kNumIterations; i++) {
447 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
448 puts("fail : *(0x");
449 uart_writeuint32((unsigned long)(&ram[i]));
450 puts(") = ");
451 uart_writeuint32(readl((unsigned long)&(ram[i])));
452 puts("\n");
453 failcnt++;
454
455 if (failcnt > 10) {
456 puts("Test canceled (more than 10 errors)\n");
457 break;
458 }
459 }
460 }
461 }
462 #else
463 failcnt = 0;
464 for (size_t i = 0; i < kNumIterations; i++) {
465 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
466 puts("fail : *(0x");
467 uart_writeuint32((unsigned long)(&ram[i]));
468 puts(") = ");
469 uart_writeuint32(readl((unsigned long)&(ram[i])));
470 puts("\n");
471 failcnt++;
472
473 if (failcnt > 10) {
474 puts("Test canceled (more than 10 errors)\n");
475 break;
476 }
477 }
478 }
479 }
480 #endif
481 puts("done\n");
482
483 #if 0 // ooo, annoying: won't work. no idea why
484 // temporary hard-hack: boot directly from QSPI. really
485 // should do something like detect at least... something
486 if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH))
487 {
488 // jump to absolute address
489 mtspr(8, SPI_FLASH_BASE); // move address to LR
490 __asm__ volatile("blr");
491 return 0;
492 }
493 #endif
494
495 // memcpy from SPI Flash then boot
496 if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH) &&
497 (failcnt == 0))
498 {
499 /*
500 puts("ELF @ QSPI\n");
501 // identify ELF, copy if present, and get the start address
502 unsigned long faddr = copy_flash(spi_offs,
503 0x600000); // hack!
504 if (faddr != -1ul) {
505 // jump to absolute address
506 mtspr(8, faddr); // move address to LR
507 __asm__ volatile("blr");
508
509 // works with head.S which copies r3 into ctr then does bctr
510 return faddr;
511 }
512 puts("copy QSPI\n");
513 */
514 // another terrible hack: copy from flash at offset 0x600000
515 // a block of size 0x600000 into mem address 0x600000, then
516 // jump to it. this allows a dtb image to be executed
517 puts("copy QSPI\n");
518 volatile uint32_t *mem = (uint32_t*)0x1000000;
519 fl_read(mem, // destination in RAM
520 0x600000, // offset into QSPI
521 0x8000); // length - shorter (testing) 0x8000);
522 //0x1000000); // length
523 puts("dump mem\n");
524 for (int i=0;i<256;i++) {
525 tmp = readl((unsigned long)&(mem[i]));
526 uart_writeuint32(tmp);
527 puts(" ");
528 if ((i & 0x7) == 0x7) puts("\r\n");
529 }
530 puts("\r\n");
531 mtspr(8, 0x1000000); // move address to LR
532 __asm__ volatile("blr");
533 }
534
535 return 0;
536 }
537