add mode registers macro for orangecrab, extracted from litedram generated files
[ls2.git] / coldboot / coldboot.c
1 #include <stdint.h>
2 #include <stdbool.h>
3
4 #include "console.h"
5 #include "microwatt_soc.h"
6 #include "io.h"
7
8 #include <stdlib.h>
9 #include <stdint.h>
10 #include <gram.h>
11
12 #include "elf64.h"
13
/*
 * DRAM mode-register values for the OrangeCrab board.
 *
 * Expands to a bare, comma-separated list of four constants, so it is
 * meant to be dropped inside a brace initialiser.
 * NOTE(review): presumably intended for gramProfile.mode_registers; it is
 * not referenced anywhere in the visible code - confirm before relying on it.
 */
#define ORANGECRAB_MODE_REGISTERS \
    0x0320, 0x0002, 0x0200, 0x0000
15
/*
 * Emit a PowerPC `mtspr` instruction that moves `val` into the
 * special-purpose register numbered `sprnum`.
 *
 * `sprnum` is bound with an "i" (immediate) constraint, so every call
 * site must pass a compile-time constant and the call must be inlined.
 */
static inline void mtspr(int sprnum, unsigned long val)
{
    __asm__ __volatile__("mtspr %[spr],%[src]"
                         : /* no outputs */
                         : [spr] "i" (sprnum), [src] "r" (val));
}
20
/*
 * Perform a single volatile 32-bit load from `addr` and return the value.
 * The volatile access keeps the compiler from eliding or merging the read.
 */
static inline uint32_t read32(const void *addr)
{
    volatile uint32_t *reg = (volatile uint32_t *)addr;

    return *reg;
}
25
/*
 * Perform a single volatile 32-bit store of `value` to `addr`.
 * The volatile access keeps the compiler from eliding or merging the write.
 */
static inline void write32(void *addr, uint32_t value)
{
    volatile uint32_t *reg = (volatile uint32_t *)addr;

    *reg = value;
}
30
/*
 * Register block of the UART: eleven consecutive 32-bit words.
 * The byte offset of each word from the start of the block is noted
 * next to the member.
 */
struct uart_regs {
    uint32_t divisor;    /* 0x00 */
    uint32_t rx_data;    /* 0x04 */
    uint32_t rx_rdy;     /* 0x08 */
    uint32_t rx_err;     /* 0x0c */
    uint32_t tx_data;    /* 0x10 */
    uint32_t tx_rdy;     /* 0x14 */
    uint32_t zero0;      /* 0x18 - reserved */
    uint32_t zero1;      /* 0x1c - reserved */
    uint32_t ev_status;  /* 0x20 */
    uint32_t ev_pending; /* 0x24 */
    uint32_t ev_enable;  /* 0x28 */
};
44
/*
 * Print `val` on the console as exactly eight upper-case hexadecimal
 * digits, most-significant nibble first, with no prefix and no newline.
 *
 * The nibbles are extracted with shifts instead of by indexing the bytes
 * of `val` in memory, so the digit order no longer depends on the byte
 * order of the machine the code is built for.
 */
void uart_writeuint32(uint32_t val) {
    static const char hex_digits[16] = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };
    int shift;

    /* 28, 24, ... 0: walk from the top nibble down to the bottom one */
    for (shift = 28; shift >= 0; shift -= 4) {
        putchar(hex_digits[(val >> shift) & 0xF]);
    }
}
56
/*
 * Byte-by-byte copy of `n` bytes from `src` to `dest`, with a progress
 * trace on the console.
 *
 * Every time the byte index is a multiple of 4096 (including index 0) the
 * line "memcpy <index as 8 hex digits>\r\n" is written before the byte is
 * copied.  Nothing is printed or copied when `n` is 0.
 *
 * The index is a size_t so it can cover the full range of `n`; a signed
 * int index would overflow for copies larger than INT_MAX bytes.
 *
 * NOTE(review): unlike the standard library memcpy this returns void and
 * takes a non-const `src`; the signature is kept as-is for existing callers.
 */
void memcpy(void *dest, void *src, size_t n) {
    char *dest_char = (char *)dest;
    char *src_char = (char *)src;
    size_t i;

    for (i = 0; i < n; i++) {
#if 1
        /* progress trace, one line per 4096 bytes */
        if ((i % 4096) == 0) {
            puts("memcpy ");
            uart_writeuint32((uint32_t)i);
            puts("\r\n");
        }
#endif
        dest_char[i] = src_char[i];
    }
}
73
#if 0
/*
 * Disabled (compiled out) word-wise variant of memcpy.
 *
 * Copies n/4 32-bit words from `src` to `dest`; any trailing n%4 bytes
 * are not copied.  Every 4096 words (including word 0) it prints
 * "memcpy4 <word index as 8 hex digits>\r\n" before copying the word.
 */
void memcpy4(void *dest, void *src, size_t n) {
    uint32_t *dest_words = (uint32_t *)dest;
    uint32_t *src_words = (uint32_t *)src;
    int i;

    for (i = 0; i < n / 4; i++) {
#if 1
        /* progress trace, one line per 4096 words */
        if ((i % 4096) == 0) {
            puts("memcpy4 ");
            uart_writeuint32(i);
            puts("\r\n");
        }
#endif
        dest_words[i] = src_words[i]; /* one 32-bit word per iteration */
    }
}
#endif
92
/*
 * Empty handler: does nothing and returns immediately.
 * NOTE(review): presumably the interrupt service routine entry point
 * referenced from startup code outside this file - confirm.
 */
void isr(void)
{
}
96
97 extern void crank_up_qspi_level1(void);
98 extern int host_spi_flash_init(void);
99
100 static bool fl_read(void *dst, uint32_t offset, uint32_t size)
101 {
102 uint8_t *d = dst;
103 memcpy(d, (void *)(unsigned long)(SPI_FLASH_BASE + offset), size);
104 return true;
105 }
106
107 static unsigned long copy_flash(unsigned int offset, unsigned int dst_offs)
108 {
109 Elf64_Ehdr ehdr;
110 Elf64_Phdr ph;
111 unsigned int i, poff, size, off;
112 void *addr;
113
114 puts("Trying flash...\r\n");
115 if (!fl_read(&ehdr, offset, sizeof(ehdr)))
116 return -1ul;
117 if (!IS_ELF(ehdr) || ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
118 puts("Doesn't look like an elf64\r\n");
119 goto dump;
120 }
121 if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB ||
122 ehdr.e_machine != EM_PPC64) {
123 puts("Not a ppc64le binary\r\n");
124 goto dump;
125 }
126
127 poff = offset + ehdr.e_phoff;
128 for (i = 0; i < ehdr.e_phnum; i++) {
129 if (!fl_read(&ph, poff, sizeof(ph)))
130 goto dump;
131 if (ph.p_type != PT_LOAD)
132 continue;
133
134 /* XXX Add bound checking ! */
135 size = ph.p_filesz;
136 addr = (void *)ph.p_vaddr;
137 off = offset + ph.p_offset;
138 //printf("Copy segment %d (0x%x bytes) to %p\n", i, size, addr);
139 puts("Copy segment ");
140 uart_writeuint32(i);
141 puts(" size ");
142 uart_writeuint32(size);
143 puts(" addr ");
144 uart_writeuint32((uint32_t)(unsigned long)addr);
145 puts("\r\n");
146 fl_read(addr+dst_offs, off, size);
147 poff += ehdr.e_phentsize;
148 }
149
150 puts("Booting from DRAM at");
151 uart_writeuint32((unsigned int)(dst_offs+ehdr.e_entry));
152 puts("\r\n");
153
154 puts("Dump DRAM\r\n");
155 for (i = 0; i < 64; i++) {
156 uart_writeuint32(readl(dst_offs+ehdr.e_entry+(i*4)));
157 puts(" ");
158 if ((i & 7) == 7) puts("\r\n");
159 }
160 puts("\r\n");
161
162 //flush_cpu_icache();
163 return dst_offs+ehdr.e_entry;
164 dump:
165 puts("HDR: \r\n");
166 for (i = 0; i < 8; i++) {
167 uart_writeuint32(ehdr.e_ident[i]);
168 puts("\r\n");
169 }
170
171 return -1ul;
172 }
173
174
// XXX
// gram_read / gram_write are tracing replacements for the gram register
// accessors: each access is echoed on the console for debugging.
// To use them, define GRAM_RW_FUNC in gram.h

/*
 * Read the 32-bit register at `addr` and trace the access.
 *
 * Console output, in order: "gram_read: ", the address as 8 hex digits,
 * ": ", the value read as 8 hex digits, "\n".  The address is printed
 * before the read is performed.  `ctx` is not used.
 *
 * Returns the value read.
 */
uint32_t gram_read(const struct gramCtx *ctx, void *addr) {
    unsigned long reg = (unsigned long)addr;
    uint32_t value;

    puts("gram_read: ");
    uart_writeuint32(reg);
    value = readl(reg);
    puts(": ");
    uart_writeuint32((unsigned long)value);
    puts("\n");

    return value;
}
191
/*
 * Write `value` to the 32-bit register at `addr` and trace the access.
 *
 * Console output, in order: "gram_write: ", the address as 8 hex digits,
 * ": ", the value as 8 hex digits, then (after the write has been
 * issued) "\n".  `ctx` is not used.
 *
 * Always returns 0.
 */
int gram_write(const struct gramCtx *ctx, void *addr, uint32_t value) {
    unsigned long reg = (unsigned long)addr;

    puts("gram_write: ");
    uart_writeuint32(reg);
    puts(": ");
    uart_writeuint32((unsigned long)value);
    writel(value, reg);
    puts("\n");

    return 0;
}
202
203 int main(void) {
204 const int kNumIterations = 14;
205 int res, failcnt = 0;
206 uint32_t tmp;
207 unsigned long ftr, spi_offs=0x0;
208 volatile uint32_t *ram = (uint32_t*)MEMORY_BASE;
209
210 console_init();
211 //puts("Firmware launched...\n");
212
213 #if 1
214 puts(" Soc signature: ");
215 tmp = readl(SYSCON_BASE + SYS_REG_SIGNATURE);
216 uart_writeuint32(tmp);
217 tmp = readl(SYSCON_BASE + SYS_REG_SIGNATURE+4);
218 uart_writeuint32(tmp);
219 puts(" Soc features: ");
220 ftr = readl(SYSCON_BASE + SYS_REG_INFO);
221 if (ftr & SYS_REG_INFO_HAS_UART)
222 puts("UART ");
223 if (ftr & SYS_REG_INFO_HAS_DRAM)
224 puts("DRAM ");
225 if (ftr & SYS_REG_INFO_HAS_BRAM)
226 puts("BRAM ");
227 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH)
228 puts("SPIFLASH ");
229 if (ftr & SYS_REG_INFO_HAS_LITEETH)
230 puts("ETHERNET ");
231 puts("\r\n");
232
233 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
234 puts("SPI Offset: ");
235 spi_offs = readl(SYSCON_BASE + SYS_REG_SPI_INFO);
236 uart_writeuint32(spi_offs);
237 puts("\r\n");
238 }
239
240 #endif
241
242 #if 1
243 #if 1
244 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
245 // print out configuration parameters for QSPI
246 volatile uint32_t *qspi_cfg = (uint32_t*)SPI_FCTRL_BASE;
247 for (int k=0; k < 2; k++) {
248 tmp = readl((unsigned long)&(qspi_cfg[k]));
249 puts("cfg");
250 uart_writeuint32(k);
251 puts(" ");
252 uart_writeuint32(tmp);
253 puts("\r\n");
254 }
255 }
256 #endif
257 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
258 volatile uint32_t *qspi = (uint32_t*)SPI_FLASH_BASE+0x900000;
259 //volatile uint8_t *qspi_bytes = (uint8_t*)spi_offs;
260 // let's not, eh? writel(0xDEAF0123, (unsigned long)&(qspi[0]));
261 // tmp = readl((unsigned long)&(qspi[0]));
262 for (int i=0;i<10;i++) {
263 tmp = readl((unsigned long)&(qspi[i]));
264 uart_writeuint32(tmp);
265 puts(" ");
266 if ((i & 0x7) == 0x7) puts("\r\n");
267 }
268 puts("\r\n");
269
270 // speed up the QSPI to at least a sane level
271 crank_up_qspi_level1();
272 // run at saner level
273 host_spi_flash_init();
274
275 puts("SPI Offset: ");
276 spi_offs = readl(SYSCON_BASE + SYS_REG_SPI_INFO);
277 uart_writeuint32(spi_offs);
278 puts("\r\n");
279
280 /*
281 for (i=0;i<256;i++) {
282 tmp = readb((unsigned long)&(qspi_bytes[i]));
283 uart_writeuint32(tmp);
284 puts(" ");
285 }
286 */
287 #if 0
288 while (1) {
289 // quick read
290 tmp = readl((unsigned long)&(qspi[0x1000/4]));
291 puts("read 0x1000");
292 uart_writeuint32(tmp);
293 putchar(10);
294 }
295 while (1) {
296 unsigned char c = getchar();
297 putchar(c);
298 if (c == 13) { // if CR send LF
299
300 // quick read
301 tmp = readl((unsigned long)&(qspi[1<<i]));
302 puts("read ");
303 uart_writeuint32(1<<i);
304 puts(" ");
305 uart_writeuint32(tmp);
306 putchar(10);
307 i++;
308 }
309 }
310
311 return 0;
312 #endif
313 }
314 #endif
315 #if 0
316 volatile uint32_t *hyperram = (uint32_t*)0x00000000; // at 0x0 for arty
317 writel(0xDEAF0123, (unsigned long)&(hyperram[0]));
318 tmp = readl((unsigned long)&(hyperram[0]));
319 int i = 0;
320 while (1) {
321 unsigned char c = getchar();
322 putchar(c);
323 if (c == 13) { // if CR send LF
324
325 // quick write/read
326 writel(0xDEAF0123+i, (unsigned long)&(hyperram[1<<i]));
327 tmp = readl((unsigned long)&(hyperram[1<<i]));
328 puts("read ");
329 uart_writeuint32(1<<i);
330 puts(" ");
331 uart_writeuint32(tmp);
332 putchar(10);
333 i++;
334 }
335 }
336
337 return 0;
338 #endif
339
340 // init DRAM only if SYSCON says it exists (duh)
341 if (ftr & SYS_REG_INFO_HAS_DRAM)
342 {
343 puts("DRAM init... ");
344
345 struct gramCtx ctx;
346 #if 1
347 struct gramProfile profile = {
348 .mode_registers = {
349 0xb20, 0x806, 0x200, 0x0
350 },
351 .rdly_p0 = 2,
352 .rdly_p1 = 2,
353 };
354 #endif
355 #if 0
356 struct gramProfile profile = {
357 .mode_registers = {
358 0x0320, 0x0006, 0x0200, 0x0000
359 },
360 .rdly_p0 = 1,
361 .rdly_p1 = 1,
362 };
363 #endif
364 struct gramProfile profile2;
365 gram_init(&ctx, &profile, (void*)MEMORY_BASE,
366 (void*)DRAM_CTRL_BASE,
367 (void*)DRAM_INIT_BASE);
368 puts("done\n");
369
370 puts("MR profile: ");
371 uart_writeuint32(profile.mode_registers[0]);
372 puts(" ");
373 uart_writeuint32(profile.mode_registers[1]);
374 puts(" ");
375 uart_writeuint32(profile.mode_registers[2]);
376 puts(" ");
377 uart_writeuint32(profile.mode_registers[3]);
378 puts("\n");
379
380 // FIXME
381 // Early read test for WB access sim
382 //uart_writeuint32(*ram);
383
384 #if 1
385 puts("Rdly\np0: ");
386 for (size_t i = 0; i < 8; i++) {
387 profile2.rdly_p0 = i;
388 gram_load_calibration(&ctx, &profile2);
389 gram_reset_burstdet(&ctx);
390
391 for (size_t j = 0; j < 128; j++) {
392 tmp = readl((unsigned long)&(ram[i]));
393 }
394 if (gram_read_burstdet(&ctx, 0)) {
395 puts("1");
396 } else {
397 puts("0");
398 }
399 }
400 puts("\n");
401
402 puts("Rdly\np1: ");
403 for (size_t i = 0; i < 8; i++) {
404 profile2.rdly_p1 = i;
405 gram_load_calibration(&ctx, &profile2);
406 gram_reset_burstdet(&ctx);
407 for (size_t j = 0; j < 128; j++) {
408 tmp = readl((unsigned long)&(ram[i]));
409 }
410 if (gram_read_burstdet(&ctx, 1)) {
411 puts("1");
412 } else {
413 puts("0");
414 }
415 }
416 puts("\n");
417
418 puts("Auto calibrating... ");
419 res = gram_generate_calibration(&ctx, &profile2);
420 if (res != GRAM_ERR_NONE) {
421 puts("failed\n");
422 gram_load_calibration(&ctx, &profile);
423 } else {
424 gram_load_calibration(&ctx, &profile2);
425 }
426 puts("done\n");
427
428 puts("Auto calibration profile:");
429 puts("p0 rdly:");
430 uart_writeuint32(profile2.rdly_p0);
431 puts(" p1 rdly:");
432 uart_writeuint32(profile2.rdly_p1);
433 puts("\n");
434 #endif
435
436 puts("Reloading built-in calibration profile...");
437 gram_load_calibration(&ctx, &profile);
438
439 puts("DRAM test... \n");
440 for (size_t i = 0; i < kNumIterations; i++) {
441 writel(0xDEAF0000 | i*4, (unsigned long)&(ram[i]));
442 }
443
444 #if 0
445 for (int dly = 0; dly < 8; dly++) {
446 failcnt = 0;
447 profile2.rdly_p0 = dly;
448 profile2.rdly_p1 = dly;
449 puts("p0 rdly:");
450 uart_writeuint32(profile2.rdly_p0);
451 puts(" p1 rdly:");
452 uart_writeuint32(profile2.rdly_p1);
453 gram_load_calibration(&ctx, &profile2);
454 for (size_t i = 0; i < kNumIterations; i++) {
455 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
456 puts("fail : *(0x");
457 uart_writeuint32((unsigned long)(&ram[i]));
458 puts(") = ");
459 uart_writeuint32(readl((unsigned long)&(ram[i])));
460 puts("\n");
461 failcnt++;
462
463 if (failcnt > 10) {
464 puts("Test canceled (more than 10 errors)\n");
465 break;
466 }
467 }
468 }
469 }
470 #else
471 failcnt = 0;
472 for (size_t i = 0; i < kNumIterations; i++) {
473 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
474 puts("fail : *(0x");
475 uart_writeuint32((unsigned long)(&ram[i]));
476 puts(") = ");
477 uart_writeuint32(readl((unsigned long)&(ram[i])));
478 puts("\n");
479 failcnt++;
480
481 if (failcnt > 10) {
482 puts("Test canceled (more than 10 errors)\n");
483 break;
484 }
485 }
486 }
487 }
488 #endif
489 puts("done\n");
490
491 #if 0 // ooo, annoying: won't work. no idea why
492 // temporary hard-hack: boot directly from QSPI. really
493 // should do something like detect at least... something
494 if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH))
495 {
496 // jump to absolute address
497 mtspr(8, SPI_FLASH_BASE); // move address to LR
498 __asm__ volatile("blr");
499 return 0;
500 }
501 #endif
502
503 // memcpy from SPI Flash then boot
504 if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH) &&
505 (failcnt == 0))
506 {
507 /*
508 puts("ELF @ QSPI\n");
509 // identify ELF, copy if present, and get the start address
510 unsigned long faddr = copy_flash(spi_offs,
511 0x600000); // hack!
512 if (faddr != -1ul) {
513 // jump to absolute address
514 mtspr(8, faddr); // move address to LR
515 __asm__ volatile("blr");
516
517 // works with head.S which copies r3 into ctr then does bctr
518 return faddr;
519 }
520 puts("copy QSPI\n");
521 */
522 // another terrible hack: copy from flash at offset 0x600000
523 // a block of size 0x600000 into mem address 0x600000, then
524 // jump to it. this allows a dtb image to be executed
525 puts("copy QSPI\n");
526 volatile uint32_t *mem = (uint32_t*)0x1000000;
527 fl_read(mem, // destination in RAM
528 0x600000, // offset into QSPI
529 0x8000); // length - shorter (testing) 0x8000);
530 //0x1000000); // length
531 puts("dump mem\n");
532 for (int i=0;i<256;i++) {
533 tmp = readl((unsigned long)&(mem[i]));
534 uart_writeuint32(tmp);
535 puts(" ");
536 if ((i & 0x7) == 0x7) puts("\r\n");
537 }
538 puts("\r\n");
539 mtspr(8, 0x1000000); // move address to LR
540 __asm__ volatile("blr");
541 }
542
543 return 0;
544 }
545