add micron n25q 128mb QSPI device to table of
[ls2.git] / coldboot / coldboot.c
1 #include <stdint.h>
2 #include <stdbool.h>
3
4 #include "console.h"
5 #include "microwatt_soc.h"
6 #include "io.h"
7
8 #include <stdlib.h>
9 #include <stdint.h>
10 #include <gram.h>
11
12 #include "elf64.h"
13
/*
 * Write `val` into the special-purpose register numbered `sprnum`.
 *
 * The SPR number is passed through an "i" (immediate) constraint, so it
 * must be a compile-time constant at every call site.
 */
static inline void mtspr(int sprnum, unsigned long val)
{
    __asm__ volatile("mtspr %[spr],%[value]"
                     : /* no outputs */
                     : [spr] "i" (sprnum), [value] "r" (val));
}
18
/*
 * Perform a single volatile 32-bit load from `addr` and return the value.
 *
 * The access goes through a const-qualified volatile pointer so the
 * caller's `const` is not cast away.
 */
static inline uint32_t read32(const void *addr)
{
    return *(const volatile uint32_t *)addr;
}
23
/*
 * Perform a single volatile 32-bit store of `value` to `addr`.
 */
static inline void write32(void *addr, uint32_t value)
{
    volatile uint32_t *reg = addr;

    *reg = value;
}
28
/*
 * UART register block described as eleven consecutive 32-bit words.
 *
 * NOTE(review): presumably meant to be overlaid on the UART's register
 * window; nothing in the visible part of this file uses it, so confirm the
 * layout against the SoC register map before relying on it.
 */
struct uart_regs {
    uint32_t divisor;
    uint32_t rx_data;
    uint32_t rx_rdy;
    uint32_t rx_err;
    uint32_t tx_data;
    uint32_t tx_rdy;
    uint32_t zero0;      /* reserved */
    uint32_t zero1;      /* reserved */
    uint32_t ev_status;
    uint32_t ev_pending;
    uint32_t ev_enable;
};
42
/*
 * Print `val` on the console as exactly eight upper-case hexadecimal
 * digits, most significant nibble first, with no prefix and no newline.
 *
 * The digits are extracted by shifting rather than by aliasing the bytes of
 * `val`, so the output does not depend on the byte order of the CPU.
 */
void uart_writeuint32(uint32_t val) {
    static const char hex_digits[] = "0123456789ABCDEF";

    for (int shift = 28; shift >= 0; shift -= 4) {
        putchar(hex_digits[(val >> shift) & 0xF]);
    }
}
54
/*
 * Byte-by-byte copy of `n` bytes from `src` to `dest`, with a progress line
 * ("memcpy <hex offset>") printed on the console every 4096 bytes while the
 * `#if 1` below is enabled.
 *
 * NOTE(review): this replaces the C library memcpy for this firmware but
 * returns void instead of `dest`; callers must not use a return value.
 *
 * The index is a size_t so it has the same type as `n`; the original signed
 * int index was compared against an unsigned count and could overflow.
 */
void memcpy(void *dest, void *src, size_t n) {
    const char *from = (const char *)src;
    char *to = (char *)dest;

    for (size_t i = 0; i < n; i++) {
#if 1
        if ((i % 4096) == 0) {
            puts("memcpy ");
            uart_writeuint32((uint32_t)i);
            puts("\r\n");
        }
#endif
        to[i] = from[i];
    }
}
71
#if 0
/*
 * Disabled variant of memcpy that copies 32-bit words instead of bytes.
 *
 * Copies n/4 words from `src` to `dest`; any trailing n%4 bytes are NOT
 * copied. Prints "memcpy4 <hex word index>" every 4096 words while the
 * inner `#if 1` is enabled.
 */
void memcpy4(void *dest, void *src, size_t n) {
    const uint32_t *from = (const uint32_t *)src;
    uint32_t *to = (uint32_t *)dest;
    const size_t words = n / 4;

    for (size_t i = 0; i < words; i++) {
#if 1
        if ((i % 4096) == 0) {
            puts("memcpy4 ");
            uart_writeuint32((uint32_t)i);
            puts("\r\n");
        }
#endif
        to[i] = from[i]; // copy one 32-bit word at a time
    }
}
#endif
90
/*
 * Interrupt service routine hook: deliberately does nothing.
 */
void isr(void)
{
}
94
95 extern void crank_up_qspi_level1(void);
96 extern int host_spi_flash_init(void);
97
98 static bool fl_read(void *dst, uint32_t offset, uint32_t size)
99 {
100 uint8_t *d = dst;
101 memcpy(d, (void *)(unsigned long)(SPI_FLASH_BASE + offset), size);
102 return true;
103 }
104
105 static unsigned long copy_flash(unsigned int offset, unsigned int dst_offs)
106 {
107 Elf64_Ehdr ehdr;
108 Elf64_Phdr ph;
109 unsigned int i, poff, size, off;
110 void *addr;
111
112 puts("Trying flash...\r\n");
113 if (!fl_read(&ehdr, offset, sizeof(ehdr)))
114 return -1ul;
115 if (!IS_ELF(ehdr) || ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
116 puts("Doesn't look like an elf64\r\n");
117 goto dump;
118 }
119 if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB ||
120 ehdr.e_machine != EM_PPC64) {
121 puts("Not a ppc64le binary\r\n");
122 goto dump;
123 }
124
125 poff = offset + ehdr.e_phoff;
126 for (i = 0; i < ehdr.e_phnum; i++) {
127 if (!fl_read(&ph, poff, sizeof(ph)))
128 goto dump;
129 if (ph.p_type != PT_LOAD)
130 continue;
131
132 /* XXX Add bound checking ! */
133 size = ph.p_filesz;
134 addr = (void *)ph.p_vaddr;
135 off = offset + ph.p_offset;
136 //printf("Copy segment %d (0x%x bytes) to %p\n", i, size, addr);
137 puts("Copy segment ");
138 uart_writeuint32(i);
139 puts(" size ");
140 uart_writeuint32(size);
141 puts(" addr ");
142 uart_writeuint32((uint32_t)(unsigned long)addr);
143 puts("\r\n");
144 fl_read(addr+dst_offs, off, size);
145 poff += ehdr.e_phentsize;
146 }
147
148 puts("Booting from DRAM at");
149 uart_writeuint32((unsigned int)(dst_offs+ehdr.e_entry));
150 puts("\r\n");
151
152 puts("Dump DRAM\r\n");
153 for (i = 0; i < 64; i++) {
154 uart_writeuint32(readl(dst_offs+ehdr.e_entry+(i*4)));
155 puts(" ");
156 if ((i & 7) == 7) puts("\r\n");
157 }
158 puts("\r\n");
159
160 //flush_cpu_icache();
161 return dst_offs+ehdr.e_entry;
162 dump:
163 puts("HDR: \r\n");
164 for (i = 0; i < 8; i++) {
165 uart_writeuint32(ehdr.e_ident[i]);
166 puts("\r\n");
167 }
168
169 return -1ul;
170 }
171
172
173 // XXX
174 // Defining gram_[read|write] allows a trace of all register
175 // accesses to be dumped to console for debugging purposes.
176 // To use, define GRAM_RW_FUNC in gram.h
/*
 * Tracing register read for libgram: prints "gram_read: <addr>: <value>"
 * to the console around a readl() of `addr` and returns the value read.
 * `ctx` is not used.
 */
uint32_t gram_read(const struct gramCtx *ctx, void *addr) {
    const unsigned long reg = (unsigned long)addr;
    uint32_t value;

    (void)ctx;

    puts("gram_read: ");
    uart_writeuint32(reg);
    value = readl(reg);
    puts(": ");
    uart_writeuint32(value);
    puts("\n");

    return value;
}
189
/*
 * Tracing register write for libgram: prints "gram_write: <addr>: <value>"
 * to the console, performs the writel() of `value` to `addr`, then ends the
 * line. `ctx` is not used. Always returns 0.
 */
int gram_write(const struct gramCtx *ctx, void *addr, uint32_t value) {
    const unsigned long reg = (unsigned long)addr;

    (void)ctx;

    puts("gram_write: ");
    uart_writeuint32(reg);
    puts(": ");
    uart_writeuint32(value);
    writel(value, reg);
    puts("\n");

    return 0;
}
200
201 int main(void) {
202 const int kNumIterations = 14;
203 int res, failcnt = 0;
204 uint32_t tmp;
205 unsigned long ftr, spi_offs=0x0;
206 volatile uint32_t *ram = (uint32_t*)MEMORY_BASE;
207
208 console_init();
209 //puts("Firmware launched...\n");
210
211 #if 1
212 puts(" Soc signature: ");
213 tmp = readl(SYSCON_BASE + SYS_REG_SIGNATURE);
214 uart_writeuint32(tmp);
215 tmp = readl(SYSCON_BASE + SYS_REG_SIGNATURE+4);
216 uart_writeuint32(tmp);
217 puts(" Soc features: ");
218 ftr = readl(SYSCON_BASE + SYS_REG_INFO);
219 if (ftr & SYS_REG_INFO_HAS_UART)
220 puts("UART ");
221 if (ftr & SYS_REG_INFO_HAS_DRAM)
222 puts("DRAM ");
223 if (ftr & SYS_REG_INFO_HAS_BRAM)
224 puts("BRAM ");
225 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH)
226 puts("SPIFLASH ");
227 if (ftr & SYS_REG_INFO_HAS_LITEETH)
228 puts("ETHERNET ");
229 puts("\r\n");
230
231 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
232 puts("SPI Offset: ");
233 spi_offs = readl(SYSCON_BASE + SYS_REG_SPI_INFO);
234 uart_writeuint32(spi_offs);
235 puts("\r\n");
236 }
237
238 #endif
239
240 #if 1
241 #if 1
242 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
243 // print out configuration parameters for QSPI
244 volatile uint32_t *qspi_cfg = (uint32_t*)SPI_FCTRL_BASE;
245 for (int k=0; k < 2; k++) {
246 tmp = readl((unsigned long)&(qspi_cfg[k]));
247 puts("cfg");
248 uart_writeuint32(k);
249 puts(" ");
250 uart_writeuint32(tmp);
251 puts("\r\n");
252 }
253 }
254 #endif
255 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
256 volatile uint32_t *qspi = (uint32_t*)SPI_FLASH_BASE+0x900000;
257 //volatile uint8_t *qspi_bytes = (uint8_t*)spi_offs;
258 // let's not, eh? writel(0xDEAF0123, (unsigned long)&(qspi[0]));
259 // tmp = readl((unsigned long)&(qspi[0]));
260 for (int i=0;i<10;i++) {
261 tmp = readl((unsigned long)&(qspi[i]));
262 uart_writeuint32(tmp);
263 puts(" ");
264 if ((i & 0x7) == 0x7) puts("\r\n");
265 }
266 puts("\r\n");
267
268 // speed up the QSPI to at least a sane level
269 crank_up_qspi_level1();
270 // run at saner level
271 host_spi_flash_init();
272
273 puts("SPI Offset: ");
274 spi_offs = readl(SYSCON_BASE + SYS_REG_SPI_INFO);
275 uart_writeuint32(spi_offs);
276 puts("\r\n");
277
278 /*
279 for (i=0;i<256;i++) {
280 tmp = readb((unsigned long)&(qspi_bytes[i]));
281 uart_writeuint32(tmp);
282 puts(" ");
283 }
284 */
285 #if 0
286 while (1) {
287 // quick read
288 tmp = readl((unsigned long)&(qspi[0x1000/4]));
289 puts("read 0x1000");
290 uart_writeuint32(tmp);
291 putchar(10);
292 }
293 while (1) {
294 unsigned char c = getchar();
295 putchar(c);
296 if (c == 13) { // if CR send LF
297
298 // quick read
299 tmp = readl((unsigned long)&(qspi[1<<i]));
300 puts("read ");
301 uart_writeuint32(1<<i);
302 puts(" ");
303 uart_writeuint32(tmp);
304 putchar(10);
305 i++;
306 }
307 }
308
309 return 0;
310 #endif
311 }
312 #endif
313 #if 0
314 volatile uint32_t *hyperram = (uint32_t*)0x00000000; // at 0x0 for arty
315 writel(0xDEAF0123, (unsigned long)&(hyperram[0]));
316 tmp = readl((unsigned long)&(hyperram[0]));
317 int i = 0;
318 while (1) {
319 unsigned char c = getchar();
320 putchar(c);
321 if (c == 13) { // if CR send LF
322
323 // quick write/read
324 writel(0xDEAF0123+i, (unsigned long)&(hyperram[1<<i]));
325 tmp = readl((unsigned long)&(hyperram[1<<i]));
326 puts("read ");
327 uart_writeuint32(1<<i);
328 puts(" ");
329 uart_writeuint32(tmp);
330 putchar(10);
331 i++;
332 }
333 }
334
335 return 0;
336 #endif
337
338 // init DRAM only if SYSCON says it exists (duh)
339 if (ftr & SYS_REG_INFO_HAS_DRAM)
340 {
341 puts("DRAM init... ");
342
343 struct gramCtx ctx;
344 #if 1
345 struct gramProfile profile = {
346 .mode_registers = {
347 0xb20, 0x806, 0x200, 0x0
348 },
349 .rdly_p0 = 2,
350 .rdly_p1 = 2,
351 };
352 #endif
353 #if 0
354 struct gramProfile profile = {
355 .mode_registers = {
356 0x0320, 0x0006, 0x0200, 0x0000
357 },
358 .rdly_p0 = 1,
359 .rdly_p1 = 1,
360 };
361 #endif
362 struct gramProfile profile2;
363 gram_init(&ctx, &profile, (void*)MEMORY_BASE,
364 (void*)DRAM_CTRL_BASE,
365 (void*)DRAM_INIT_BASE);
366 puts("done\n");
367
368 puts("MR profile: ");
369 uart_writeuint32(profile.mode_registers[0]);
370 puts(" ");
371 uart_writeuint32(profile.mode_registers[1]);
372 puts(" ");
373 uart_writeuint32(profile.mode_registers[2]);
374 puts(" ");
375 uart_writeuint32(profile.mode_registers[3]);
376 puts("\n");
377
378 // FIXME
379 // Early read test for WB access sim
380 //uart_writeuint32(*ram);
381
382 #if 1
383 puts("Rdly\np0: ");
384 for (size_t i = 0; i < 8; i++) {
385 profile2.rdly_p0 = i;
386 gram_load_calibration(&ctx, &profile2);
387 gram_reset_burstdet(&ctx);
388
389 for (size_t j = 0; j < 128; j++) {
390 tmp = readl((unsigned long)&(ram[i]));
391 }
392 if (gram_read_burstdet(&ctx, 0)) {
393 puts("1");
394 } else {
395 puts("0");
396 }
397 }
398 puts("\n");
399
400 puts("Rdly\np1: ");
401 for (size_t i = 0; i < 8; i++) {
402 profile2.rdly_p1 = i;
403 gram_load_calibration(&ctx, &profile2);
404 gram_reset_burstdet(&ctx);
405 for (size_t j = 0; j < 128; j++) {
406 tmp = readl((unsigned long)&(ram[i]));
407 }
408 if (gram_read_burstdet(&ctx, 1)) {
409 puts("1");
410 } else {
411 puts("0");
412 }
413 }
414 puts("\n");
415
416 puts("Auto calibrating... ");
417 res = gram_generate_calibration(&ctx, &profile2);
418 if (res != GRAM_ERR_NONE) {
419 puts("failed\n");
420 gram_load_calibration(&ctx, &profile);
421 } else {
422 gram_load_calibration(&ctx, &profile2);
423 }
424 puts("done\n");
425
426 puts("Auto calibration profile:");
427 puts("p0 rdly:");
428 uart_writeuint32(profile2.rdly_p0);
429 puts(" p1 rdly:");
430 uart_writeuint32(profile2.rdly_p1);
431 puts("\n");
432 #endif
433
434 puts("Reloading built-in calibration profile...");
435 gram_load_calibration(&ctx, &profile);
436
437 puts("DRAM test... \n");
438 for (size_t i = 0; i < kNumIterations; i++) {
439 writel(0xDEAF0000 | i*4, (unsigned long)&(ram[i]));
440 }
441
442 #if 0
443 for (int dly = 0; dly < 8; dly++) {
444 failcnt = 0;
445 profile2.rdly_p0 = dly;
446 profile2.rdly_p1 = dly;
447 puts("p0 rdly:");
448 uart_writeuint32(profile2.rdly_p0);
449 puts(" p1 rdly:");
450 uart_writeuint32(profile2.rdly_p1);
451 gram_load_calibration(&ctx, &profile2);
452 for (size_t i = 0; i < kNumIterations; i++) {
453 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
454 puts("fail : *(0x");
455 uart_writeuint32((unsigned long)(&ram[i]));
456 puts(") = ");
457 uart_writeuint32(readl((unsigned long)&(ram[i])));
458 puts("\n");
459 failcnt++;
460
461 if (failcnt > 10) {
462 puts("Test canceled (more than 10 errors)\n");
463 break;
464 }
465 }
466 }
467 }
468 #else
469 failcnt = 0;
470 for (size_t i = 0; i < kNumIterations; i++) {
471 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
472 puts("fail : *(0x");
473 uart_writeuint32((unsigned long)(&ram[i]));
474 puts(") = ");
475 uart_writeuint32(readl((unsigned long)&(ram[i])));
476 puts("\n");
477 failcnt++;
478
479 if (failcnt > 10) {
480 puts("Test canceled (more than 10 errors)\n");
481 break;
482 }
483 }
484 }
485 }
486 #endif
487 puts("done\n");
488
489 #if 0 // ooo, annoying: won't work. no idea why
490 // temporary hard-hack: boot directly from QSPI. really
491 // should do something like detect at least... something
492 if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH))
493 {
494 // jump to absolute address
495 mtspr(8, SPI_FLASH_BASE); // move address to LR
496 __asm__ volatile("blr");
497 return 0;
498 }
499 #endif
500
501 // memcpy from SPI Flash then boot
502 if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH) &&
503 (failcnt == 0))
504 {
505 /*
506 puts("ELF @ QSPI\n");
507 // identify ELF, copy if present, and get the start address
508 unsigned long faddr = copy_flash(spi_offs,
509 0x600000); // hack!
510 if (faddr != -1ul) {
511 // jump to absolute address
512 mtspr(8, faddr); // move address to LR
513 __asm__ volatile("blr");
514
515 // works with head.S which copies r3 into ctr then does bctr
516 return faddr;
517 }
518 puts("copy QSPI\n");
519 */
520 // another terrible hack: copy from flash at offset 0x600000
521 // a block of size 0x600000 into mem address 0x600000, then
522 // jump to it. this allows a dtb image to be executed
523 puts("copy QSPI\n");
524 volatile uint32_t *mem = (uint32_t*)0x1000000;
525 fl_read(mem, // destination in RAM
526 0x600000, // offset into QSPI
527 0x8000); // length - shorter (testing) 0x8000);
528 //0x1000000); // length
529 puts("dump mem\n");
530 for (int i=0;i<256;i++) {
531 tmp = readl((unsigned long)&(mem[i]));
532 uart_writeuint32(tmp);
533 puts(" ");
534 if ((i & 0x7) == 0x7) puts("\r\n");
535 }
536 puts("\r\n");
537 mtspr(8, 0x1000000); // move address to LR
538 __asm__ volatile("blr");
539 }
540
541 return 0;
542 }
543