67cb1ee2560a162ec99bf41e3bd381cb725d05f2
[ls2.git] / coldboot / coldboot.c
1 #include <stdint.h>
2 #include <stdbool.h>
3
4 #include "console.h"
5 #include "microwatt_soc.h"
6 #include "io.h"
7
8 #include <stdlib.h>
9 #include <stdint.h>
10 #include <gram.h>
11
12 #include "elf64.h"
13
/*
 * Load one 32-bit word from addr.
 * The access goes through a volatile-qualified pointer so the compiler
 * performs exactly this read and does not cache or elide it.
 */
static inline uint32_t read32(const void *addr)
{
    const volatile uint32_t *word = addr;

    return *word;
}
18
/*
 * Store the 32-bit value at addr.
 * The access goes through a volatile-qualified pointer so the compiler
 * performs exactly this write and does not reorder or drop it.
 */
static inline void write32(void *addr, uint32_t value)
{
    volatile uint32_t *word = addr;

    *word = value;
}
23
/*
 * UART register block described as eleven consecutive 32-bit words.
 * Field meanings are taken from their names; check them against the
 * UART core documentation before relying on them.
 */
struct uart_regs {
    uint32_t divisor;    /* word 0 */
    uint32_t rx_data;    /* word 1 */
    uint32_t rx_rdy;     /* word 2 */
    uint32_t rx_err;     /* word 3 */
    uint32_t tx_data;    /* word 4 */
    uint32_t tx_rdy;     /* word 5 */
    uint32_t zero0;      /* word 6: reserved */
    uint32_t zero1;      /* word 7: reserved */
    uint32_t ev_status;  /* word 8 */
    uint32_t ev_pending; /* word 9 */
    uint32_t ev_enable;  /* word 10 */
};
37
/*
 * Print val to the console as exactly eight upper-case hexadecimal digits,
 * most significant digit first, with no prefix and no trailing newline.
 *
 * Nibbles are extracted with shifts rather than by aliasing the value as a
 * byte array, so the digit order does not depend on the CPU's endianness.
 */
void uart_writeuint32(uint32_t val) {
    /* Shared read-only digit table; not rebuilt on the stack per call. */
    static const char lut[] = "0123456789ABCDEF";

    for (int shift = 28; shift >= 0; shift -= 4) {
        putchar(lut[(val >> shift) & 0xF]);
    }
}
49
/*
 * Copy n bytes from src to dest, one byte at a time, in ascending order.
 *
 * While the progress trace below is enabled, the current offset is printed
 * to the console once every 4096 bytes (including offset 0).
 *
 * The signature intentionally stays as it was (void return, non-const src)
 * because other code may declare it this way; it is not the standard
 * prototype. The regions must not overlap in a way that a forward copy
 * would corrupt.
 */
void memcpy(void *dest, void *src, size_t n) {
    const char *from = src;
    char *to = dest;

    /* size_t index: matches n, so no signed/unsigned compare or overflow. */
    for (size_t i = 0; i < n; i++) {
#if 1
        if ((i % 4096) == 0) {
            puts("memcpy ");
            uart_writeuint32((uint32_t)i);
            puts("\r\n");
        }
#endif
        to[i] = from[i]; /* copy contents byte by byte */
    }
}
66
/*
 * Deliberately empty handler.
 * NOTE(review): presumably the interrupt entry hook expected by the
 * startup code; confirm against the linker script / head file.
 */
void isr(void)
{
}
70
/*
 * WARNING: KESTREL SPECIFIC
 *
 * Tercel SPI controller definitions. The REG values are byte offsets that
 * the register accessors in this file add to SPI_FCTRL_BASE; each MASK /
 * SHIFT pair describes one bit field inside the corresponding register.
 */

/* Register offsets */
#define TERCEL_SPI_REG_SYS_PHY_CFG1 0x10
#define TERCEL_SPI_REG_SYS_FLASH_CFG5 0x24

/* PHY_CFG1: SPI clock divisor field */
#define TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK 0xff
#define TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT 0

/* FLASH_CFG5: multi-cycle read (read merging) enable bit */
#define TERCEL_SPI_FLASH_EN_MULTCYC_READ_MASK 0x1
#define TERCEL_SPI_FLASH_EN_MULTCYC_READ_SHIFT 0
79 static inline uint32_t read_tercel_register(uint8_t reg)
80 {
81 return readl((unsigned long)(SPI_FCTRL_BASE+reg));
82 }
83
84 static inline void write_tercel_register(uint8_t reg, uint32_t value)
85 {
86 writel(value, (unsigned long)(SPI_FCTRL_BASE+reg));
87 }
88
89 // TODO: need to use this
90 // https://gitlab.raptorengineering.com/kestrel-collaboration/kestrel-litex/litex/-/blob/master/litex/soc/software/bios/boot.c#L575
91 static bool fl_read(void *dst, uint32_t offset, uint32_t size)
92 {
93 uint8_t *d = dst;
94 memcpy(d, (void *)(unsigned long)(SPI_FLASH_BASE + offset), size);
95 return true;
96 }
97
98 static unsigned long copy_flash(unsigned int offset)
99 {
100 Elf64_Ehdr ehdr;
101 Elf64_Phdr ph;
102 unsigned int i, poff, size, off;
103 void *addr;
104
105 // WARNING
106 // KESTREL SPECIFIC
107 // Set SPI clock cycle divider to 1
108 uint32_t dword;
109 dword = read_tercel_register(TERCEL_SPI_REG_SYS_PHY_CFG1);
110 dword &= ~(TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK <<
111 TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT);
112 dword |= ((1 & TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK) <<
113 TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT);
114 write_tercel_register(TERCEL_SPI_REG_SYS_PHY_CFG1, dword);
115 // Enable read merging
116 dword = read_tercel_register(TERCEL_SPI_REG_SYS_FLASH_CFG5);
117 dword |= (TERCEL_SPI_FLASH_EN_MULTCYC_READ_MASK <<
118 TERCEL_SPI_FLASH_EN_MULTCYC_READ_SHIFT);
119 write_tercel_register(TERCEL_SPI_REG_SYS_FLASH_CFG5, dword);
120
121 puts("Trying flash...\r\n");
122 if (!fl_read(&ehdr, offset, sizeof(ehdr)))
123 return -1ul;
124 if (!IS_ELF(ehdr) || ehdr.e_ident[EI_CLASS] != ELFCLASS64) {
125 puts("Doesn't look like an elf64\r\n");
126 goto dump;
127 }
128 if (ehdr.e_ident[EI_DATA] != ELFDATA2LSB ||
129 ehdr.e_machine != EM_PPC64) {
130 puts("Not a ppc64le binary\r\n");
131 goto dump;
132 }
133
134 poff = offset + ehdr.e_phoff;
135 for (i = 0; i < ehdr.e_phnum; i++) {
136 if (!fl_read(&ph, poff, sizeof(ph)))
137 goto dump;
138 if (ph.p_type != PT_LOAD)
139 continue;
140
141 /* XXX Add bound checking ! */
142 size = ph.p_filesz;
143 addr = (void *)ph.p_vaddr;
144 off = offset + ph.p_offset;
145 //printf("Copy segment %d (0x%x bytes) to %p\n", i, size, addr);
146 puts("Copy segment ");
147 uart_writeuint32(i);
148 puts(" size ");
149 uart_writeuint32(size);
150 puts(" addr ");
151 uart_writeuint32((uint32_t)(unsigned long)addr);
152 puts("\r\n");
153 fl_read(addr, off, size);
154 poff += ehdr.e_phentsize;
155 }
156
157 puts("Booting from DRAM at");
158 uart_writeuint32((unsigned int)ehdr.e_entry);
159 puts("\r\n");
160
161 puts("Dump DRAM\r\n");
162 for (i = 0; i < 64; i++) {
163 uart_writeuint32(ehdr.e_entry+(i*4));
164 if ((i & 7) == 7) puts("\r\n");
165 }
166 puts("\r\n");
167
168 //flush_cpu_icache();
169 return ehdr.e_entry;
170 dump:
171 puts("HDR: \r\n");
172 for (i = 0; i < 8; i++) {
173 uart_writeuint32(ehdr.e_ident[i]);
174 puts("\r\n");
175 }
176
177 return -1ul;
178 }
179
180
/*
 * XXX
 * Defining gram_[read|write] allows a trace of all register accesses to be
 * dumped to console for debugging purposes.
 * To use, define GRAM_RW_FUNC in gram.h
 *
 * gram_read: print "gram_read: <addr>", perform the 32-bit read with readl,
 * then print ": <value>" and a newline, and return the value read.
 * The address is printed before the access is made. ctx is not used.
 */
uint32_t gram_read(const struct gramCtx *ctx, void *addr) {
    unsigned long reg_addr = (unsigned long)addr;
    uint32_t value;

    puts("gram_read: ");
    uart_writeuint32(reg_addr);

    value = readl(reg_addr);

    puts(": ");
    uart_writeuint32(value);
    puts("\n");

    return value;
}
197
/*
 * Tracing register write: print "gram_write: <addr>: <value>", perform the
 * 32-bit write with writel, then print a newline. The address and value are
 * printed before the access is made. ctx is not used. Always returns 0.
 */
int gram_write(const struct gramCtx *ctx, void *addr, uint32_t value) {
    unsigned long reg_addr = (unsigned long)addr;

    puts("gram_write: ");
    uart_writeuint32(reg_addr);
    puts(": ");
    uart_writeuint32(value);

    writel(value, reg_addr);

    puts("\n");
    return 0;
}
208
209 int main(void) {
210 const int kNumIterations = 14;
211 int res, failcnt = 0;
212 uint32_t tmp;
213 unsigned long ftr, spi_offs=0x0;
214 volatile uint32_t *ram = (uint32_t*)MEMORY_BASE;
215
216 console_init();
217 //puts("Firmware launched...\n");
218
219 #if 1
220 puts(" Soc signature: ");
221 tmp = readl(SYSCON_BASE + SYS_REG_SIGNATURE);
222 uart_writeuint32(tmp);
223 puts(" Soc features: ");
224 ftr = readl(SYSCON_BASE + SYS_REG_INFO);
225 if (ftr & SYS_REG_INFO_HAS_UART)
226 puts("UART ");
227 if (ftr & SYS_REG_INFO_HAS_DRAM)
228 puts("DRAM ");
229 if (ftr & SYS_REG_INFO_HAS_BRAM)
230 puts("BRAM ");
231 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH)
232 puts("SPIFLASH ");
233 if (ftr & SYS_REG_INFO_HAS_LITEETH)
234 puts("ETHERNET ");
235 puts("\r\n");
236
237 if (ftr & SYS_REG_INFO_HAS_SPI_FLASH) {
238 puts("SPI Offset: ");
239 spi_offs = readl(SYSCON_BASE + SYS_REG_SPI_INFO);
240 uart_writeuint32(spi_offs);
241 puts("\r\n");
242 }
243
244 #endif
245
246 #if 1
247 #if 1
248 // print out configuration parameters for QSPI
249 volatile uint32_t *qspi_cfg = (uint32_t*)SPI_FCTRL_BASE;
250 for (int k=0; k < 2; k++) {
251 tmp = readl((unsigned long)&(qspi_cfg[k]));
252 puts("cfg");
253 uart_writeuint32(k);
254 puts(" ");
255 uart_writeuint32(tmp);
256 puts("\n");
257 }
258 #endif
259 volatile uint32_t *qspi = (uint32_t*)spi_offs;
260 //volatile uint8_t *qspi_bytes = (uint8_t*)spi_offs;
261 // let's not, eh? writel(0xDEAF0123, (unsigned long)&(qspi[0]));
262 // tmp = readl((unsigned long)&(qspi[0]));
263 for (int i=0;i<256;i++) {
264 tmp = readl((unsigned long)&(qspi[i]));
265 uart_writeuint32(tmp);
266 puts(" ");
267 if ((i & 0x7) == 0x7) puts("\r\n");
268 }
269 puts("\r\n");
270 /*
271 for (i=0;i<256;i++) {
272 tmp = readb((unsigned long)&(qspi_bytes[i]));
273 uart_writeuint32(tmp);
274 puts(" ");
275 }
276 */
277 #if 0
278 while (1) {
279 // quick read
280 tmp = readl((unsigned long)&(qspi[0x1000/4]));
281 puts("read 0x1000");
282 uart_writeuint32(tmp);
283 putchar(10);
284 }
285 while (1) {
286 unsigned char c = getchar();
287 putchar(c);
288 if (c == 13) { // if CR send LF
289
290 // quick read
291 tmp = readl((unsigned long)&(qspi[1<<i]));
292 puts("read ");
293 uart_writeuint32(1<<i);
294 puts(" ");
295 uart_writeuint32(tmp);
296 putchar(10);
297 i++;
298 }
299 }
300
301 return 0;
302 #endif
303 #endif
304 #if 0
305 volatile uint32_t *hyperram = (uint32_t*)0xa0000000;
306 writel(0xDEAF0123, (unsigned long)&(hyperram[0]));
307 tmp = readl((unsigned long)&(hyperram[0]));
308 while (1) {
309 unsigned char c = getchar();
310 putchar(c);
311 if (c == 13) { // if CR send LF
312
313 // quick write/read
314 writel(0xDEAF0123+i, (unsigned long)&(hyperram[1<<i]));
315 tmp = readl((unsigned long)&(hyperram[1<<i]));
316 puts("read ");
317 uart_writeuint32(1<<i);
318 puts(" ");
319 uart_writeuint32(tmp);
320 putchar(10);
321 i++;
322 }
323 }
324
325 return 0;
326 #endif
327
328 for (int persistence=0; persistence < 1000; persistence++) {
329 puts("DRAM init... ");
330
331 struct gramCtx ctx;
332 #if 1
333 struct gramProfile profile = {
334 .mode_registers = {
335 0xb20, 0x806, 0x200, 0x0
336 },
337 .rdly_p0 = 2,
338 .rdly_p1 = 2,
339 };
340 #endif
341 #if 0
342 struct gramProfile profile = {
343 .mode_registers = {
344 0x0320, 0x0006, 0x0200, 0x0000
345 },
346 .rdly_p0 = 1,
347 .rdly_p1 = 1,
348 };
349 #endif
350 struct gramProfile profile2;
351 gram_init(&ctx, &profile, (void*)MEMORY_BASE,
352 (void*)DRAM_CTRL_BASE,
353 (void*)DRAM_INIT_BASE);
354 puts("done\n");
355
356 puts("MR profile: ");
357 uart_writeuint32(profile.mode_registers[0]);
358 puts(" ");
359 uart_writeuint32(profile.mode_registers[1]);
360 puts(" ");
361 uart_writeuint32(profile.mode_registers[2]);
362 puts(" ");
363 uart_writeuint32(profile.mode_registers[3]);
364 puts("\n");
365
366 // FIXME
367 // Early read test for WB access sim
368 //uart_writeuint32(*ram);
369
370 #if 1
371 puts("Rdly\np0: ");
372 for (size_t i = 0; i < 8; i++) {
373 profile2.rdly_p0 = i;
374 gram_load_calibration(&ctx, &profile2);
375 gram_reset_burstdet(&ctx);
376
377 for (size_t j = 0; j < 128; j++) {
378 tmp = readl((unsigned long)&(ram[i]));
379 }
380 if (gram_read_burstdet(&ctx, 0)) {
381 puts("1");
382 } else {
383 puts("0");
384 }
385 }
386 puts("\n");
387
388 puts("Rdly\np1: ");
389 for (size_t i = 0; i < 8; i++) {
390 profile2.rdly_p1 = i;
391 gram_load_calibration(&ctx, &profile2);
392 gram_reset_burstdet(&ctx);
393 for (size_t j = 0; j < 128; j++) {
394 tmp = readl((unsigned long)&(ram[i]));
395 }
396 if (gram_read_burstdet(&ctx, 1)) {
397 puts("1");
398 } else {
399 puts("0");
400 }
401 }
402 puts("\n");
403
404 puts("Auto calibrating... ");
405 res = gram_generate_calibration(&ctx, &profile2);
406 if (res != GRAM_ERR_NONE) {
407 puts("failed\n");
408 gram_load_calibration(&ctx, &profile);
409 } else {
410 gram_load_calibration(&ctx, &profile2);
411 }
412 puts("done\n");
413
414 puts("Auto calibration profile:");
415 puts("p0 rdly:");
416 uart_writeuint32(profile2.rdly_p0);
417 puts(" p1 rdly:");
418 uart_writeuint32(profile2.rdly_p1);
419 puts("\n");
420 #endif
421
422 puts("Reloading built-in calibration profile...");
423 gram_load_calibration(&ctx, &profile);
424
425 puts("DRAM test... \n");
426 for (size_t i = 0; i < kNumIterations; i++) {
427 writel(0xDEAF0000 | i*4, (unsigned long)&(ram[i]));
428 }
429
430 #if 0
431 for (int dly = 0; dly < 8; dly++) {
432 failcnt = 0;
433 profile2.rdly_p0 = dly;
434 profile2.rdly_p1 = dly;
435 puts("p0 rdly:");
436 uart_writeuint32(profile2.rdly_p0);
437 puts(" p1 rdly:");
438 uart_writeuint32(profile2.rdly_p1);
439 gram_load_calibration(&ctx, &profile2);
440 for (size_t i = 0; i < kNumIterations; i++) {
441 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
442 puts("fail : *(0x");
443 uart_writeuint32((unsigned long)(&ram[i]));
444 puts(") = ");
445 uart_writeuint32(readl((unsigned long)&(ram[i])));
446 puts("\n");
447 failcnt++;
448
449 if (failcnt > 10) {
450 puts("Test canceled (more than 10 errors)\n");
451 break;
452 }
453 }
454 }
455 }
456 #else
457 failcnt = 0;
458 for (size_t i = 0; i < kNumIterations; i++) {
459 if (readl((unsigned long)&(ram[i])) != (0xDEAF0000 | i*4)) {
460 puts("fail : *(0x");
461 uart_writeuint32((unsigned long)(&ram[i]));
462 puts(") = ");
463 uart_writeuint32(readl((unsigned long)&(ram[i])));
464 puts("\n");
465 failcnt++;
466
467 if (failcnt > 10) {
468 puts("Test canceled (more than 10 errors)\n");
469 break;
470 }
471 }
472 }
473 if (failcnt == 0) { // fiinally...
474 break;
475 }
476 }
477 #endif
478 puts("done\n");
479
480 // memcpy from SPI Flash to SDRAM then boot
481 if ((ftr & SYS_REG_INFO_HAS_SPI_FLASH) &&
482 (ftr & SYS_REG_INFO_HAS_DRAM) &&
483 (failcnt == 0))
484 {
485 // identify ELF, copy if present, and get the start address
486 unsigned long faddr = copy_flash(spi_offs);
487 if (faddr != -1ul) {
488 // jump to absolute address
489 return faddr;
490 }
491 }
492
493 return 0;
494 }
495