5 #include "microwatt_soc.h"
/*
 * Write `val` into the special-purpose register selected by `sprnum`,
 * using the PowerPC `mtspr` instruction.
 *
 * sprnum: SPR number. It is bound with the "i" (immediate) constraint, so it
 *         must be a compile-time constant at the point the call is inlined.
 * val:    value to move into the register (passed in a general register).
 *
 * The statement is marked volatile so the compiler does not drop or merge it.
 */
static inline void mtspr(int sprnum, unsigned long val)
{
	__asm__ volatile("mtspr %0,%1" : : "i" (sprnum), "r" (val));
}
/*
 * Perform one 32-bit volatile load from `addr` and return the value read.
 *
 * The access goes through a volatile-qualified pointer so the compiler emits
 * exactly one load and does not cache or elide it. The cast keeps the `const`
 * qualifier of the parameter instead of silently discarding it.
 */
static inline uint32_t read32(const void *addr)
{
	return *(const volatile uint32_t *)addr;
}
/*
 * Perform one 32-bit volatile store of `value` to `addr`.
 *
 * The access goes through a volatile-qualified pointer so the compiler emits
 * exactly one store and does not reorder it against other volatile accesses.
 */
static inline void write32(void *addr, uint32_t value)
{
	*(volatile uint32_t *)addr = value;
}
36 uint32_t zero0
; // reserved
37 uint32_t zero1
; // reserved
/*
 * Print `val` as exactly eight upper-case hexadecimal digits, most
 * significant nibble first, issuing one putchar() per digit. No prefix and
 * no line ending are written.
 *
 * The digits are extracted with shifts rather than by indexing the bytes of
 * `val` through a uint8_t pointer: byte indexing only yields MSB-first output
 * on a little-endian host, whereas shifting gives the same text regardless of
 * byte order (and is identical to the byte-indexed form on little-endian).
 */
void uart_writeuint32(uint32_t val) {
	static const char lut[] = "0123456789ABCDEF";
	int shift;

	/* 28, 24, ... 0: walk the eight nibbles from the top down */
	for (shift = 28; shift >= 0; shift -= 4) {
		putchar(lut[(val >> shift) & 0xF]);
	}
}
// Firmware-local copy routine: treats `src` and `dest` as char arrays and
// assigns dest_char[i] = src_char[i].
//
// dest: destination buffer
// src:  source buffer (not const-qualified here)
// n:    size argument; NOTE(review): the loop header that consumes `n` is not
//       visible in this excerpt -- presumably a byte count, confirm.
//
// NOTE(review): this shares its name with the ISO C memcpy() but is declared
// to return void, so callers cannot use a returned pointer.
55 void memcpy(void *dest
, void *src
, size_t n
) {
57 // view src and dest through char pointers so the copy works one byte at a time
58 char *src_char
= (char *)src
;
59 char *dest_char
= (char *)dest
;
// Taken whenever the index is a multiple of 4096.
// NOTE(review): the body of this branch is not visible in this excerpt --
// presumably a progress report; confirm against the full file.
62 if ((i
% 4096) == 0) {
68 dest_char
[i
] = src_char
[i
]; // copy a single byte from src to dest
// Word-wise variant of the copy routine: treats `src` and `dest` as arrays of
// uint32_t and copies n/4 elements.
//
// dest: destination buffer, accessed as uint32_t
// src:  source buffer, accessed as uint32_t
// n:    size in bytes; the loop bound is n/4, so when n is not a multiple of
//       4 the trailing n % 4 bytes are not copied.
//
// NOTE(review): no alignment check is visible; presumably both pointers are
// expected to be 4-byte aligned -- confirm with callers.
73 void memcpy4(void *dest
, void *src
, size_t n
) {
75 // view src and dest as arrays of 32-bit words (the names still say "char")
76 uint32_t *src_char
= (uint32_t *)src
;
77 uint32_t *dest_char
= (uint32_t *)dest
;
78 for (i
=0; i
<n
/4; i
++) {
// Taken whenever the word index is a multiple of 4096.
// NOTE(review): the body of this branch is not visible in this excerpt --
// presumably a progress report; confirm against the full file.
80 if ((i
% 4096) == 0) {
86 dest_char
[i
] = src_char
[i
]; // copy one 32-bit word per iteration
97 #define TERCEL_SPI_REG_SYS_PHY_CFG1 0x10
98 #define TERCEL_SPI_REG_SYS_FLASH_CFG5 0x24
99 #define TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK 0xff
100 #define TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT 0
101 #define TERCEL_SPI_FLASH_EN_MULTCYC_READ_MASK 0x1
102 #define TERCEL_SPI_FLASH_EN_MULTCYC_READ_SHIFT 0
103 static inline uint32_t read_tercel_register(uint8_t reg
)
105 return readl((unsigned long)(SPI_FCTRL_BASE
+reg
));
108 static inline void write_tercel_register(uint8_t reg
, uint32_t value
)
110 writel(value
, (unsigned long)(SPI_FCTRL_BASE
+reg
));
113 // TODO: need to use this
114 // https://gitlab.raptorengineering.com/kestrel-collaboration/kestrel-firmware/bare-metal-firmware/-/blob/master/main.c#L2328
116 /* this is a "level 1" speed-up, which gets an initial improvement of 10-50x
117 * over the default speed (which is a scant 100 bytes per second).
119 static void crank_up_qspi_level1(void)
121 // WARNING: KESTREL SPECIFIC
122 // Set SPI clock cycle divider to 1
124 dword
= read_tercel_register(TERCEL_SPI_REG_SYS_PHY_CFG1
);
125 dword
&= ~(TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK
<<
126 TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT
);
127 dword
|= ((1 & TERCEL_SPI_PHY_CLOCK_DIVISOR_MASK
) <<
128 TERCEL_SPI_PHY_CLOCK_DIVISOR_SHIFT
);
129 write_tercel_register(TERCEL_SPI_REG_SYS_PHY_CFG1
, dword
);
130 // Enable read merging
131 dword
= read_tercel_register(TERCEL_SPI_REG_SYS_FLASH_CFG5
);
132 dword
|= (TERCEL_SPI_FLASH_EN_MULTCYC_READ_MASK
<<
133 TERCEL_SPI_FLASH_EN_MULTCYC_READ_SHIFT
);
134 write_tercel_register(TERCEL_SPI_REG_SYS_FLASH_CFG5
, dword
);
// Copy `size` bytes out of the memory-mapped SPI flash window, starting at
// SPI_FLASH_BASE + offset, using the file's memcpy().
//
// dst:    destination buffer
// offset: byte offset from SPI_FLASH_BASE
// size:   number of bytes handed to memcpy()
//
// NOTE(review): `d` is not declared in the visible lines -- presumably a
// local alias of `dst`; confirm.
// NOTE(review): the return statement is not visible; callers test the result
// with `!fl_read(...)`, so presumably true means success -- confirm.
137 static bool fl_read(void *dst
, uint32_t offset
, uint32_t size
)
140 memcpy(d
, (void *)(unsigned long)(SPI_FLASH_BASE
+ offset
), size
);
// Load an ELF image from SPI flash into memory and return its entry address.
//
// offset:   byte offset of the ELF image within the flash window
// dst_offs: displacement added to every segment's p_vaddr and to e_entry
//
// Visible steps: read the ELF header, check it is ELF64 / little-endian /
// PPC64, walk the program headers, copy segments from flash with fl_read(),
// print the entry address, dump 64 words starting at the entry address, and
// return dst_offs + e_entry.
//
// NOTE(review): the declarations of `ehdr`, `ph` and `addr`, the assignment
// of `size`, and the statements executed when a check fails are not visible
// in this excerpt.
144 static unsigned long copy_flash(unsigned int offset
, unsigned int dst_offs
)
148 unsigned int i
, poff
, size
, off
;
151 puts("Trying flash...\r\n");
// Fetch the ELF header from the start of the image.
152 if (!fl_read(&ehdr
, offset
, sizeof(ehdr
)))
// Reject anything that is not a 64-bit ELF file.
154 if (!IS_ELF(ehdr
) || ehdr
.e_ident
[EI_CLASS
] != ELFCLASS64
) {
155 puts("Doesn't look like an elf64\r\n");
// Reject images that are not little-endian PPC64.
158 if (ehdr
.e_ident
[EI_DATA
] != ELFDATA2LSB
||
159 ehdr
.e_machine
!= EM_PPC64
) {
160 puts("Not a ppc64le binary\r\n");
// The program header table sits e_phoff bytes into the image.
164 poff
= offset
+ ehdr
.e_phoff
;
165 for (i
= 0; i
< ehdr
.e_phnum
; i
++) {
166 if (!fl_read(&ph
, poff
, sizeof(ph
)))
// NOTE(review): the statement guarded by this test is not visible --
// presumably non-PT_LOAD entries are skipped; confirm.
168 if (ph
.p_type
!= PT_LOAD
)
171 /* XXX Add bound checking ! */
173 addr
= (void *)ph
.p_vaddr
;
// Flash position of this segment's data.
174 off
= offset
+ ph
.p_offset
;
175 //printf("Copy segment %d (0x%x bytes) to %p\n", i, size, addr);
176 puts("Copy segment ");
179 uart_writeuint32(size
);
// Only the low 32 bits of the address are printed.
181 uart_writeuint32((uint32_t)(unsigned long)addr
);
// The segment lands at addr + dst_offs; the result of fl_read() is not checked here.
183 fl_read(addr
+dst_offs
, off
, size
);
// Step to the next program header entry.
184 poff
+= ehdr
.e_phentsize
;
187 puts("Booting from DRAM at");
188 uart_writeuint32((unsigned int)(dst_offs
+ehdr
.e_entry
));
// Print 64 consecutive 32-bit words starting at the entry address,
// with a line break after every eighth word.
191 puts("Dump DRAM\r\n");
192 for (i
= 0; i
< 64; i
++) {
193 uart_writeuint32(readl(dst_offs
+ehdr
.e_entry
+(i
*4)));
195 if ((i
& 7) == 7) puts("\r\n");
199 //flush_cpu_icache();
200 return dst_offs
+ehdr
.e_entry
;
// Print the first 8 bytes of e_ident.
// NOTE(review): this follows the return above, so presumably it belongs to a
// failure path entered through a label that is not visible here -- confirm.
203 for (i
= 0; i
< 8; i
++) {
204 uart_writeuint32(ehdr
.e_ident
[i
]);
213 // Defining gram_[read|write] allows a trace of all register
214 // accesses to be dumped to console for debugging purposes.
215 // To use, define GRAM_RW_FUNC in gram.h
// Tracing register read: prints the address, reads a 32-bit word from it
// with readl(), then prints the word that was read.
//
// ctx:  not referenced in the visible lines
// addr: address of the register to read
//
// NOTE(review): the declaration of `dword` and the return statement are not
// visible in this excerpt -- presumably the word read is returned; confirm.
216 uint32_t gram_read(const struct gramCtx
*ctx
, void *addr
) {
220 uart_writeuint32((unsigned long)addr
);
221 dword
= readl((unsigned long)addr
);
223 uart_writeuint32((unsigned long)dword
);
// Tracing register write: prints "gram_write: ", the address and the value,
// then stores the value with writel().
//
// ctx:   not referenced in the visible lines
// addr:  address of the register to write
// value: 32-bit word to store
//
// NOTE(review): the return statement is not visible in this excerpt, so the
// meaning of the int result cannot be stated from here.
229 int gram_write(const struct gramCtx
*ctx
, void *addr
, uint32_t value
) {
230 puts("gram_write: ");
231 uart_writeuint32((unsigned long)addr
);
233 uart_writeuint32((unsigned long)value
);
// The trace is printed before the store is issued.
234 writel(value
, (unsigned long)addr
);
241 const int kNumIterations
= 14;
242 int res
, failcnt
= 0;
244 unsigned long ftr
, spi_offs
=0x0;
245 volatile uint32_t *ram
= (uint32_t*)MEMORY_BASE
;
248 //puts("Firmware launched...\n");
251 puts(" Soc signature: ");
252 tmp
= readl(SYSCON_BASE
+ SYS_REG_SIGNATURE
);
253 uart_writeuint32(tmp
);
254 tmp
= readl(SYSCON_BASE
+ SYS_REG_SIGNATURE
+4);
255 uart_writeuint32(tmp
);
256 puts(" Soc features: ");
257 ftr
= readl(SYSCON_BASE
+ SYS_REG_INFO
);
258 if (ftr
& SYS_REG_INFO_HAS_UART
)
260 if (ftr
& SYS_REG_INFO_HAS_DRAM
)
262 if (ftr
& SYS_REG_INFO_HAS_BRAM
)
264 if (ftr
& SYS_REG_INFO_HAS_SPI_FLASH
)
266 if (ftr
& SYS_REG_INFO_HAS_LITEETH
)
270 if (ftr
& SYS_REG_INFO_HAS_SPI_FLASH
) {
271 // speed up the QSPI to at least a sane level
272 crank_up_qspi_level1();
274 puts("SPI Offset: ");
275 spi_offs
= readl(SYSCON_BASE
+ SYS_REG_SPI_INFO
);
276 uart_writeuint32(spi_offs
);
284 if (ftr
& SYS_REG_INFO_HAS_SPI_FLASH
) {
285 // print out configuration parameters for QSPI
286 volatile uint32_t *qspi_cfg
= (uint32_t*)SPI_FCTRL_BASE
;
287 for (int k
=0; k
< 2; k
++) {
288 tmp
= readl((unsigned long)&(qspi_cfg
[k
]));
292 uart_writeuint32(tmp
);
297 if (ftr
& SYS_REG_INFO_HAS_SPI_FLASH
) {
298 volatile uint32_t *qspi
= (uint32_t*)SPI_FLASH_BASE
+0x900000;
299 //volatile uint8_t *qspi_bytes = (uint8_t*)spi_offs;
300 // let's not, eh? writel(0xDEAF0123, (unsigned long)&(qspi[0]));
301 // tmp = readl((unsigned long)&(qspi[0]));
302 for (int i
=0;i
<2;i
++) {
303 tmp
= readl((unsigned long)&(qspi
[i
]));
304 uart_writeuint32(tmp
);
306 if ((i
& 0x7) == 0x7) puts("\r\n");
310 for (i=0;i<256;i++) {
311 tmp = readb((unsigned long)&(qspi_bytes[i]));
312 uart_writeuint32(tmp);
319 tmp
= readl((unsigned long)&(qspi
[0x1000/4]));
321 uart_writeuint32(tmp
);
325 unsigned char c
= getchar();
327 if (c
== 13) { // if CR send LF
330 tmp
= readl((unsigned long)&(qspi
[1<<i
]));
332 uart_writeuint32(1<<i
);
334 uart_writeuint32(tmp
);
345 volatile uint32_t *hyperram
= (uint32_t*)0x00000000; // at 0x0 for arty
346 writel(0xDEAF0123, (unsigned long)&(hyperram
[0]));
347 tmp
= readl((unsigned long)&(hyperram
[0]));
350 unsigned char c
= getchar();
352 if (c
== 13) { // if CR send LF
355 writel(0xDEAF0123+i
, (unsigned long)&(hyperram
[1<<i
]));
356 tmp
= readl((unsigned long)&(hyperram
[1<<i
]));
358 uart_writeuint32(1<<i
);
360 uart_writeuint32(tmp
);
369 // init DRAM only if SYSCON says it exists (duh)
370 if (ftr
& SYS_REG_INFO_HAS_DRAM
)
372 puts("DRAM init... ");
376 struct gramProfile profile
= {
378 0xb20, 0x806, 0x200, 0x0
385 struct gramProfile profile
= {
387 0x0320, 0x0006, 0x0200, 0x0000
393 struct gramProfile profile2
;
394 gram_init(&ctx
, &profile
, (void*)MEMORY_BASE
,
395 (void*)DRAM_CTRL_BASE
,
396 (void*)DRAM_INIT_BASE
);
399 puts("MR profile: ");
400 uart_writeuint32(profile
.mode_registers
[0]);
402 uart_writeuint32(profile
.mode_registers
[1]);
404 uart_writeuint32(profile
.mode_registers
[2]);
406 uart_writeuint32(profile
.mode_registers
[3]);
410 // Early read test for WB access sim
411 //uart_writeuint32(*ram);
415 for (size_t i
= 0; i
< 8; i
++) {
416 profile2
.rdly_p0
= i
;
417 gram_load_calibration(&ctx
, &profile2
);
418 gram_reset_burstdet(&ctx
);
420 for (size_t j
= 0; j
< 128; j
++) {
421 tmp
= readl((unsigned long)&(ram
[i
]));
423 if (gram_read_burstdet(&ctx
, 0)) {
432 for (size_t i
= 0; i
< 8; i
++) {
433 profile2
.rdly_p1
= i
;
434 gram_load_calibration(&ctx
, &profile2
);
435 gram_reset_burstdet(&ctx
);
436 for (size_t j
= 0; j
< 128; j
++) {
437 tmp
= readl((unsigned long)&(ram
[i
]));
439 if (gram_read_burstdet(&ctx
, 1)) {
447 puts("Auto calibrating... ");
448 res
= gram_generate_calibration(&ctx
, &profile2
);
449 if (res
!= GRAM_ERR_NONE
) {
451 gram_load_calibration(&ctx
, &profile
);
453 gram_load_calibration(&ctx
, &profile2
);
457 puts("Auto calibration profile:");
459 uart_writeuint32(profile2
.rdly_p0
);
461 uart_writeuint32(profile2
.rdly_p1
);
465 puts("Reloading built-in calibration profile...");
466 gram_load_calibration(&ctx
, &profile
);
468 puts("DRAM test... \n");
469 for (size_t i
= 0; i
< kNumIterations
; i
++) {
470 writel(0xDEAF0000 | i
*4, (unsigned long)&(ram
[i
]));
474 for (int dly
= 0; dly
< 8; dly
++) {
476 profile2
.rdly_p0
= dly
;
477 profile2
.rdly_p1
= dly
;
479 uart_writeuint32(profile2
.rdly_p0
);
481 uart_writeuint32(profile2
.rdly_p1
);
482 gram_load_calibration(&ctx
, &profile2
);
483 for (size_t i
= 0; i
< kNumIterations
; i
++) {
484 if (readl((unsigned long)&(ram
[i
])) != (0xDEAF0000 | i
*4)) {
486 uart_writeuint32((unsigned long)(&ram
[i
]));
488 uart_writeuint32(readl((unsigned long)&(ram
[i
])));
493 puts("Test canceled (more than 10 errors)\n");
501 for (size_t i
= 0; i
< kNumIterations
; i
++) {
502 if (readl((unsigned long)&(ram
[i
])) != (0xDEAF0000 | i
*4)) {
504 uart_writeuint32((unsigned long)(&ram
[i
]));
506 uart_writeuint32(readl((unsigned long)&(ram
[i
])));
511 puts("Test canceled (more than 10 errors)\n");
520 #if 0 // ooo, annoying: won't work. no idea why
521 // temporary hard-hack: boot directly from QSPI. really
522 // should do something like detect at least... something
523 if ((ftr
& SYS_REG_INFO_HAS_SPI_FLASH
))
525 // jump to absolute address
526 mtspr(8, SPI_FLASH_BASE
); // move address to LR
527 __asm__
volatile("blr");
532 // memcpy from SPI Flash then boot
533 if ((ftr
& SYS_REG_INFO_HAS_SPI_FLASH
) &&
537 puts("ELF @ QSPI\n");
538 // identify ELF, copy if present, and get the start address
539 unsigned long faddr = copy_flash(spi_offs,
542 // jump to absolute address
543 mtspr(8, faddr); // move address to LR
544 __asm__ volatile("blr");
546 // works with head.S which copies r3 into ctr then does bctr
551 // another terrible hack: copy from flash at offset 0x600000
552 // a block of size 0x1000000 into mem address 0x1000000, then
553 // jump to it. this allows a dtb image to be executed
555 volatile uint32_t *mem
= (uint32_t*)0x1000000;
556 fl_read(mem
, // destination in RAM
557 0x600000, // offset into QSPI
558 0x1000000); // length - shorter (testing) 0x8000);
560 for (int i
=0;i
<256;i
++) {
561 tmp
= readl((unsigned long)&(mem
[i
]));
562 uart_writeuint32(tmp
);
564 if ((i
& 0x7) == 0x7) puts("\r\n");
567 mtspr(8, 0x1000000); // move address to LR
568 __asm__
volatile("blr");