From d9d10ada1e5ade369128c4fd12fcfe1693288eed Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Tue, 11 Feb 2014 17:51:08 -0800 Subject: [PATCH] Run benchmarks in user mode --- benchmarks/common/crt.S | 141 ++++++++++++++++++++++++++++------- benchmarks/common/syscalls.c | 81 +++++++++++++++----- benchmarks/common/test.ld | 13 +++- 3 files changed, 186 insertions(+), 49 deletions(-) diff --git a/benchmarks/common/crt.S b/benchmarks/common/crt.S index fb2cc25..b273900 100644 --- a/benchmarks/common/crt.S +++ b/benchmarks/common/crt.S @@ -1,5 +1,13 @@ #include "encoding.h" +#ifdef __riscv64 +# define LREG ld +# define SREG sd +#else +# define LREG lw +# define SREG sw +#endif + .data .globl _heapend .globl environ @@ -48,6 +56,7 @@ _start: li a0, SR_U64 | SR_S64 csrs status, a0 #endif + csrc status, SR_PS # enable fp and accelerator li a0, SR_EF | SR_EA @@ -103,42 +112,120 @@ _start: csrr a0, hartid lw a1, 4(zero) - # give each core a 1KB TLS and a 127KB stack + # give each core 128KB of stack + TLS #define STKSHIFT 17 sll a2, a0, STKSHIFT add tp, tp, a2 add sp, a0, 1 sll sp, sp, STKSHIFT add sp, sp, tp - add tp, tp, 1024 - jal _init - unimp + lui t0, %tprel_hi(tls_start) + add t0, t0, %tprel_lo(tls_start) + sub tp, tp, t0 + + la t0, _init + csrw epc, t0 + sret trap_entry: - csrw sup0, t0 - csrw sup1, t1 - la t0, uarch_insn - lw t0, (t0) - csrr t1, epc - and t1, t1, ~3 - lw t1, (t1) - and t1, t1, t0 - beq t1, t0, handle_uarch_insn - - # a trap occurred that shouldn't have. - li t0, 1337 - csrw tohost, t0 -1:j 1b - -handle_uarch_insn: - # we trapped on an illegal uarch-specific CSR. just skip over it. 
- csrr t1, epc - add t1, t1, 4 - csrw epc, t1 + csrw sup0, sp + csrw sup1, t0 + csrr t0, status + andi t0, t0, SR_PS + bnez t0, 1f + la sp, kstacktop +1: + addi sp, sp, -272 + csrr t0, sup1 + + SREG x1, 8(sp) + SREG x2, 16(sp) + SREG x3, 24(sp) + SREG x4, 32(sp) + SREG x5, 40(sp) + SREG x6, 48(sp) + SREG x7, 56(sp) + SREG x8, 64(sp) + SREG x9, 72(sp) + SREG x10, 80(sp) + SREG x11, 88(sp) + SREG x12, 96(sp) + SREG x13, 104(sp) + SREG x14, 112(sp) + SREG x15, 120(sp) + SREG x16, 128(sp) + SREG x17, 136(sp) + SREG x18, 144(sp) + SREG x19, 152(sp) + SREG x20, 160(sp) + SREG x21, 168(sp) + SREG x22, 176(sp) + SREG x23, 184(sp) + SREG x24, 192(sp) + SREG x25, 200(sp) + SREG x26, 208(sp) + SREG x27, 216(sp) + SREG x28, 224(sp) + SREG x29, 232(sp) + SREG x30, 240(sp) + SREG x31, 248(sp) + csrr t0, sup0 - csrr t1, sup1 + csrr t1, status + SREG t0, 256(sp) + SREG t1, 264(sp) + + csrr a0, cause + csrr a1, epc + mv a2, sp + jal handle_trap + csrw epc, v0 + + LREG t0, 256(sp) + LREG t1, 264(sp) + csrw sup0, t0 + csrw status, t1 + + LREG x1, 8(sp) + LREG x2, 16(sp) + LREG x3, 24(sp) + LREG x4, 32(sp) + LREG x5, 40(sp) + LREG x6, 48(sp) + LREG x7, 56(sp) + LREG x8, 64(sp) + LREG x9, 72(sp) + LREG x10, 80(sp) + LREG x11, 88(sp) + LREG x12, 96(sp) + LREG x13, 104(sp) + LREG x14, 112(sp) + LREG x15, 120(sp) + LREG x16, 128(sp) + LREG x17, 136(sp) + LREG x18, 144(sp) + LREG x19, 152(sp) + LREG x20, 160(sp) + LREG x21, 168(sp) + LREG x22, 176(sp) + LREG x23, 184(sp) + LREG x24, 192(sp) + LREG x25, 200(sp) + LREG x26, 208(sp) + LREG x27, 216(sp) + LREG x28, 224(sp) + LREG x29, 232(sp) + LREG x30, 240(sp) + LREG x31, 248(sp) + + csrr sp, sup0 sret -uarch_insn: - csrr x0, uarch0 +.bss +.align 4 +.skip 4096 +kstacktop: + +.section .tbss +tls_start: diff --git a/benchmarks/common/syscalls.c b/benchmarks/common/syscalls.c index 1a53349..d506536 100644 --- a/benchmarks/common/syscalls.c +++ b/benchmarks/common/syscalls.c @@ -6,9 +6,10 @@ #include #include "encoding.h" +#define SYS_stats 1234 
#define static_assert(cond) switch(0) { case 0: case !!(long)(cond): ; } -void syscall(long which, long arg0, long arg1, long arg2) +static long handle_frontend_syscall(long which, long arg0, long arg1, long arg2) { volatile uint64_t magic_mem[8] __attribute__((aligned(64))); magic_mem[0] = which; @@ -18,34 +19,24 @@ void syscall(long which, long arg0, long arg1, long arg2) __sync_synchronize(); write_csr(tohost, (long)magic_mem); while (swap_csr(fromhost, 0) == 0); -} - -void exit(int code) -{ - write_csr(tohost, (code << 1) | 1); - while (1); -} - -void printstr(const char* s) -{ - syscall(SYS_write, 1, (long)s, strlen(s)); + return magic_mem[0]; } // In setStats, we might trap reading uarch-specific counters. -// The trap handler will skip over the instruction, but we want -// to pretend as though we read the value 0 in this case. -#define read_csr_safe(reg) ({ long __tmp = 0; \ - asm volatile ("csrr %0, " #reg : "+r"(__tmp)); \ +// The trap handler will skip over the instruction and write 0, +// but only if v0 is the destination register. 
+#define read_csr_safe(reg) ({ register long __tmp asm("v0"); \ + asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \ __tmp; }) #define NUM_COUNTERS 18 static long counters[NUM_COUNTERS]; static char* counter_names[NUM_COUNTERS]; -void setStats(int enable) +static int handle_stats(int enable) { int i = 0; #define READ_CTR(name) do { \ - if (i >= NUM_COUNTERS) exit(-1); \ + while (i >= NUM_COUNTERS) ; \ long csr = read_csr_safe(name); \ if (!enable) { csr -= counters[i]; counter_names[i] = #name; } \ counters[i++] = csr; \ @@ -56,6 +47,60 @@ void setStats(int enable) READ_CTR(uarch8); READ_CTR(uarch9); READ_CTR(uarch10); READ_CTR(uarch11); READ_CTR(uarch12); READ_CTR(uarch13); READ_CTR(uarch14); READ_CTR(uarch15); #undef READ_CTR + return 0; +} + +static void tohost_exit(int code) +{ + write_csr(tohost, (code << 1) | 1); + while (1); +} + +long handle_trap(long cause, long epc, long regs[32]) +{ + int csr_insn; + asm volatile ("lw %0, 1f; j 2f; 1: csrr v0, uarch0; 2:" : "=r"(csr_insn)); + long sys_ret = 0; + + if (cause == CAUSE_ILLEGAL_INSTRUCTION && + (*(int*)epc & csr_insn) == csr_insn) + ; + else if (cause != CAUSE_SYSCALL) + tohost_exit(1337); + else if (regs[16] == SYS_exit) + tohost_exit(regs[18]); + else if (regs[16] == SYS_stats) + sys_ret = handle_stats(regs[18]); + else + sys_ret = handle_frontend_syscall(regs[16], regs[18], regs[19], regs[20]); + + regs[16] = sys_ret; + return epc+4; +} + +static long syscall(long num, long arg0, long arg1, long arg2) +{ + register long v0 asm("v0") = num; + register long a0 asm("a0") = arg0; + register long a1 asm("a1") = arg1; + register long a2 asm("a2") = arg2; + asm volatile ("scall" : "+r"(v0) : "r"(a0), "r"(a1), "r"(a2) : "s0"); + return v0; +} + +void exit(int code) +{ + syscall(SYS_exit, code, 0, 0); +} + +void setStats(int enable) +{ + syscall(SYS_stats, enable, 0, 0); +} + +void printstr(const char* s) +{ + syscall(SYS_write, 1, (long)s, strlen(s)); } void __attribute__((weak)) thread_entry(int cid, int nc) diff 
--git a/benchmarks/common/test.ld b/benchmarks/common/test.ld index 952bf53..dda2a7f 100644 --- a/benchmarks/common/test.ld +++ b/benchmarks/common/test.ld @@ -33,11 +33,16 @@ SECTIONS *(.text) } - /* data: Initialized data segment */ - .data : - { - *(.data) + /* data segment */ + .data : { *(.data) } + .bss : { *(.bss) } + + /* thread-local data segment */ + .tbss : { + crt.o(.tbss) /* Make sure tls_start is the first TLS symbol */ + *(.tbss) } + .tdata : { *(.tdata) } /* End of uninitalized data segement */ _end = .; -- 2.30.2