From d6fd350f0c3c0af1760748b358534feb3defcc21 Mon Sep 17 00:00:00 2001 From: Andrew Waterman Date: Sat, 11 Jun 2011 16:45:01 -0700 Subject: [PATCH] [xcc] cleaned up mmu code --- riscv/applink.cc | 9 ++- riscv/insns/mfpcr.h | 2 +- riscv/mmu.cc | 96 +++++++++++++++++++++++++++++++- riscv/mmu.h | 122 +++++++++-------------------------------- riscv/processor.cc | 6 +- riscv/processor.h | 4 +- riscv/riscv-isa-run.cc | 2 +- riscv/sim.cc | 74 ++++++++++++++++--------- riscv/sim.h | 7 +-- 9 files changed, 185 insertions(+), 137 deletions(-) diff --git a/riscv/applink.cc b/riscv/applink.cc index 476c108..e3e979d 100644 --- a/riscv/applink.cc +++ b/riscv/applink.cc @@ -113,15 +113,20 @@ int appserver_link_t::wait_for_packet() demand(p.data_size % APP_DATA_ALIGN == 0, "misaligned data"); demand(p.data_size <= APP_MAX_DATA_SIZE, "long read data"); demand(p.addr <= sim->memsz && p.addr+p.data_size <= sim->memsz, "out of bounds: 0x%llx",(unsigned long long)p.addr); - memcpy(ackpacket.data,sim->mem+p.addr,p.data_size); ackpacket.data_size = p.data_size; + + static_assert(APP_DATA_ALIGN >= sizeof(uint64_t)) + for(size_t i = 0; i < p.data_size/8; i++) + ((uint64_t*)ackpacket.data)[i] = sim->mmu->load_uint64(p.addr+i*8); break; case APP_CMD_WRITE_MEM: demand(p.addr % APP_DATA_ALIGN == 0, "misaligned address"); demand(p.data_size % APP_DATA_ALIGN == 0, "misaligned data"); demand(p.data_size <= bytes - offsetof(packet,data), "short packet"); demand(p.addr <= sim->memsz && p.addr+p.data_size <= sim->memsz, "out of bounds: 0x%llx",(unsigned long long)p.addr); - memcpy(sim->mem+p.addr,p.data,p.data_size); + + for(size_t i = 0; i < p.data_size/8; i++) + sim->mmu->store_uint64(p.addr+i*8, ((uint64_t*)p.data)[i]); break; case APP_CMD_READ_CONTROL_REG: demand(p.addr == 16,"bad control reg"); diff --git a/riscv/insns/mfpcr.h b/riscv/insns/mfpcr.h index 3f47a10..e0c67ae 100644 --- a/riscv/insns/mfpcr.h +++ b/riscv/insns/mfpcr.h @@ -31,7 +31,7 @@ switch(insn.rtype.rs2) break; case 8: - val = MEMSIZE >> PGSHIFT; + val = mmu.memsz >> PGSHIFT; break; case 9: diff --git a/riscv/mmu.cc b/riscv/mmu.cc index 4c29000..05db18f 100644 --- a/riscv/mmu.cc +++ b/riscv/mmu.cc @@ -1,12 +1,104 @@ #include "mmu.h" +#include "sim.h" +#include "processor.h" + +mmu_t::mmu_t(char* _mem, size_t _memsz) + : mem(_mem), memsz(_memsz), badvaddr(0), + ptbr(0), supervisor(true), vm_enabled(false), + icsim(NULL), dcsim(NULL), itlbsim(NULL), dtlbsim(NULL) +{ + flush_tlb(); +} + +mmu_t::~mmu_t() +{ +} void mmu_t::flush_tlb() { - memset(tlb_data, 0, sizeof(tlb_data)); // TLB entry itself has valid bit + memset(tlb_insn_tag, -1, sizeof(tlb_insn_tag)); + memset(tlb_load_tag, -1, sizeof(tlb_load_tag)); + memset(tlb_store_tag, -1, sizeof(tlb_store_tag)); flush_icache(); } void mmu_t::flush_icache() { - memset(icache_tag, 0, sizeof(icache_tag)); // I$ tag contains valid bit + memset(icache_tag, -1, sizeof(icache_tag)); +} + +reg_t mmu_t::refill(reg_t addr, bool store, bool fetch) +{ + reg_t idx = (addr >> PGSHIFT) % TLB_ENTRIES; + reg_t expected_tag = addr & ~(PGSIZE-1); + + reg_t pte = walk(addr); + + reg_t pte_perm = pte & PTE_PERM; + if(supervisor) // shift supervisor permission bits into user perm bits + pte_perm = (pte_perm >> 3) & PTE_PERM; + pte_perm |= pte & PTE_E; + + reg_t perm = (fetch ? PTE_UX : store ? PTE_UW : PTE_UR) | PTE_E; + if(unlikely((pte_perm & perm) != perm)) + { + badvaddr = addr; + throw store ? trap_store_access_fault + : fetch ? trap_instruction_access_fault + : trap_load_access_fault; + } + + tlb_load_tag[idx] = (pte_perm & PTE_UR) ? expected_tag : -1; + tlb_store_tag[idx] = (pte_perm & PTE_UW) ? expected_tag : -1; + tlb_insn_tag[idx] = (pte_perm & PTE_UX) ? expected_tag : -1; + tlb_data[idx] = pte >> PTE_PPN_SHIFT << PGSHIFT; + + return (addr & (PGSIZE-1)) | tlb_data[idx]; +} + +pte_t mmu_t::walk(reg_t addr) +{ + pte_t pte = 0; + + if(!vm_enabled) + { + if(addr < memsz) + pte = PTE_E | PTE_PERM | ((addr >> PGSHIFT) << PTE_PPN_SHIFT); + } + else + { + reg_t base = ptbr; + reg_t ptd; + + int ptshift = (LEVELS-1)*PTIDXBITS; + for(reg_t i = 0; i < LEVELS; i++, ptshift -= PTIDXBITS) + { + reg_t idx = (addr >> (PGSHIFT+ptshift)) & ((1<= memsz) + break; + + ptd = *(pte_t*)(mem+pte_addr); + if(ptd & PTE_E) + { + // if this PTE is from a larger PT, fake a leaf + // PTE so the TLB will work right + reg_t vpn = addr >> PGSHIFT; + ptd |= (vpn & ((1<<(ptshift))-1)) << PTE_PPN_SHIFT; + + // fault if physical addr is invalid + reg_t ppn = ptd >> PTE_PPN_SHIFT; + if((ppn << PGSHIFT) + (addr & (PGSIZE-1)) < memsz) + pte = ptd; + break; + } + else if(!(ptd & PTE_T)) + break; + + base = (ptd >> PTE_PPN_SHIFT) << PGSHIFT; + } + } + + return pte; } diff --git a/riscv/mmu.h b/riscv/mmu.h index 6033e98..d5e144b 100644 --- a/riscv/mmu.h +++ b/riscv/mmu.h @@ -1,8 +1,10 @@ +#ifndef _RISCV_MMU_H +#define _RISCV_MMU_H + #include "decode.h" #include "trap.h" #include "icsim.h" #include "common.h" -#include class processor_t; @@ -31,12 +33,8 @@ const reg_t PPN_BITS = 8*sizeof(reg_t) - PGSHIFT; class mmu_t { public: - mmu_t(char* _mem, size_t _memsz) - : mem(_mem), memsz(_memsz), badvaddr(0), - ptbr(0), supervisor(true), vm_enabled(false), - icsim(NULL), dcsim(NULL), itlbsim(NULL), dtlbsim(NULL) - { - } + mmu_t(char* _mem, size_t _memsz); + ~mmu_t(); #ifdef RISCV_ENABLE_ICSIM # define dcsim_tick(dcsim, dtlbsim, addr, size, st) \ @@ -74,13 +72,8 @@ public: { insn_t insn; - reg_t idx = (addr/sizeof(insn_t)) % ICACHE_ENTRIES; - bool hit = addr % 4 == 0 && icache_tag[idx] == (addr | 1); - if(likely(hit)) - return icache_data[idx]; - #ifdef RISCV_ENABLE_RVC - if(addr % 4 == 2 && rvc) + if(addr % 4 == 2 && rvc) // fetch across word boundary { reg_t paddr_lo = translate(addr, false, true); insn.bits = *(uint16_t*)(mem+paddr_lo); @@ -94,15 +87,16 @@ public: else #endif { - if(unlikely(addr % 4)) - { - badvaddr = addr; - throw trap_instruction_address_misaligned; - } + reg_t idx = (addr/sizeof(insn_t)) % ICACHE_ENTRIES; + bool hit = icache_tag[idx] == addr; + if(likely(hit)) + return icache_data[idx]; + + // the processor guarantees alignment based upon rvc mode reg_t paddr = translate(addr, false, true); insn = *(insn_t*)(mem+paddr); - icache_tag[idx] = addr | 1; + icache_tag[idx] = addr; icache_data[idx] = insn; } @@ -157,7 +151,9 @@ private: static const reg_t TLB_ENTRIES = 256; pte_t tlb_data[TLB_ENTRIES]; - reg_t tlb_tag[TLB_ENTRIES]; + reg_t tlb_insn_tag[TLB_ENTRIES]; + reg_t tlb_load_tag[TLB_ENTRIES]; + reg_t tlb_store_tag[TLB_ENTRIES]; static const reg_t ICACHE_ENTRIES = 256; insn_t icache_data[ICACHE_ENTRIES]; @@ -168,88 +164,22 @@ private: icsim_t* itlbsim; icsim_t* dtlbsim; + reg_t refill(reg_t addr, bool store, bool fetch); + pte_t walk(reg_t addr); + reg_t translate(reg_t addr, bool store, bool fetch) { reg_t idx = (addr >> PGSHIFT) % TLB_ENTRIES; - pte_t pte = tlb_data[idx]; - reg_t tag = tlb_tag[idx]; - - trap_t trap = store ? trap_store_access_fault - : fetch ? trap_instruction_access_fault - : trap_load_access_fault; - - bool hit = (pte & PTE_E) && tag == (addr >> PGSHIFT); - if(unlikely(!hit)) - { - pte = walk(addr); - if(!(pte & PTE_E)) - { - badvaddr = addr; - throw trap; - } - - tlb_data[idx] = pte; - tlb_tag[idx] = addr >> PGSHIFT; - } - - reg_t access_type = store ? PTE_UW : fetch ? PTE_UX : PTE_UR; - if(supervisor) - access_type <<= 3; - if(unlikely(!(access_type & pte & PTE_PERM))) - { - badvaddr = addr; - throw trap; - } - return (addr & (PGSIZE-1)) | ((pte >> PTE_PPN_SHIFT) << PGSHIFT); - } + reg_t* tlb_tag = fetch ? tlb_insn_tag : store ? tlb_store_tag :tlb_load_tag; + reg_t expected_tag = addr & ~(PGSIZE-1); + if(likely(tlb_tag[idx] == expected_tag)) + return (addr & (PGSIZE-1)) | tlb_data[idx]; - pte_t walk(reg_t addr) - { - pte_t pte = 0; - - if(!vm_enabled) - { - if(addr < memsz) - pte = PTE_E | PTE_PERM | ((addr >> PGSHIFT) << PTE_PPN_SHIFT); - } - else - { - reg_t base = ptbr; - reg_t ptd; - - int ptshift = (LEVELS-1)*PTIDXBITS; - for(reg_t i = 0; i < LEVELS; i++, ptshift -= PTIDXBITS) - { - reg_t idx = (addr >> (PGSHIFT+ptshift)) & ((1<= memsz) - break; - - ptd = *(pte_t*)(mem+pte_addr); - if(ptd & PTE_E) - { - // if this PTE is from a larger PT, fake a leaf - // PTE so the TLB will work right - reg_t vpn = addr >> PGSHIFT; - ptd |= (vpn & ((1<<(ptshift))-1)) << PTE_PPN_SHIFT; - - // fault if physical addr is invalid - reg_t ppn = ptd >> PTE_PPN_SHIFT; - if((ppn << PGSHIFT) + (addr & (PGSIZE-1)) < memsz) - pte = ptd; - break; - } - else if(!(ptd & PTE_T)) - break; - - base = (ptd >> PTE_PPN_SHIFT) << PGSHIFT; - } - } - - return pte; + return refill(addr, store, fetch); } friend class processor_t; }; + +#endif diff --git a/riscv/processor.cc b/riscv/processor.cc index cb2188b..bd22924 100644 --- a/riscv/processor.cc +++ b/riscv/processor.cc @@ -9,8 +9,8 @@ #include "sim.h" #include "icsim.h" -processor_t::processor_t(sim_t* _sim, char* _mem, size_t _memsz) - : sim(_sim), mmu(_mem,_memsz) +processor_t::processor_t(sim_t* _sim, mmu_t* _mmu) + : sim(_sim), mmu(*_mmu) { initialize_dispatch_table(); // a few assumptions about endianness, including freg_t union @@ -55,7 +55,7 @@ void processor_t::init(uint32_t _id, icsim_t* default_icache, for (int i=0; iid = id; uts[i]->set_sr(uts[i]->sr | SR_EF); uts[i]->set_sr(uts[i]->sr | SR_EV); diff --git a/riscv/processor.h b/riscv/processor.h index b72fa27..ce07d46 100644 --- a/riscv/processor.h +++ b/riscv/processor.h @@ -18,7 +18,7 @@ class sim_t; class processor_t { public: - processor_t(sim_t* _sim, char* _mem, size_t _memsz); + processor_t(sim_t* _sim, mmu_t* _mmu); ~processor_t(); void init(uint32_t _id, icsim_t* defualt_icache, icsim_t* default_dcache); void step(size_t n, bool noisy); @@ -55,7 +55,7 @@ private: int xprlen; // shared memory - mmu_t mmu; + mmu_t& mmu; // counters reg_t cycle; diff --git a/riscv/riscv-isa-run.cc b/riscv/riscv-isa-run.cc index c14cbbb..bd3b102 100644 --- a/riscv/riscv-isa-run.cc +++ b/riscv/riscv-isa-run.cc @@ -52,7 +52,7 @@ int main(int argc, char** argv) appserver_link_t applink(tohost_fd, fromhost_fd); - sim_t s(nprocs, MEMSIZE, &applink, &icache, &dcache); + sim_t s(nprocs, &applink, &icache, &dcache); try { s.run(debug); diff --git a/riscv/sim.cc b/riscv/sim.cc index ba8992f..9b2677c 100644 --- a/riscv/sim.cc +++ b/riscv/sim.cc @@ -5,23 +5,49 @@ #include #include #include +#include -sim_t::sim_t(int _nprocs, size_t _memsz, appserver_link_t* _applink, icsim_t* default_icache, icsim_t* default_dcache) +sim_t::sim_t(int _nprocs, appserver_link_t* _applink, icsim_t* default_icache, icsim_t* default_dcache) : applink(_applink), - memsz(_memsz), - mem((char*)mmap64(NULL, memsz, PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0)), - procs(std::vector(_nprocs,processor_t(this,mem,memsz))) + procs(_nprocs) { - demand(mem != MAP_FAILED, "couldn't allocate target machine's memory"); + size_t memsz0 = sizeof(size_t) == 8 ? 0x100000000ULL : 0x70000000UL; + size_t quantum = std::max(PGSIZE, (reg_t)sysconf(_SC_PAGESIZE)); + memsz0 = memsz0/quantum*quantum; - for(int i = 0; i < (int)num_cores(); i++) - procs[i].init(i, default_icache, default_dcache); + memsz = memsz0; + mem = (char*)mmap64(NULL, memsz, PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); + + if(mem == MAP_FAILED) + { + while(mem == MAP_FAILED && (memsz = memsz*10/11/quantum*quantum)) + mem = (char*)mmap64(NULL, memsz, PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1, 0); + assert(mem != MAP_FAILED); + fprintf(stderr, "warning: only got %lu bytes of target mem (wanted %lu)\n", + (unsigned long)memsz, (unsigned long)memsz0); + } + + mmu = new mmu_t(mem, memsz); + + for(size_t i = 0; i < num_cores(); i++) + { + procs[i] = new processor_t(this, new mmu_t(mem, memsz)); + procs[i]->init(i, default_icache, default_dcache); + } applink->init(this); } sim_t::~sim_t() { + for(size_t i = 0; i < num_cores(); i++) + { + mmu_t* pmmu = &procs[i]->mmu; + delete procs[i]; + delete pmmu; + } + delete mmu; + munmap(mem, memsz); } void sim_t::set_tohost(reg_t val) @@ -40,7 +66,7 @@ reg_t sim_t::get_fromhost() void sim_t::send_ipi(reg_t who) { if(who < num_cores()) - procs[who].deliver_ipi(); + procs[who]->deliver_ipi(); } void sim_t::run(bool debug) @@ -103,7 +129,7 @@ void sim_t::step_all(size_t n, size_t interleave, bool noisy) { for(size_t j = 0; j < n; j+=interleave) for(int i = 0; i < (int)num_cores(); i++) - procs[i].step(interleave,noisy); + procs[i]->step(interleave,noisy); } void sim_t::interactive_run_noisy(const std::string& cmd, const std::vector& args) @@ -144,9 +170,9 @@ void sim_t::interactive_run_proc(const std::string& cmd, const std::vectorstep(atoi(a[1].c_str()),noisy); else - while(1) procs[p].step(1,noisy); + while(1) procs[p]->step(1,noisy); } void sim_t::interactive_quit(const std::string& cmd, const std::vector& args) @@ -163,7 +189,7 @@ reg_t sim_t::get_pc(const std::vector& args) if(p >= (int)num_cores()) throw trap_illegal_instruction; - return procs[p].pc; + return procs[p]->pc; } reg_t sim_t::get_reg(const std::vector& args) @@ -176,7 +202,7 @@ reg_t sim_t::get_reg(const std::vector& args) if(p >= (int)num_cores() || r >= NXPR) throw trap_illegal_instruction; - return procs[p].XPR[r]; + return procs[p]->XPR[r]; } reg_t sim_t::get_freg(const std::vector& args) @@ -189,7 +215,7 @@ reg_t sim_t::get_freg(const std::vector& args) if(p >= (int)num_cores() || r >= NFPR) throw trap_illegal_instruction; - return procs[p].FPR[r]; + return procs[p]->FPR[r]; } reg_t sim_t::get_tohost(const std::vector& args) @@ -201,7 +227,7 @@ reg_t sim_t::get_tohost(const std::vector& args) if(p >= (int)num_cores()) throw trap_illegal_instruction; - return procs[p].tohost; + return procs[p]->tohost; } void sim_t::interactive_reg(const std::string& cmd, const std::vector& args) @@ -236,15 +262,13 @@ reg_t sim_t::get_mem(const std::vector& args) throw trap_illegal_instruction; std::string addr_str = args[0]; - mmu_t mmu(mem, memsz); - mmu.set_supervisor(true); if(args.size() == 2) { int p = atoi(args[0].c_str()); if(p >= (int)num_cores()) throw trap_illegal_instruction; - mmu.set_vm_enabled(!!(procs[p].sr & SR_VM)); - mmu.set_ptbr(procs[p].mmu.get_ptbr()); + mmu->set_vm_enabled(!!(procs[p]->sr & SR_VM)); + mmu->set_ptbr(procs[p]->mmu.get_ptbr()); addr_str = args[1]; } @@ -255,17 +279,17 @@ reg_t sim_t::get_mem(const std::vector& args) switch(addr % 8) { case 0: - val = mmu.load_uint64(addr); + val = mmu->load_uint64(addr); break; case 4: - val = mmu.load_uint32(addr); + val = mmu->load_uint32(addr); break; case 2: case 6: - val = mmu.load_uint16(addr); + val = mmu->load_uint16(addr); break; default: - val = mmu.load_uint8(addr); + val = mmu->load_uint8(addr); break; } return val; @@ -283,10 +307,8 @@ void sim_t::interactive_str(const std::string& cmd, const std::vectorload_uint8(addr++))) putchar(ch); putchar('\n'); diff --git a/riscv/sim.h b/riscv/sim.h index 27d36a8..c56ad95 100644 --- a/riscv/sim.h +++ b/riscv/sim.h @@ -5,14 +5,12 @@ #include #include "processor.h" -const long MEMSIZE = 0x100000000; - class appserver_link_t; class sim_t { public: - sim_t(int _nprocs, size_t _memsz, appserver_link_t* _applink, icsim_t* _default_icache, icsim_t* default_dcache); + sim_t(int _nprocs, appserver_link_t* _applink, icsim_t* _default_icache, icsim_t* default_dcache); ~sim_t(); void run(bool debug); @@ -31,7 +29,8 @@ private: size_t memsz; char* mem; - std::vector procs; + mmu_t* mmu; + std::vector procs; void step_all(size_t n, size_t interleave, bool noisy); -- 2.30.2