#define INSN_RS2 (insn.rs2())
#define INSN_RS3 (insn.rs3())
#define INSN_RD (insn.rd())
-#define INSN_SEG ((insn.i_imm() >> 9)+1)
+// NOTE(review): widen the signed immediate to reg_t before shifting —
+// right-shifting a negative int is implementation-defined; assumes reg_t is
+// unsigned as elsewhere in this codebase (confirm)
+#define INSN_SEG (((reg_t)insn.i_imm() >> 9)+1)
static inline reg_t read_xpr(hwacha_t* h, insn_t insn, uint32_t idx, size_t src)
{
{
public:
uint32_t bits() { return b; }
+ // immediate extractors now return plain int32_t rather than reg_t; the
+ // usual int32 -> 64-bit-register conversion sign-extends them for free
- reg_t i_imm() { return int64_t(int32_t(b) >> 20); }
- reg_t s_imm() { return x(7, 5) | (x(25, 7) << 5) | (imm_sign() << 12); }
- reg_t sb_imm() { return (x(8, 4) << 1) | (x(25,6) << 5) | (x(7,1) << 11) | (imm_sign() << 12); }
- reg_t u_imm() { return int64_t(int32_t(b) >> 12 << 12); }
- reg_t uj_imm() { return (x(21, 10) << 1) | (x(20, 1) << 11) | (x(12, 8) << 12) | (imm_sign() << 20); }
+ int32_t i_imm() { return int32_t(b) >> 20; }
+ // s_imm folds the sign into its top field via xs() instead of OR-ing in a
+ // separate imm_sign() term
+ int32_t s_imm() { return x(7, 5) + (xs(25, 7) << 5); }
+ int32_t sb_imm() { return (x(8, 4) << 1) + (x(25,6) << 5) + (x(7,1) << 11) + (imm_sign() << 12); }
+ int32_t u_imm() { return int32_t(b) >> 12 << 12; }
+ int32_t uj_imm() { return (x(21, 10) << 1) + (x(20, 1) << 11) + (x(12, 8) << 12) + (imm_sign() << 20); }
uint32_t rd() { return x(7, 5); }
uint32_t rs1() { return x(15, 5); }
uint32_t rs2() { return x(20, 5); }
uint32_t rs3() { return x(27, 5); }
uint32_t rm() { return x(12, 3); }
- reg_t csr() { return x(20, 12); }
+ uint32_t csr() { return x(20, 12); }
private:
uint32_t b;
+ // x(): zero-extended bitfield [lo, lo+len); xs(): same field sign-extended
+ // (imm_sign() is simply the sign bit replicated across the word)
- reg_t x(int lo, int len) { return b << (32-lo-len) >> (32-len); }
- reg_t imm_sign() { return int64_t(int32_t(b) >> 31); }
+ uint32_t x(int lo, int len) { return b << (32-lo-len) >> (32-len); }
+ uint32_t xs(int lo, int len) { return int32_t(b) << (32-lo-len) >> (32-len); }
+ uint32_t imm_sign() { return xs(31, 1); }
};
template <class T, size_t N, bool zero_reg>
}
void write(size_t i, T value)
{
+ // when zero_reg is set, writes to entry 0 are discarded (presumably this
+ // models an x0-style hard-wired zero register — entry 0 must then be
+ // zero-initialized elsewhere; confirm)
- data[i] = value;
+ if (!(zero_reg && i == 0))
+ data[i] = value;
}
const T& operator [] (size_t i) const
{
+ // the const_cast fixup that forced entry 0 back to zero on every read is
+ // gone: blocking the write is enough, and reads stay side-effect free
- if (zero_reg)
- const_cast<T&>(data[0]) = 0;
return data[i];
}
private:
class memtracer_list_t : public memtracer_t
{
public:
+ // cheap emptiness probe so hot paths (e.g. instruction fetch) can skip
+ // the per-tracer range scan entirely
+ bool empty() { return list.empty(); }
bool interested_in_range(uint64_t begin, uint64_t end, bool store, bool fetch)
{
for (std::vector<memtracer_t*>::iterator it = list.begin(); it != list.end(); ++it)
void mmu_t::flush_icache()
{
+ // tags now live inside icache_entry_t next to the decoded data, so
+ // invalidate per entry; the stale data fields are harmless because a tag
+ // mismatch always forces a refill
- memset(icache_tag, -1, sizeof(icache_tag));
+ for (size_t i = 0; i < ICACHE_ENTRIES; i++)
+ icache[i].tag = -1;
}
void mmu_t::flush_tlb()
flush_icache();
}
+// NOTE(review): refill_tlb now returns a host pointer into the simulated
+// memory image instead of a guest physical address; tlb_data caches
+// (mem + page base - virtual page base) so translate() can add the raw
+// virtual address on a hit
-reg_t mmu_t::refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch)
+void* mmu_t::refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch)
{
reg_t idx = (addr >> PGSHIFT) % TLB_ENTRIES;
reg_t expected_tag = addr & ~(PGSIZE-1);
+ // a tag is installed per access type only if the PTE grants that
+ // permission; -1 keeps the entry invalid for the others
tlb_load_tag[idx] = (pte_perm & PTE_UR) ? expected_tag : -1;
tlb_store_tag[idx] = (pte_perm & PTE_UW) ? expected_tag : -1;
tlb_insn_tag[idx] = (pte_perm & PTE_UX) ? expected_tag : -1;
- tlb_data[idx] = pgbase;
+ tlb_data[idx] = mem + pgbase - (addr & ~(PGSIZE-1));
}
- return paddr;
+ return mem + paddr;
}
pte_t mmu_t::walk(reg_t addr)
// template for functions that load an aligned value from memory
+// NOTE(review): translate() now yields a host pointer, so a load/store is a
+// single dereference with no second address computation; the always_inline
+// attribute after the declarator of a definition is a GNU extension with
+// uneven compiler support — confirm it builds on all supported toolchains
#define load_func(type) \
- type##_t load_##type(reg_t addr) { \
+ type##_t load_##type(reg_t addr) __attribute__((always_inline)) { \
if(unlikely(addr % sizeof(type##_t))) \
throw trap_load_address_misaligned(addr); \
- reg_t paddr = translate(addr, sizeof(type##_t), false, false); \
- return *(type##_t*)(mem + paddr); \
+ void* paddr = translate(addr, sizeof(type##_t), false, false); \
+ return *(type##_t*)paddr; \
}
// load value from memory at aligned address; zero extend to register width
void store_##type(reg_t addr, type##_t val) { \
if(unlikely(addr % sizeof(type##_t))) \
throw trap_store_address_misaligned(addr); \
- reg_t paddr = translate(addr, sizeof(type##_t), true, false); \
- *(type##_t*)(mem + paddr) = val; \
+ void* paddr = translate(addr, sizeof(type##_t), true, false); \
+ *(type##_t*)paddr = val; \
}
// store value to memory at aligned address
// load instruction from memory at aligned address.
inline insn_fetch_t load_insn(reg_t addr)
{
+ // index the icache with the low address bits (in insn_t units), scaled to
+ // a byte offset into the entry array; assumes sizeof(icache_entry_t) is a
+ // multiple of sizeof(insn_t) (tag + pad + data) — confirm the sizes
- reg_t idx = (addr/sizeof(insn_t)) % ICACHE_ENTRIES;
- if (unlikely(icache_tag[idx] != addr))
+ reg_t offset = addr & (sizeof(insn_t) * (ICACHE_ENTRIES-1));
+ offset *= sizeof(icache_entry_t) / sizeof(insn_t);
+ icache_entry_t* entry = (icache_entry_t*)((char*)icache + offset);
+ // read the cached data before the tag compare; it is only used when the
+ // tag matches, so a stale read on a miss is harmless
+ insn_fetch_t data = entry->data;
+ if (likely(entry->tag == addr))
+ return data;
+
+ // miss: translate, fetch the raw bits, and decode once
+ void* iaddr = translate(addr, sizeof(insn_t), false, true);
+ insn_fetch_t fetch;
+ fetch.insn.pad = *(decltype(fetch.insn.insn.bits())*)iaddr;
+ fetch.func = proc->decode_insn(fetch.insn.insn);
+
+ entry->tag = addr;
+ entry->data = fetch;
+
+ // recover the guest physical address for the tracers; if any tracer wants
+ // this range, keep the entry invalid so every subsequent fetch is observed
+ reg_t paddr = (char*)iaddr - mem;
+ if (!tracer.empty() && tracer.interested_in_range(paddr, paddr + sizeof(insn_t), false, true))
{
- reg_t paddr = translate(addr, sizeof(insn_t), false, true);
- insn_fetch_t fetch;
- fetch.insn.insn = *(insn_t*)(mem + paddr);
- fetch.func = proc->decode_insn(fetch.insn.insn);
-
- reg_t idx = (paddr/sizeof(insn_t)) % ICACHE_ENTRIES;
- icache_tag[idx] = addr;
- icache_data[idx] = fetch;
-
- if (tracer.interested_in_range(paddr, paddr + sizeof(insn_t), false, true))
- {
- icache_tag[idx] = -1;
- tracer.trace(paddr, sizeof(insn_t), false, true);
- }
+ entry->tag = -1;
+ tracer.trace(paddr, sizeof(insn_t), false, true);
}
- return icache_data[idx];
+ return entry->data;
}
void set_processor(processor_t* p) { proc = p; flush_tlb(); }
memtracer_list_t tracer;
// implement an instruction cache for simulator performance
- static const reg_t ICACHE_ENTRIES = 256;
- insn_fetch_t icache_data[ICACHE_ENTRIES];
+ static const reg_t ICACHE_ENTRIES = 2048;
+ // tag and decoded result are packed per entry so a hit touches a single
+ // struct; pad presumably keeps sizeof(icache_entry_t) a power of two for
+ // the shift-free indexing in load_insn — confirm sizeof(insn_fetch_t)
+ struct icache_entry_t {
+ reg_t tag;
+ reg_t pad;
+ insn_fetch_t data;
+ };
+ icache_entry_t icache[ICACHE_ENTRIES];
// implement a TLB for simulator performance
static const reg_t TLB_ENTRIES = 256;
+ // per-page host-pointer bias: mem + guest page base - virtual page base
+ // (installed by refill_tlb), so translate() just adds the virtual address
- reg_t tlb_data[TLB_ENTRIES];
+ char* tlb_data[TLB_ENTRIES];
reg_t tlb_insn_tag[TLB_ENTRIES];
reg_t tlb_load_tag[TLB_ENTRIES];
reg_t tlb_store_tag[TLB_ENTRIES];
- reg_t icache_tag[ICACHE_ENTRIES];
// finish translation on a TLB miss and upate the TLB
- reg_t refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch);
+ void* refill_tlb(reg_t addr, reg_t bytes, bool store, bool fetch);
// perform a page table walk for a given virtual address
pte_t walk(reg_t addr);
// translate a virtual address to a physical address
+ // (now returns a host pointer into the simulated memory, not a guest PA)
- reg_t translate(reg_t addr, reg_t bytes, bool store, bool fetch)
+ void* translate(reg_t addr, reg_t bytes, bool store, bool fetch)
+ __attribute__((always_inline))
{
reg_t idx = (addr >> PGSHIFT) % TLB_ENTRIES;
+ reg_t expected_tag = addr & ~(PGSIZE-1);
reg_t* tlb_tag = fetch ? tlb_insn_tag : store ? tlb_store_tag :tlb_load_tag;
- reg_t expected_tag = addr & ~(PGSIZE-1);
- if(likely(tlb_tag[idx] == expected_tag))
- return ((uintptr_t)addr & (PGSIZE-1)) + tlb_data[idx];
+ // compute the would-be host pointer up front; it is returned only when
+ // the tag confirms this TLB entry maps the page
+ void* data = tlb_data[idx] + addr;
+ if (likely(tlb_tag[idx] == expected_tag))
+ return data;
return refill_tlb(addr, bytes, store, fetch);
}
#include <assert.h>
#include <limits.h>
#include <stdexcept>
+#include <algorithm>
processor_t::processor_t(sim_t* _sim, mmu_t* _mmu, uint32_t _id)
- : sim(_sim), mmu(_mmu), ext(NULL), id(_id), run(false), debug(false),
- opcode_bits(0)
+ : sim(_sim), mmu(_mmu), ext(NULL), disassembler(new disassembler_t),
+ id(_id), run(false), debug(false)
{
reset(true);
mmu->set_processor(this);
#define DECLARE_INSN(name, match, mask) REGISTER_INSN(this, name, match, mask)
#include "encoding.h"
#undef DECLARE_INSN
+ // registration only appends descriptors; the bucketed dispatch table must
+ // be built once all base-ISA opcodes are in
+ build_opcode_map();
}
processor_t::~processor_t()
// the ISA guarantees on boot that the PC is 0x2000 and the the processor
// is in supervisor mode, and in 64-bit mode, if supported, with traps
// and virtual memory disabled.
+ // NOTE(review): SR_S64 is now set unconditionally — the RISCV_ENABLE_64BIT
+ // build option appears to be gone; confirm 64-bit support is always
+ // compiled in
- sr = SR_S;
-#ifdef RISCV_ENABLE_64BIT
- sr |= SR_S64;
-#endif
+ sr = SR_S | SR_S64;
pc = 0x2000;
// the following state is undefined upon boot-up,
run = !value;
state.reset(); // reset the core
+ // route the initial status through set_pcr so derived state (such as the
+ // cached rv64 flag) is recomputed from the fresh SR value
+ set_pcr(CSR_STATUS, state.sr);
+
if (ext)
ext->reset(); // reset the extension
}
{
// the disassembler is stateless, so we share it
fprintf(stderr, "core %3d: 0x%016" PRIx64 " (0x%08" PRIx32 ") %s\n",
- id, state.pc, insn.bits(), disassembler.disassemble(insn).c_str());
+ id, state.pc, insn.bits(), disassembler->disassemble(insn).c_str());
}
reg_t processor_t::set_pcr(int which, reg_t val)
if (!ext)
state.sr &= ~SR_EA;
state.sr &= ~SR_ZERO;
+ // cache the current XLEN selection on status writes so decode_insn does
+ // not re-derive it on every instruction
+ rv64 = (state.sr & SR_S) ? (state.sr & SR_S64) : (state.sr & SR_U64);
mmu->flush_tlb();
break;
case CSR_EPC:
insn_func_t processor_t::decode_insn(insn_t insn)
{
- bool rv64 = (state.sr & SR_S) ? (state.sr & SR_S64) : (state.sr & SR_U64);
+ // opcode_map.size() is a power of two (see build_opcode_map); the low
+ // bits of the encoding select a bucket, then a linear probe finds the
+ // first matching descriptor
+ size_t mask = opcode_map.size()-1;
+ insn_desc_t* desc = opcode_map[insn.bits() & mask];
- auto key = insn.bits() & ((1L << opcode_bits)-1);
- for (auto it = opcode_map.find(key); it != opcode_map.end() && it->first == key; ++it)
- if ((insn.bits() & it->second.mask) == it->second.match)
- return rv64 ? it->second.rv64 : it->second.rv32;
+ // terminates because build_opcode_map appends a catch-all sentinel
+ // (mask == match == 0) that maps to illegal_instruction
+ while ((insn.bits() & desc->mask) != desc->match)
+ desc++;
- return &illegal_instruction;
+ return rv64 ? desc->rv64 : desc->rv32;
}
void processor_t::register_insn(insn_desc_t desc)
{
+ // every opcode must decode bit 0, guaranteeing at least one bucket bit
assert(desc.mask & 1);
- if (opcode_bits == 0 || (desc.mask & ((1L << opcode_bits)-1)) != ((1L << opcode_bits)-1))
+ // registration just collects descriptors; indexing is deferred to
+ // build_opcode_map so it happens once, not per registration
+ instructions.push_back(desc);
+}
+
+// build the bucketed dispatch table used by decode_insn: descriptors are
+// grouped by the low bits of their match value into contiguous slices of
+// opcode_store, terminated by a catch-all illegal-instruction sentinel
+void processor_t::build_opcode_map()
+{
+ // bucket count = largest power of two whose low bits are fully decoded by
+ // every registered instruction's mask
+ size_t buckets = -1;
+ for (auto& inst : instructions)
+ while ((inst.mask & buckets) != buckets)
+ buckets /= 2;
+ buckets++;
+
+ // sort by bucket, then by match value within a bucket, so each bucket is
+ // a contiguous run in the sorted vector
+ struct cmp {
+ decltype(insn_desc_t::match) mask;
+ cmp(decltype(mask) mask) : mask(mask) {}
+ bool operator()(const insn_desc_t& lhs, const insn_desc_t& rhs) {
+ if ((lhs.match & mask) != (rhs.match & mask))
+ return (lhs.match & mask) < (rhs.match & mask);
+ return lhs.match < rhs.match;
+ }
+ };
+ std::sort(instructions.begin(), instructions.end(), cmp(buckets-1));
+
+ opcode_map.resize(buckets);
+ opcode_store.resize(instructions.size() + 1);
+
+ // each bucket pointer aims at its first descriptor; empty buckets point
+ // at the next bucket's run (probing past them is safe — see decode_insn)
+ size_t j = 0;
+ for (size_t b = 0, i = 0; b < buckets; b++)
{
- unsigned x = 0;
- while ((desc.mask & ((1L << (x+1))-1)) == ((1L << (x+1))-1) &&
- (opcode_bits == 0 || x <= opcode_bits))
- x++;
- opcode_bits = x;
-
- decltype(opcode_map) new_map;
- for (auto it = opcode_map.begin(); it != opcode_map.end(); ++it)
- new_map.insert(std::make_pair(it->second.match & ((1L<<x)-1), it->second));
- opcode_map = new_map;
+ opcode_map[b] = &opcode_store[j];
+ while (i < instructions.size() && b == (instructions[i].match & (buckets-1)))
+ opcode_store[j++] = instructions[i++];
}
- opcode_map.insert(std::make_pair(desc.match & ((1L<<opcode_bits)-1), desc));
+ // final slot is the sentinel every probe eventually hits
+ assert(j == opcode_store.size()-1);
+ opcode_store[j].match = opcode_store[j].mask = 0;
+ opcode_store[j].rv32 = &illegal_instruction;
+ opcode_store[j].rv64 = &illegal_instruction;
}
void processor_t::register_extension(extension_t* x)
{
for (auto insn : x->get_instructions())
register_insn(insn);
+ // extensions add opcodes after construction, so rebuild the dispatch
+ // table to include them
+ build_opcode_map();
for (auto disasm_insn : x->get_disasms())
- disassembler.add_insn(disasm_insn);
+ disassembler->add_insn(disasm_insn);
+ // NOTE(review): this check runs after the extension's opcodes were
+ // already registered — a failure here leaves them in the dispatch table
if (ext != NULL)
throw std::logic_error("only one extension may be registered");
ext = x;
#define _RISCV_PROCESSOR_H
#include "decode.h"
-#include "disasm.h"
-#include <cstring>
#include "config.h"
-#include <map>
+#include <cstring>
+#include <memory>
+#include <vector>
class processor_t;
class mmu_t;
class sim_t;
class trap_t;
class extension_t;
+class disassembler_t;
struct insn_desc_t
{
sim_t* sim;
mmu_t* mmu; // main memory is always accessed via the mmu
extension_t* ext;
+ // heap-allocated so this header needs only a forward declaration of
+ // disassembler_t instead of including disasm.h
- disassembler_t disassembler;
+ std::unique_ptr<disassembler_t> disassembler;
state_t state;
uint32_t id;
bool run; // !reset
bool debug;
+ // cached XLEN selector, refreshed by set_pcr on status-register writes
+ bool rv64;
- unsigned opcode_bits;
- std::multimap<uint32_t, insn_desc_t> opcode_map;
+ // instructions: unordered master list of registered opcodes;
+ // opcode_map/opcode_store: bucketed dispatch table built from it by
+ // build_opcode_map()
+ std::vector<insn_desc_t> instructions;
+ std::vector<insn_desc_t*> opcode_map;
+ std::vector<insn_desc_t> opcode_store;
void take_interrupt(); // take a trap if any interrupts are pending
void take_trap(reg_t pc, trap_t& t); // take an exception
friend class extension_t;
friend class htif_isasim_t;
+ void build_opcode_map();
insn_func_t decode_insn(insn_t insn);
};