+from random import randint, seed
+from copy import deepcopy
+from math import log
+
+
class Memory(Elaboratable):
    """ Test memory: one read port and one write port on an nmigen Memory.

        Both ports are driven from the single address input `adr`.
        The underlying memory is initialised from range(0, depth).

        Signals:

        * :adr:   address input (addrw bits)
        * :dat_r: read data output (regwid bits)
        * :dat_w: write data input (regwid bits)
        * :we:    write enable
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: bit width of each memory entry (and of dat_r/dat_w)
            * :addrw:  bit width of the address input
        """
        # This class is itself called "Memory", so the bare name resolves
        # to this class and not to the nmigen primitive.  Import the
        # primitive under an alias to reach it.
        # NOTE(review): assumes the file already depends on nmigen (it uses
        # Elaboratable / Signal / Module) - confirm the import path.
        from nmigen import Memory as HdlMemory

        # Integer division: ddepth is used as a slice bound and depth is
        # passed to range(), neither of which accepts a float.
        self.ddepth = regwid // 8
        depth = (1 << addrw) // self.ddepth
        self.adr = Signal(addrw)
        self.dat_r = Signal(regwid)
        self.dat_w = Signal(regwid)
        self.we = Signal()
        self.mem = HdlMemory(width=regwid, depth=depth,
                             init=range(0, depth))

    def elaborate(self, platform):
        m = Module()
        m.submodules.rdport = rdport = self.mem.read_port()
        m.submodules.wrport = wrport = self.mem.write_port()

        # Ignore the low address bits.  Reads and writes use the same
        # sliced address so that a store can be read back from the address
        # it was written to (this matches MemSim.ld / MemSim.st).
        # NOTE(review): the slice drops `ddepth` bits, not log2(ddepth)
        # bits - confirm that this is the intended address mapping.
        entry_adr = self.adr[self.ddepth:]

        m.d.comb += [
            rdport.addr.eq(entry_adr),
            self.dat_r.eq(rdport.data),
            wrport.addr.eq(entry_adr),
            wrport.data.eq(self.dat_w),
            wrport.en.eq(self.we),
        ]
        return m
+
+
class MemSim:
    """ Plain-Python memory model backed by a list.

        The list starts out as [0, 1, 2, ... depth-1], where depth is
        (1<<addrw) // (regwid//8).  Addresses are turned into list
        indices by shifting right by `ddepth`.

        NOTE(review): the shift amount is ddepth (regwid//8) itself, not
        log2(ddepth) - confirm that this is the intended mapping.
    """
    def __init__(self, regwid, addrw):
        """ Inputs:

            * :regwid: bit width of each stored value
            * :addrw:  bit width of the address space
        """
        self.regwid = regwid
        self.ddepth = regwid // 8
        n_entries = (1 << addrw) // self.ddepth
        self.mem = list(range(n_entries))

    def _slot(self, addr):
        """ Convert an address into an index into self.mem """
        return addr >> self.ddepth

    def ld(self, addr):
        """ Return the value stored at the entry selected by addr """
        return self.mem[self._slot(addr)]

    def st(self, addr, data):
        """ Store data, truncated to regwid bits, at the entry for addr """
        mask = (1 << self.regwid) - 1
        self.mem[self._slot(addr)] = data & mask
+
+
class CompUnitsBase(Elaboratable):
    """ Computation Unit Base class.

        Groups a sequence of ALU Computation Units (units that can handle
        the same operations) and presents their per-unit control bits
        (issue, go_rd, go_wr, ...) concatenated into multi-bit signals.

        The class also works recursively: because a CompUnitsBase exposes
        the same signal names as the units it wraps, a group of *groups*
        can be concatenated in exactly the same way.  This gives a
        top-level sequential concatenation of every signal in and out of
        the ALUs whilst still allowing ALUs to be grouped conveniently.

        At the lower level the intent is that groups of (identical) ALUs
        may be passed the same operation.  Beyond that, the intent is for
        such a group to share the *same pipeline* and so become a
        "Concurrent Computation Unit" as defined by Mitch Alsup (see
        section 11.4.9.3).
    """
    def __init__(self, rwid, units):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :units: sequence of ALUs (or CompUnitsBase derivatives)
        """
        self.units = units
        self.rwid = rwid

        # a group of groups is as wide as the sum of its members; a
        # group of plain units has one bit per unit.  Only the first
        # entry is inspected to tell the two cases apart.
        if not units or not isinstance(units[0], CompUnitsBase):
            self.n_units = len(units)
        else:
            self.n_units = sum(grp.n_units for grp in self.units)

        n_units = self.n_units

        # inputs
        self.issue_i = Signal(n_units, reset_less=True)
        self.go_rd_i = Signal(n_units, reset_less=True)
        self.go_wr_i = Signal(n_units, reset_less=True)
        self.shadown_i = Signal(n_units, reset_less=True)
        self.go_die_i = Signal(n_units, reset_less=True)

        # outputs
        self.busy_o = Signal(n_units, reset_less=True)
        self.rd_rel_o = Signal(n_units, reset_less=True)
        self.req_rel_o = Signal(n_units, reset_less=True)

        # in/out register data (note: not register#, actual data)
        self.data_o = Signal(rwid, reset_less=True)
        self.src1_i = Signal(rwid, reset_less=True)
        self.src2_i = Signal(rwid, reset_less=True)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        # register every unit as submodule comp0, comp1, ...
        for idx, unit in enumerate(self.units):
            setattr(m.submodules, "comp%d" % idx, unit)

        def bundle(attr):
            """ Concatenate the named signal of every unit, in order """
            return Cat(*[getattr(unit, attr) for unit in self.units])

        # unit outputs -> concatenated group outputs
        comb += self.rd_rel_o.eq(bundle("rd_rel_o"))
        comb += self.req_rel_o.eq(bundle("req_rel_o"))
        comb += self.busy_o.eq(bundle("busy_o"))

        # concatenated group inputs -> unit inputs
        comb += bundle("go_die_i").eq(self.go_die_i)
        comb += bundle("shadown_i").eq(self.shadown_i)
        comb += bundle("go_wr_i").eq(self.go_wr_i)
        comb += bundle("go_rd_i").eq(self.go_rd_i)
        comb += bundle("issue_i").eq(self.issue_i)

        # connect data register input/output

        # merge all unit outputs into a single value.  treereduce is
        # handed the units themselves: it needs a list whose items have
        # an attribute named "data_o" (bit of a hack).
        if self.units:
            merged = treereduce(self.units)
            comb += self.data_o.eq(merged)

        # every unit receives the same two source operands
        for unit in self.units:
            comb += unit.src1_i.eq(self.src1_i)
            comb += unit.src2_i.eq(self.src2_i)

        return m
+
+
class CompUnitALUs(CompUnitsBase):
    """ Group of four integer ALUs, each wrapped in its own
        ComputationUnitNoDelay, all fed the same operation and immediate.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Int ALUs
        add = ALU(rwid)
        sub = ALU(rwid)
        mul = ALU(rwid)
        shf = ALU(rwid)

        # one Computation Unit per ALU, in add/sub/mul/shf order
        aluopwid = 3 # extra bit for immediate mode
        units = [ComputationUnitNoDelay(rwid, aluopwid, alu)
                 for alu in (add, sub, mul, shf)]

        super().__init__(rwid, units)

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the same operation and immediate to all units.  Only
        # bits 0..2 of each unit's oper_i are driven from oper_i.
        for cu in self.units:
            comb += [
                cu.oper_i[0:3].eq(self.oper_i),
                cu.imm_i.eq(self.imm_i),
            ]

        return m
+
+
class CompUnitBR(CompUnitsBase):
    """ Group containing a single branch Computation Unit.

        The BranchALU is kept as `self.bgt` and the Computation Unit
        wrapping it as `self.br1`.
    """

    def __init__(self, rwid, opwid):
        """ Inputs:

            * :rwid:  bit width of register file(s) - both FP and INT
            * :opwid: operand bit width

            Note: the bgt unit is stored on the instance (self.bgt) so
            that a shadow unit can be created for it
        """
        self.opwid = opwid

        # inputs
        self.oper_i = Signal(opwid, reset_less=True)
        self.imm_i = Signal(rwid, reset_less=True)

        # Branch ALU and CU
        aluopwid = 3 # extra bit for immediate mode
        self.bgt = BranchALU(rwid)
        self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)

        super().__init__(rwid, [self.br1])

    def elaborate(self, platform):
        m = super().elaborate(platform)
        comb = m.d.comb

        # hand the same operation and immediate to all units
        for cu in self.units:
            comb += [
                cu.oper_i.eq(self.oper_i),
                cu.imm_i.eq(self.imm_i),
            ]

        return m
+
+
class FunctionUnits(Elaboratable):
    """ Wires together the Integer FU-FU Dependency Matrix and the
        Integer FU-Register Dependency Matrix.

        Inputs:

        * :n_regs:     width of the register-indexed signals
        * :n_int_alus: width of the Function-Unit-indexed signals

        Note: `wr_pend_o` (the FU-Reg matrix wr_pend_o) only becomes an
        attribute once elaborate() has run.  `req_rel_i` and `req_rel_o`
        are declared here but not connected in elaborate().
    """

    def __init__(self, n_regs, n_int_alus):
        self.n_regs = n_regs
        self.n_int_alus = n_int_alus

        # register selection inputs
        self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
        self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
        self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in

        # global read / write pending outputs
        self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
        self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)

        # register selection outputs
        self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
        self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
        self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)

        # per-Function-Unit signals
        self.req_rel_i = Signal(n_int_alus, reset_less = True)
        self.readable_o = Signal(n_int_alus, reset_less=True)
        self.writable_o = Signal(n_int_alus, reset_less=True)

        self.go_rd_i = Signal(n_int_alus, reset_less=True)
        self.go_wr_i = Signal(n_int_alus, reset_less=True)
        self.go_die_i = Signal(n_int_alus, reset_less=True)
        self.req_rel_o = Signal(n_int_alus, reset_less=True)
        self.fn_issue_i = Signal(n_int_alus, reset_less=True)

        # Note: FURegs wr_pend_o is also outputted from here, for use in
        # WaWGrid (see elaborate)

    def elaborate(self, platform):
        m = Module()
        comb = m.d.comb

        n_fus = self.n_int_alus

        # Integer FU-FU Dep Matrix
        fu_fu = FUFUDepMatrix(n_fus, n_fus)
        m.submodules.intfudeps = fu_fu
        # Integer FU-Reg Dep Matrix
        fu_reg = FURegDepMatrix(n_fus, self.n_regs)
        m.submodules.intregdeps = fu_reg

        # also output for use in WaWGrid
        self.wr_pend_o = fu_reg.wr_pend_o

        # pending vectors: exported globally, looped back into the
        # FU-Reg matrix, and passed on to the FU-FU matrix
        comb += [
            self.g_int_rd_pend_o.eq(fu_reg.rd_rsel_o),
            self.g_int_wr_pend_o.eq(fu_reg.wr_rsel_o),
            fu_reg.rd_pend_i.eq(fu_reg.rd_rsel_o),
            fu_reg.wr_pend_i.eq(fu_reg.wr_rsel_o),
            fu_fu.rd_pend_i.eq(fu_reg.rd_pend_o),
            fu_fu.wr_pend_i.eq(fu_reg.wr_pend_o),
        ]

        # FU-FU matrix: issue / go controls in, readable / writable out
        comb += [
            fu_fu.issue_i.eq(self.fn_issue_i),
            fu_fu.go_rd_i.eq(self.go_rd_i),
            fu_fu.go_wr_i.eq(self.go_wr_i),
            fu_fu.go_die_i.eq(self.go_die_i),
            self.readable_o.eq(fu_fu.readable_o),
            self.writable_o.eq(fu_fu.writable_o),
        ]

        # Connect function issue / arrays, and dest/src1/src2
        comb += [
            fu_reg.dest_i.eq(self.dest_i),
            fu_reg.src1_i.eq(self.src1_i),
            fu_reg.src2_i.eq(self.src2_i),
            fu_reg.go_rd_i.eq(self.go_rd_i),
            fu_reg.go_wr_i.eq(self.go_wr_i),
            fu_reg.go_die_i.eq(self.go_die_i),
            fu_reg.issue_i.eq(self.fn_issue_i),
        ]

        # register selection outputs
        comb += [
            self.dest_rsel_o.eq(fu_reg.dest_rsel_o),
            self.src1_rsel_o.eq(fu_reg.src1_rsel_o),
            self.src2_rsel_o.eq(fu_reg.src2_rsel_o),
        ]

        return m