X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fexperiment%2Fscore6600.py;h=ed953e8b71a3b80ff82fde4c93a115d8fcd39303;hb=9160489ce48a008af3e07fdc81d458a0b366f997;hp=d9144826c73ee3408152833088efb0f1f3006419;hpb=5f82e9d7705c06d5ac1db3568024e4e6bd6a71b6;p=soc.git diff --git a/src/experiment/score6600.py b/src/experiment/score6600.py index d9144826..ed953e8b 100644 --- a/src/experiment/score6600.py +++ b/src/experiment/score6600.py @@ -1,6 +1,6 @@ from nmigen.compat.sim import run_simulation from nmigen.cli import verilog, rtlil -from nmigen import Module, Const, Signal, Array, Cat, Elaboratable +from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory from regfile.regfile import RegFileArray, treereduce from scoreboard.fu_fu_matrix import FUFUDepMatrix @@ -9,14 +9,57 @@ from scoreboard.global_pending import GlobalPending from scoreboard.group_picker import GroupPicker from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord +from scoreboard.instruction_q import Instruction, InstructionQ +from scoreboard.memfu import MemFunctionUnits from compalu import ComputationUnitNoDelay +from compldst import LDSTCompUnit from alu_hier import ALU, BranchALU from nmutil.latch import SRLatch +from nmutil.nmoperator import eq from random import randint, seed from copy import deepcopy +from math import log + + +class TestMemory(Elaboratable): + def __init__(self, regwid, addrw): + self.ddepth = 1 # regwid //8 + depth = (1<>self.ddepth] + + def st(self, addr, data): + self.mem[addr>>self.ddepth] = data & ((1< Mem FUs + comb += memfus.addr_we_i.eq(cul.adr_rel_o) # Match enable on adr rel + + comb += memfus.addrs_i[0].eq(cul.units[0].data_o) + comb += memfus.addrs_i[1].eq(cul.units[1].data_o) + + #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) + #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) + #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus]) + #--------- # merge shadow matrices outputs #--------- @@ -464,10 +606,6 @@ class Scoreboard(Elaboratable): for i in range(n_intfus): comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus]) - # work out the current-activated busy unit (by recording the old one) - with m.If(fn_issue_o): # only update prev bit if instruction issued - sync += fn_issue_prev.eq(fn_issue_o) - # *previous* instruction shadows *current* instruction, and, obviously, # if the previous is completed (!busy) don't cast the shadow! comb += prev_shadow.eq(~fn_issue_o & cu.busy_o) @@ -499,9 +637,9 @@ class Scoreboard(Elaboratable): with m.If(br1.issue_i): sync += bspec.active_i.eq(1) with m.If(self.branch_succ_i): - comb += bspec.good_i.eq(fn_issue_o & 0x1f) + comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT with m.If(self.branch_fail_i): - comb += bspec.fail_i.eq(fn_issue_o & 0x1f) + comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT # branch is active (TODO: a better signal: this is over-using the # go_write signal - actually the branch should not be "writing") @@ -536,7 +674,6 @@ class Scoreboard(Elaboratable): return m - def __iter__(self): yield from self.intregs yield from self.fpregs @@ -551,6 +688,131 @@ class Scoreboard(Elaboratable): def ports(self): return list(self) + +class IssueToScoreboard(Elaboratable): + + def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs): + self.qlen = qlen + self.n_in = n_in + self.n_out = n_out + self.rwid = rwid + self.opw = opwid + self.n_regs = n_regs + + mqbits = (int(log(qlen) / log(2))+2, False) + self.p_add_i = Signal(mqbits) # instructions to add (from data_i) + self.p_ready_o = Signal() # instructions were added + self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid) + + self.busy_o = Signal(reset_less=True) # at least one CU is busy + self.qlen_o = Signal(mqbits, reset_less=True) + + def elaborate(self, platform): + m = Module() + comb = m.d.comb + sync = m.d.sync + + iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out) + sc = Scoreboard(self.rwid, self.n_regs) + mem = TestMemory(self.rwid, 8) # not too big, takes too long + m.submodules.iq = iq + m.submodules.sc = sc + m.submodules.mem = mem + + # get at the regfile for testing + self.intregs = sc.intregs + + # and the "busy" signal and instruction queue length + comb += self.busy_o.eq(sc.busy_o) + comb += self.qlen_o.eq(iq.qlen_o) + + # link up instruction queue + comb += iq.p_add_i.eq(self.p_add_i) + comb += self.p_ready_o.eq(iq.p_ready_o) + for i in range(self.n_in): + comb += eq(iq.data_i[i], self.data_i[i]) + + # take instruction and process it. note that it's possible to + # "inspect" the queue contents *without* actually removing the + # items. items are only removed when the + + # in "waiting" state + wait_issue_br = Signal() + wait_issue_alu = Signal() + wait_issue_ls = Signal() + + with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls): + # set instruction pop length to 1 if the unit accepted + with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)): + with m.If(iq.qlen_o != 0): + comb += iq.n_sub_i.eq(1) + with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)): + with m.If(iq.qlen_o != 0): + comb += iq.n_sub_i.eq(1) + with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)): + with m.If(iq.qlen_o != 0): + comb += iq.n_sub_i.eq(1) + + # see if some instruction(s) are here. note that this is + # "inspecting" the in-place queue. note also that on the + # cycle following "waiting" for fn_issue_o to be set, the + # "resetting" done above (insn_i=0) could be re-ASSERTed. + with m.If(iq.qlen_o != 0): + # get the operands and operation + imm = iq.data_o[0].imm_i + dest = iq.data_o[0].dest_i + src1 = iq.data_o[0].src1_i + src2 = iq.data_o[0].src2_i + op = iq.data_o[0].oper_i + opi = iq.data_o[0].opim_i # immediate set + + # set the src/dest regs + comb += sc.int_dest_i.eq(dest) + comb += sc.int_src1_i.eq(src1) + comb += sc.int_src2_i.eq(src2) + comb += sc.reg_enable_i.eq(1) # enable the regfile + + # choose a Function-Unit-Group + with m.If((op & (0x3<<2)) != 0): # branch + comb += sc.br_oper_i.eq(Cat(op[0:2], opi)) + comb += sc.br_imm_i.eq(imm) + comb += sc.brissue.insn_i.eq(1) + comb += wait_issue_br.eq(1) + with m.Elif((op & (0x3<<4)) != 0): # ld/st + # see compldst.py + # bit 0: ADD/SUB + # bit 1: immed + # bit 4: LD + # bit 5: ST + comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6])) + comb += sc.ls_imm_i.eq(imm) + comb += sc.lsissue.insn_i.eq(1) + comb += wait_issue_ls.eq(1) + with m.Else(): # alu + comb += sc.alu_oper_i.eq(Cat(op[0:2], opi)) + comb += sc.alu_imm_i.eq(imm) + comb += sc.aluissue.insn_i.eq(1) + comb += wait_issue_alu.eq(1) + + # XXX TODO + # these indicate that the instruction is to be made + # shadow-dependent on + # (either) branch success or branch fail + #yield sc.branch_fail_i.eq(branch_fail) + #yield sc.branch_succ_i.eq(branch_success) + + return m + + def __iter__(self): + yield self.p_ready_o + for o in self.data_i: + yield from list(o) + yield self.p_add_i + + def ports(self): + return list(self) + + IADD = 0 ISUB = 1 IMUL = 2 @@ -560,15 +822,19 @@ IBLT = 5 IBEQ = 6 IBNE = 7 + class RegSim: def __init__(self, rwidth, nregs): self.rwidth = rwidth self.regs = [0] * nregs - def op(self, op, src1, src2, dest): + def op(self, op, op_imm, imm, src1, src2, dest): maxbits = (1 << self.rwidth) - 1 src1 = self.regs[src1] & maxbits - src2 = self.regs[src2] & maxbits + if op_imm: + src2 = imm + else: + src2 = self.regs[src2] & maxbits if op == IADD: val = src1 + src2 elif op == ISUB: @@ -585,6 +851,8 @@ class RegSim: val = int(src1 == src2) elif op == IBNE: val = int(src1 != src2) + else: + return 0 # LD/ST TODO val &= maxbits self.setval(dest, val) return val @@ -607,7 +875,27 @@ class RegSim: yield from self.dump(dut) assert False -def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail): +def instr_q(dut, op, op_imm, imm, src1, src2, dest, + branch_success, branch_fail): + instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm, + 'src1_i': src1, 'src2_i': src2}] + + sendlen = 1 + for idx in range(sendlen): + yield from eq(dut.data_i[idx], instrs[idx]) + di = yield dut.data_i[idx] + print ("senddata %d %x" % (idx, di)) + yield dut.p_add_i.eq(sendlen) + yield + o_p_ready = yield dut.p_ready_o + while not o_p_ready: + yield + o_p_ready = yield dut.p_ready_o + + yield dut.p_add_i.eq(0) + + +def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail): yield from disable_issue(dut) yield dut.int_dest_i.eq(dest) yield dut.int_src1_i.eq(src1) @@ -615,10 +903,12 @@ def int_instr(dut, op, src1, src2, dest, branch_success, branch_fail): if (op & (0x3<<2)) != 0: # branch yield dut.brissue.insn_i.eq(1) yield dut.br_oper_i.eq(Const(op & 0x3, 2)) + yield dut.br_imm_i.eq(imm) dut_issue = dut.brissue else: yield dut.aluissue.insn_i.eq(1) yield dut.alu_oper_i.eq(Const(op & 0x3, 2)) + yield dut.alu_imm_i.eq(imm) dut_issue = dut.aluissue yield dut.reg_enable_i.eq(1) @@ -645,13 +935,15 @@ def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3): for i in range(n_ops): src1 = randint(1, dut.n_regs-1) src2 = randint(1, dut.n_regs-1) + imm = randint(1, (1<