X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fexperiment%2Fscore6600.py;h=05de08fa1de491d280b0c635a54b705024396a6a;hb=a7b8337867b15252dc1c63b7e3dc757d3449a6e1;hp=1c0caca0a74acf1f3139fd823bbcc18bcce78440;hpb=a878168fb22776aa7a76f617255f5d88952782e6;p=soc.git diff --git a/src/experiment/score6600.py b/src/experiment/score6600.py index 1c0caca0..05de08fa 100644 --- a/src/experiment/score6600.py +++ b/src/experiment/score6600.py @@ -1,62 +1,146 @@ from nmigen.compat.sim import run_simulation from nmigen.cli import verilog, rtlil -from nmigen import Module, Const, Signal, Array, Cat, Elaboratable +from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory from regfile.regfile import RegFileArray, treereduce -from scoreboard.fn_unit import IntFnUnit, FPFnUnit, LDFnUnit, STFnUnit from scoreboard.fu_fu_matrix import FUFUDepMatrix from scoreboard.fu_reg_matrix import FURegDepMatrix from scoreboard.global_pending import GlobalPending from scoreboard.group_picker import GroupPicker -from scoreboard.issue_unit import IntFPIssueUnit, RegDecode +from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode +from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord +from scoreboard.instruction_q import Instruction, InstructionQ +from scoreboard.memfu import MemFunctionUnits from compalu import ComputationUnitNoDelay +from compldst import LDSTCompUnit -from alu_hier import ALU +from alu_hier import ALU, BranchALU from nmutil.latch import SRLatch +from nmutil.nmoperator import eq -from random import randint +from random import randint, seed +from copy import deepcopy +from math import log -class CompUnits(Elaboratable): - def __init__(self, rwid, n_units): +class TestMemory(Elaboratable): + def __init__(self, regwid, addrw): + self.ddepth = 1 # regwid //8 + depth = (1<>self.ddepth] + + def st(self, addr, data): + self.mem[addr>>self.ddepth] = data & ((1< Mem FUs + comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel + comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit + + # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well, + # in a transitive fashion). This cycle activates based on LDSTCompUnit + # issue_i. multi-issue gets a bit more complex but not a lot. + prior_ldsts = Signal(cul.n_units, reset_less=True) + sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o) + with m.If(self.ls_oper_i[2]): # LD bit of operand + comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts) + with m.If(self.ls_oper_i[3]): # ST bit of operand + comb += memfus.st_i.eq(cul.issue_i | prior_ldsts) + + # TODO: adr_rel_o needs to go into L1 Cache. for now, + # just immediately activate go_adr + comb += cul.go_ad_i.eq(cul.adr_rel_o) + + # connect up address data + comb += memfus.addrs_i[0].eq(cul.units[0].data_o) + comb += memfus.addrs_i[1].eq(cul.units[1].data_o) + + # connect loadable / storable to go_ld/go_st. + # XXX should only be done when the memory ld/st has actually happened! + go_st_i = Signal(cul.n_units, reset_less=True) + go_ld_i = Signal(cul.n_units, reset_less=True) + comb += go_ld_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\ + cul.req_rel_o & cul.ld_o) + comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\ + cul.sto_rel_o & cul.st_o) + comb += memfus.go_ld_i.eq(go_ld_i) + comb += memfus.go_st_i.eq(go_st_i) + #comb += cul.go_wr_i.eq(memfus.loadable_o & memfus.addr_nomatch_o) + comb += cul.go_st_i.eq(go_st_i) + + #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) + #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) + #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus]) + + #--------- + # merge shadow matrices outputs + #--------- + + # these are explained in ShadowMatrix docstring, and are to be + # connected to the FUReg and FUFU Matrices, to get them to reset + anydie = Signal(n_intfus, reset_less=True) + allshadown = Signal(n_intfus, reset_less=True) + shreset = Signal(n_intfus, reset_less=True) + comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o) + comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o) + comb += shreset.eq(bspec.match_g_o | bspec.match_f_o) #--------- # connect fu-fu matrix #--------- - # Group Picker... done manually for now. TODO: cat array of pick sigs + # Group Picker... done manually for now. go_rd_o = intpick1.go_rd_o go_wr_o = intpick1.go_wr_o go_rd_i = intfus.go_rd_i go_wr_i = intfus.go_wr_i - m.d.comb += go_rd_i[0:n_int_fus].eq(go_rd_o[0:n_int_fus]) # rd - m.d.comb += go_wr_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus]) # wr + go_die_i = intfus.go_die_i + # NOTE: connect to the shadowed versions so that they can "die" (reset) + comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd + comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr + comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die # Connect Picker #--------- - m.d.comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus]) - m.d.comb += intpick1.req_rel_i[0:n_int_fus].eq(cu.req_rel_o[0:n_int_fus]) + comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus]) + comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus]) int_rd_o = intfus.readable_o int_wr_o = intfus.writable_o - m.d.comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus]) - m.d.comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus]) + comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus]) + comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus]) + + #--------- + # Shadow Matrix + #--------- + + comb += shadows.issue_i.eq(fn_issue_o) + #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus]) + comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus]) + #--------- + # NOTE; this setup is for the instruction order preservation... + + # connect shadows / go_dies to Computation Units + comb += cu.shadown_i[0:n_intfus].eq(allshadown) + comb += cu.go_die_i[0:n_intfus].eq(anydie) + + # ok connect first n_int_fu shadows to busy lines, to create an + # instruction-order linked-list-like arrangement, using a bit-matrix + # (instead of e.g. a ring buffer). + + # when written, the shadow can be cancelled (and was good) + for i in range(n_intfus): + comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus]) + + # *previous* instruction shadows *current* instruction, and, obviously, + # if the previous is completed (!busy) don't cast the shadow! + comb += prev_shadow.eq(~fn_issue_o & cu.busy_o) + for i in range(n_intfus): + comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow) + + #--------- + # ... and this is for branch speculation. it uses the extra bit + # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1) + # only needs to set shadow_i, s_fail_i and s_good_i + + # issue captures shadow_i (if enabled) + comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus]) + + bactive = Signal(reset_less=True) + comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i) + + # instruction being issued (fn_issue_o) has a shadow cast by the branch + with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)): + comb += bshadow.issue_i.eq(fn_issue_o) + for i in range(n_intfus): + with m.If(fn_issue_o & (Const(1<> (src2 & maxbits) + elif op == IBGT: + val = int(src1 > src2) + elif op == IBLT: + val = int(src1 < src2) + elif op == IBEQ: + val = int(src1 == src2) + elif op == IBNE: + val = int(src1 != src2) + else: + return 0 # LD/ST TODO + val &= maxbits + self.setval(dest, val) + return val def setval(self, dest, val): + print ("sim setval", dest, hex(val)) self.regs[dest] = val def dump(self, dut): @@ -357,15 +917,50 @@ class RegSim: yield from self.dump(dut) assert False -def int_instr(dut, alusim, op, src1, src2, dest): - for i in range(len(dut.int_insn_i)): - yield dut.int_insn_i[i].eq(0) +def instr_q(dut, op, op_imm, imm, src1, src2, dest, + branch_success, branch_fail): + instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm, + 'src1_i': src1, 'src2_i': src2}] + + sendlen = 1 + for idx in range(sendlen): + yield from eq(dut.data_i[idx], instrs[idx]) + di = yield dut.data_i[idx] + print ("senddata %d %x" % (idx, di)) + yield dut.p_add_i.eq(sendlen) + yield + o_p_ready = yield dut.p_ready_o + while not o_p_ready: + yield + o_p_ready = yield dut.p_ready_o + + yield dut.p_add_i.eq(0) + + +def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail): + yield from disable_issue(dut) yield dut.int_dest_i.eq(dest) yield dut.int_src1_i.eq(src1) yield dut.int_src2_i.eq(src2) - yield dut.int_insn_i[op].eq(1) + if (op & (0x3<<2)) != 0: # branch + yield dut.brissue.insn_i.eq(1) + yield dut.br_oper_i.eq(Const(op & 0x3, 2)) + yield dut.br_imm_i.eq(imm) + dut_issue = dut.brissue + else: + yield dut.aluissue.insn_i.eq(1) + yield dut.alu_oper_i.eq(Const(op & 0x3, 2)) + yield dut.alu_imm_i.eq(imm) + dut_issue = dut.aluissue yield dut.reg_enable_i.eq(1) - alusim.op(op, src1, src2, dest) + + # these indicate that the instruction is to be made shadow-dependent on + # (either) branch success or branch fail + yield dut.branch_fail_i.eq(branch_fail) + yield dut.branch_succ_i.eq(branch_success) + + yield + yield from wait_for_issue(dut, dut_issue) def print_reg(dut, rnums): @@ -377,41 +972,205 @@ def print_reg(dut, rnums): print ("reg %s: %s" % (','.join(rnums), ','.join(rs))) +def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3): + insts = [] + for i in range(n_ops): + src1 = randint(1, dut.n_regs-1) + src2 = randint(1, dut.n_regs-1) + imm = randint(1, (1<= 4 + if is_branch: + branch_ok, branch_fail = dest + dest = src2 + # ok zip up the branch success / fail instructions and + # drop them into the queue, one marked "to have branch success" + # the other to be marked shadow branch "fail". + # one out of each of these will be cancelled + for ok, fl in zip(branch_ok, branch_fail): + if ok: + instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0))) + if fl: + instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1))) + print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \ + (i, src1, src2, dest, op, shadow_on, shadow_off)) + yield from int_instr(dut, op, src1, src2, dest, + shadow_on, shadow_off) + + # wait for all instructions to stop before checking + yield + yield from wait_for_busy_clear(dut) + + i = -1 + while siminsts: + instr = siminsts.pop(0) + if instr is None: + continue + (src1, src2, dest, op, (shadow_on, shadow_off)) = instr + i += 1 + is_branch = op >= 4 + if is_branch: + branch_ok, branch_fail = dest + dest = src2 + print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \ + (i, src1, src2, dest, op, shadow_on, shadow_off)) + branch_res = alusim.op(op, src1, src2, dest) + if is_branch: + if branch_res: + siminsts += branch_ok + else: + siminsts += branch_fail + + # check status + yield from alusim.check(dut) + yield from alusim.dump(dut) + + def scoreboard_sim(dut, alusim): - yield dut.int_store_i.eq(0) + seed(0) for i in range(1): # set random values in the registers for i in range(1, dut.n_regs): - yield dut.intregs.regs[i].reg.eq(31+i*3) - alusim.setval(i, 31+i*3) + val = randint(0, (1<