X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fexperiment%2Fscore6600.py;h=05de08fa1de491d280b0c635a54b705024396a6a;hb=a7b8337867b15252dc1c63b7e3dc757d3449a6e1;hp=5ab358d9e7d340d9c2a1496f6924e5240af30cdb;hpb=7ae6c5660cffacf5b49f4a6430025a9c0610af83;p=soc.git diff --git a/src/experiment/score6600.py b/src/experiment/score6600.py index 5ab358d9..05de08fa 100644 --- a/src/experiment/score6600.py +++ b/src/experiment/score6600.py @@ -1,71 +1,146 @@ from nmigen.compat.sim import run_simulation from nmigen.cli import verilog, rtlil -from nmigen import Module, Const, Signal, Array, Cat, Elaboratable +from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory from regfile.regfile import RegFileArray, treereduce -from scoreboard.fn_unit import IntFnUnit, FPFnUnit, LDFnUnit, STFnUnit from scoreboard.fu_fu_matrix import FUFUDepMatrix from scoreboard.fu_reg_matrix import FURegDepMatrix from scoreboard.global_pending import GlobalPending from scoreboard.group_picker import GroupPicker -from scoreboard.issue_unit import IntFPIssueUnit, RegDecode -from scoreboard.shadow import ShadowMatrix, WaWGrid +from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode +from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord +from scoreboard.instruction_q import Instruction, InstructionQ +from scoreboard.memfu import MemFunctionUnits from compalu import ComputationUnitNoDelay +from compldst import LDSTCompUnit from alu_hier import ALU, BranchALU from nmutil.latch import SRLatch +from nmutil.nmoperator import eq -from random import randint +from random import randint, seed +from copy import deepcopy +from math import log -class CompUnits(Elaboratable): +class TestMemory(Elaboratable): + def __init__(self, regwid, addrw): + self.ddepth = 1 # regwid //8 + depth = (1<>self.ddepth] + + def st(self, addr, data): + self.mem[addr>>self.ddepth] = data & ((1< Mem FUs + comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel + comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit + + # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well, + # in a transitive fashion). This cycle activates based on LDSTCompUnit + # issue_i. multi-issue gets a bit more complex but not a lot. + prior_ldsts = Signal(cul.n_units, reset_less=True) + sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o) + with m.If(self.ls_oper_i[2]): # LD bit of operand + comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts) + with m.If(self.ls_oper_i[3]): # ST bit of operand + comb += memfus.st_i.eq(cul.issue_i | prior_ldsts) + + # TODO: adr_rel_o needs to go into L1 Cache. for now, + # just immediately activate go_adr + comb += cul.go_ad_i.eq(cul.adr_rel_o) + + # connect up address data + comb += memfus.addrs_i[0].eq(cul.units[0].data_o) + comb += memfus.addrs_i[1].eq(cul.units[1].data_o) + + # connect loadable / storable to go_ld/go_st. + # XXX should only be done when the memory ld/st has actually happened! + go_st_i = Signal(cul.n_units, reset_less=True) + go_ld_i = Signal(cul.n_units, reset_less=True) + comb += go_ld_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\ + cul.req_rel_o & cul.ld_o) + comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\ + cul.sto_rel_o & cul.st_o) + comb += memfus.go_ld_i.eq(go_ld_i) + comb += memfus.go_st_i.eq(go_st_i) + #comb += cul.go_wr_i.eq(memfus.loadable_o & memfus.addr_nomatch_o) + comb += cul.go_st_i.eq(go_st_i) + + #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) + #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) + #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus]) + + #--------- + # merge shadow matrices outputs + #--------- + + # these are explained in ShadowMatrix docstring, and are to be + # connected to the FUReg and FUFU Matrices, to get them to reset + anydie = Signal(n_intfus, reset_less=True) + allshadown = Signal(n_intfus, reset_less=True) + shreset = Signal(n_intfus, reset_less=True) + comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o) + comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o) + comb += shreset.eq(bspec.match_g_o | bspec.match_f_o) #--------- # connect fu-fu matrix @@ -307,78 +611,114 @@ class Scoreboard(Elaboratable): go_wr_o = intpick1.go_wr_o go_rd_i = intfus.go_rd_i go_wr_i = intfus.go_wr_i + go_die_i = intfus.go_die_i # NOTE: connect to the shadowed versions so that they can "die" (reset) - m.d.comb += go_rd_i[0:n_int_fus].eq(go_rd_rst[0:n_int_fus]) # rd - m.d.comb += go_wr_i[0:n_int_fus].eq(go_wr_rst[0:n_int_fus]) # wr + comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd + comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr + comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die # Connect Picker #--------- - m.d.comb += intpick1.rd_rel_i[0:n_int_fus].eq(cu.rd_rel_o[0:n_int_fus]) - m.d.comb += intpick1.req_rel_i[0:n_int_fus].eq(cu.req_rel_o[0:n_int_fus]) + comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus]) + comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus]) int_rd_o = intfus.readable_o int_wr_o = intfus.writable_o - m.d.comb += intpick1.readable_i[0:n_int_fus].eq(int_rd_o[0:n_int_fus]) - m.d.comb += intpick1.writable_i[0:n_int_fus].eq(int_wr_o[0:n_int_fus]) + comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus]) + comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus]) #--------- # Shadow Matrix #--------- - m.d.comb += shadows.issue_i.eq(fn_issue_o) - # these are explained in ShadowMatrix docstring, and are to be - # connected to the FUReg and FUFU Matrices, to get them to reset - # NOTE: do NOT connect these to the Computation Units. The CUs need to - # do something slightly different (due to the revolving-door SRLatches) - m.d.comb += go_rd_rst.eq(go_rd_o | shadows.go_die_o) - m.d.comb += go_wr_rst.eq(go_wr_o | shadows.go_die_o) + comb += shadows.issue_i.eq(fn_issue_o) + #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus]) + comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus]) + #--------- + # NOTE; this setup is for the instruction order preservation... # connect shadows / go_dies to Computation Units - m.d.comb += cu.shadown_i[0:n_int_fus].eq(shadows.shadown_o[0:n_int_fus]) - m.d.comb += cu.go_die_i[0:n_int_fus].eq(shadows.go_die_o[0:n_int_fus]) + comb += cu.shadown_i[0:n_intfus].eq(allshadown) + comb += cu.go_die_i[0:n_intfus].eq(anydie) # ok connect first n_int_fu shadows to busy lines, to create an # instruction-order linked-list-like arrangement, using a bit-matrix # (instead of e.g. a ring buffer). - # XXX TODO # when written, the shadow can be cancelled (and was good) - m.d.comb += shadows.s_good_i[0:n_int_fus].eq(go_wr_o[0:n_int_fus]) - - # work out the current-activated busy unit (by recording the old one) - with m.If(fn_issue_o): # only update prev bit if instruction issued - m.d.sync += fn_issue_prev.eq(fn_issue_o) + for i in range(n_intfus): + comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus]) # *previous* instruction shadows *current* instruction, and, obviously, # if the previous is completed (!busy) don't cast the shadow! - m.d.comb += prev_shadow.eq(~fn_issue_o & fn_issue_prev & cu.busy_o) - for i in range(n_int_fus): - m.d.comb += shadows.shadow_i[i].eq(prev_shadow) + comb += prev_shadow.eq(~fn_issue_o & cu.busy_o) + for i in range(n_intfus): + comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow) + + #--------- + # ... and this is for branch speculation. it uses the extra bit + # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1) + # only needs to set shadow_i, s_fail_i and s_good_i + + # issue captures shadow_i (if enabled) + comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus]) + + bactive = Signal(reset_less=True) + comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i) + + # instruction being issued (fn_issue_o) has a shadow cast by the branch + with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)): + comb += bshadow.issue_i.eq(fn_issue_o) + for i in range(n_intfus): + with m.If(fn_issue_o & (Const(1<= 4 if is_branch: branch_ok, branch_fail = dest - dest = None + dest = src2 # ok zip up the branch success / fail instructions and # drop them into the queue, one marked "to have branch success" # the other to be marked shadow branch "fail". # one out of each of these will be cancelled for ok, fl in zip(branch_ok, branch_fail): - instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0))) - instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1))) - print ("instr %d: (%d, %d, %d, %d)" % (i, src1, src2, dest, op)) + if ok: + instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0))) + if fl: + instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1))) + print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \ + (i, src1, src2, dest, op, shadow_on, shadow_off)) yield from int_instr(dut, op, src1, src2, dest, - shadow_on, shadow_off) - yield - yield from wait_for_issue(dut) - branch_direction = dut.branch_direction_o # which way branch went + shadow_on, shadow_off) # wait for all instructions to stop before checking yield yield from wait_for_busy_clear(dut) - for (src1, src2, dest, op, (shadow_on, shadow_off)) in insts: + i = -1 + while siminsts: + instr = siminsts.pop(0) + if instr is None: + continue + (src1, src2, dest, op, (shadow_on, shadow_off)) = instr + i += 1 is_branch = op >= 4 if is_branch: branch_ok, branch_fail = dest - dest = None + dest = src2 + print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \ + (i, src1, src2, dest, op, shadow_on, shadow_off)) branch_res = alusim.op(op, src1, src2, dest) if is_branch: if branch_res: - insts.append(branch_ok) + siminsts += branch_ok else: - insts.append(branch_fail) + siminsts += branch_fail # check status yield from alusim.check(dut) @@ -582,41 +1131,46 @@ def scoreboard_branch_sim(dut, alusim): def scoreboard_sim(dut, alusim): - yield dut.int_store_i.eq(1) + seed(0) - for i in range(20): + for i in range(1): # set random values in the registers for i in range(1, dut.n_regs): - val = 31+i*3 val = randint(0, (1<