src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = 1 # regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117
 118         # outputs
 119         self.busy_o = Signal(n_units, reset_less=True)
 120         self.rd_rel_o = Signal(n_units, reset_less=True)
 121         self.req_rel_o = Signal(n_units, reset_less=True)
 122         if ldstmode:
 123             self.adr_rel_o = Signal(n_units, reset_less=True)
 124             self.sto_rel_o = Signal(n_units, reset_less=True)
 125             self.req_rel_o = Signal(n_units, reset_less=True)
 126             self.load_mem_o = Signal(n_units, reset_less=True)
 127             self.stwd_mem_o = Signal(n_units, reset_less=True)
 128
 129         # in/out register data (note: not register#, actual data)
 130         self.data_o = Signal(rwid, reset_less=True)
 131         self.src1_i = Signal(rwid, reset_less=True)
 132         self.src2_i = Signal(rwid, reset_less=True)
 133         # input operand
 134
 135     def elaborate(self, platform):
 136         m = Module()
 137         comb = m.d.comb
 138
 139         for i, alu in enumerate(self.units):
 140             setattr(m.submodules, "comp%d" % i, alu)
 141
 142         go_rd_l = []
 143         go_wr_l = []
 144         issue_l = []
 145         busy_l = []
 146         req_rel_l = []
 147         rd_rel_l = []
 148         shadow_l = []
 149         godie_l = []
 150         for alu in self.units:
 151             req_rel_l.append(alu.req_rel_o)
 152             rd_rel_l.append(alu.rd_rel_o)
 153             shadow_l.append(alu.shadown_i)
 154             godie_l.append(alu.go_die_i)
 155             go_wr_l.append(alu.go_wr_i)
 156             go_rd_l.append(alu.go_rd_i)
 157             issue_l.append(alu.issue_i)
 158             busy_l.append(alu.busy_o)
 159         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 160         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 161         comb += self.busy_o.eq(Cat(*busy_l))
 162         comb += Cat(*godie_l).eq(self.go_die_i)
 163         comb += Cat(*shadow_l).eq(self.shadown_i)
 164         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 165         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 166         comb += Cat(*issue_l).eq(self.issue_i)
 167
 168         # connect data register input/output
 169
 170         # merge (OR) all integer FU / ALU outputs to a single value
 171         # bit of a hack: treereduce needs a list with an item named "data_o"
 172         if self.units:
 173             data_o = treereduce(self.units)
 174             comb += self.data_o.eq(data_o)
 175
 176         for i, alu in enumerate(self.units):
 177             comb += alu.src1_i.eq(self.src1_i)
 178             comb += alu.src2_i.eq(self.src2_i)
 179
 180         if not self.ldstmode:
 181             return m
 182
 183         ldmem_l = []
 184         stmem_l = []
 185         go_ad_l = []
 186         adr_rel_l = []
 187         sto_rel_l = []
 188         for alu in self.units:
 189             adr_rel_l.append(alu.adr_rel_o)
 190             sto_rel_l.append(alu.sto_rel_o)
 191             ldmem_l.append(alu.load_mem_o)
 192             stmem_l.append(alu.stwd_mem_o)
 193             go_ad_l.append(alu.go_ad_i)
 194         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 195         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 196         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 197         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 198         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 199
 200         return m
 201
 202
 203 class CompUnitLDSTs(CompUnitsBase):
 204
 205     def __init__(self, rwid, opwid, mem):
 206         """ Inputs:
 207
 208             * :rwid:   bit width of register file(s) - both FP and INT
 209             * :opwid:  operand bit width
 210         """
 211         self.opwid = opwid
 212
 213         # inputs
 214         self.oper_i = Signal(opwid, reset_less=True)
 215         self.imm_i = Signal(rwid, reset_less=True)
 216
 217         # Int ALUs
 218         add1 = ALU(rwid)
 219         add2 = ALU(rwid)
 220
 221         units = []
 222         for alu in [add1, add2]:
 223             aluopwid = 4 # see compldst.py for "internal" opcode
 224             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 225
 226         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 227
 228     def elaborate(self, platform):
 229         m = CompUnitsBase.elaborate(self, platform)
 230         comb = m.d.comb
 231
 232         # hand the same operation to all units, 4 lower bits though
 233         for alu in self.units:
 234             comb += alu.oper_i[0:4].eq(self.oper_i)
 235             comb += alu.imm_i.eq(self.imm_i)
 236             comb += alu.isalu_i.eq(0)
 237
 238         return m
 239
 240
 241 class CompUnitALUs(CompUnitsBase):
 242
 243     def __init__(self, rwid, opwid, n_alus):
 244         """ Inputs:
 245
 246             * :rwid:   bit width of register file(s) - both FP and INT
 247             * :opwid:  operand bit width
 248         """
 249         self.opwid = opwid
 250
 251         # inputs
 252         self.oper_i = Signal(opwid, reset_less=True)
 253         self.imm_i = Signal(rwid, reset_less=True)
 254
 255         # Int ALUs
 256         alus = []
 257         for i in range(n_alus):
 258             alus.append(ALU(rwid))
 259
 260         units = []
 261         for alu in alus:
 262             aluopwid = 3 # extra bit for immediate mode
 263             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 264
 265         CompUnitsBase.__init__(self, rwid, units)
 266
 267     def elaborate(self, platform):
 268         m = CompUnitsBase.elaborate(self, platform)
 269         comb = m.d.comb
 270
 271         # hand the same operation to all units, only lower 3 bits though
 272         for alu in self.units:
 273             comb += alu.oper_i[0:3].eq(self.oper_i)
 274             comb += alu.imm_i.eq(self.imm_i)
 275
 276         return m
 277
 278
 279 class CompUnitBR(CompUnitsBase):
 280
 281     def __init__(self, rwid, opwid):
 282         """ Inputs:
 283
 284             * :rwid:   bit width of register file(s) - both FP and INT
 285             * :opwid:  operand bit width
 286
 287             Note: bgt unit is returned so that a shadow unit can be created
 288             for it
 289         """
 290         self.opwid = opwid
 291
 292         # inputs
 293         self.oper_i = Signal(opwid, reset_less=True)
 294         self.imm_i = Signal(rwid, reset_less=True)
 295
 296         # Branch ALU and CU
 297         self.bgt = BranchALU(rwid)
 298         aluopwid = 3 # extra bit for immediate mode
 299         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 300         CompUnitsBase.__init__(self, rwid, [self.br1])
 301
 302     def elaborate(self, platform):
 303         m = CompUnitsBase.elaborate(self, platform)
 304         comb = m.d.comb
 305
 306         # hand the same operation to all units
 307         for alu in self.units:
 308             comb += alu.oper_i.eq(self.oper_i)
 309             comb += alu.imm_i.eq(self.imm_i)
 310
 311         return m
 312
 313
 314 class FunctionUnits(Elaboratable):
 315
 316     def __init__(self, n_regs, n_int_alus):
 317         self.n_regs = n_regs
 318         self.n_int_alus = n_int_alus
 319
 320         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 321         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 322         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 323
 324         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 325         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 326
 327         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 328         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 329         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 330
 331         self.readable_o = Signal(n_int_alus, reset_less=True)
 332         self.writable_o = Signal(n_int_alus, reset_less=True)
 333
 334         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 335         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 336         self.go_die_i = Signal(n_int_alus, reset_less=True)
 337         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 338
 339         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 340
 341     def elaborate(self, platform):
 342         m = Module()
 343         comb = m.d.comb
 344         sync = m.d.sync
 345
 346         n_intfus = self.n_int_alus
 347
 348         # Integer FU-FU Dep Matrix
 349         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 350         m.submodules.intfudeps = intfudeps
 351         # Integer FU-Reg Dep Matrix
 352         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 353         m.submodules.intregdeps = intregdeps
 354
 355         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 356         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 357
 358         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 359         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 360
 361         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 362         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 363         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 364
 365         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 366         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 367         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 368         comb += intfudeps.go_die_i.eq(self.go_die_i)
 369         comb += self.readable_o.eq(intfudeps.readable_o)
 370         comb += self.writable_o.eq(intfudeps.writable_o)
 371
 372         # Connect function issue / arrays, and dest/src1/src2
 373         comb += intregdeps.dest_i.eq(self.dest_i)
 374         comb += intregdeps.src_i[0].eq(self.src1_i)
 375         comb += intregdeps.src_i[1].eq(self.src2_i)
 376
 377         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 378         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 379         comb += intregdeps.go_die_i.eq(self.go_die_i)
 380         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 381
 382         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 383         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 384         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 385
 386         return m
 387
 388
 389 class Scoreboard(Elaboratable):
 390     def __init__(self, rwid, n_regs):
 391         """ Inputs:
 392
 393             * :rwid:   bit width of register file(s) - both FP and INT
 394             * :n_regs: depth of register file(s) - number of FP and INT regs
 395         """
 396         self.rwid = rwid
 397         self.n_regs = n_regs
 398
 399         # Register Files
 400         self.intregs = RegFileArray(rwid, n_regs)
 401         self.fpregs = RegFileArray(rwid, n_regs)
 402
 403         # issue q needs to get at these
 404         self.aluissue = IssueUnitGroup(4)
 405         self.brissue = IssueUnitGroup(1)
 406         # and these
 407         self.alu_oper_i = Signal(4, reset_less=True)
 408         self.alu_imm_i = Signal(rwid, reset_less=True)
 409         self.br_oper_i = Signal(4, reset_less=True)
 410         self.br_imm_i = Signal(rwid, reset_less=True)
 411
 412         # inputs
 413         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 414         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 415         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 416         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 417
 418         # outputs
 419         self.issue_o = Signal(reset_less=True) # instruction was accepted
 420         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 421
 422         # for branch speculation experiment.  branch_direction = 0 if
 423         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 424         # branch_succ and branch_fail are requests to have the current
 425         # instruction be dependent on the branch unit "shadow" capability.
 426         self.branch_succ_i = Signal(reset_less=True)
 427         self.branch_fail_i = Signal(reset_less=True)
 428         self.branch_direction_o = Signal(2, reset_less=True)
 429
 430     def elaborate(self, platform):
 431         m = Module()
 432         comb = m.d.comb
 433         sync = m.d.sync
 434
 435         m.submodules.intregs = self.intregs
 436         m.submodules.fpregs = self.fpregs
 437
 438         # register ports
 439         int_dest = self.intregs.write_port("dest")
 440         int_src1 = self.intregs.read_port("src1")
 441         int_src2 = self.intregs.read_port("src2")
 442
 443         fp_dest = self.fpregs.write_port("dest")
 444         fp_src1 = self.fpregs.read_port("src1")
 445         fp_src2 = self.fpregs.read_port("src2")
 446
 447         # Int ALUs and BR ALUs
 448         n_int_alus = 5
 449         cua = CompUnitALUs(self.rwid, 3, n_alus=4)
 450         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 451
 452         # LDST Comp Units
 453         n_ldsts = 2
 454         cul = CompUnitLDSTs(self.rwid, 3, None)
 455
 456         # Comp Units
 457         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub, cul])
 458         bgt = cub.bgt # get at the branch computation unit
 459         br1 = cub.br1
 460
 461         # Int FUs
 462         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 463
 464         # Memory FUs
 465         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 466
 467         # Count of number of FUs
 468         n_intfus = n_int_alus
 469         n_fp_fus = 0 # for now
 470
 471         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 472         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 473         m.submodules.intpick1 = intpick1
 474
 475         # INT/FP Issue Unit
 476         regdecode = RegDecode(self.n_regs)
 477         m.submodules.regdecode = regdecode
 478         issueunit = IssueUnitArray([self.aluissue, self.brissue])
 479         m.submodules.issueunit = issueunit
 480
 481         # Shadow Matrix.  currently n_intfus shadows, to be used for
 482         # write-after-write hazards.  NOTE: there is one extra for branches,
 483         # so the shadow width is increased by 1
 484         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 485         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 486
 487         # record previous instruction to cast shadow on current instruction
 488         prev_shadow = Signal(n_intfus)
 489
 490         # Branch Speculation recorder.  tracks the success/fail state as
 491         # each instruction is issued, so that when the branch occurs the
 492         # allow/cancel can be issued as appropriate.
 493         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 494
 495         #---------
 496         # ok start wiring things together...
 497         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 498         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 499         #---------
 500
 501         #---------
 502         # Issue Unit is where it starts.  set up some in/outs for this module
 503         #---------
 504         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 505                      regdecode.src1_i.eq(self.int_src1_i),
 506                      regdecode.src2_i.eq(self.int_src2_i),
 507                      regdecode.enable_i.eq(self.reg_enable_i),
 508                      self.issue_o.eq(issueunit.issue_o)
 509                     ]
 510
 511         # take these to outside (issue needs them)
 512         comb += cua.oper_i.eq(self.alu_oper_i)
 513         comb += cua.imm_i.eq(self.alu_imm_i)
 514         comb += cub.oper_i.eq(self.br_oper_i)
 515         comb += cub.imm_i.eq(self.br_imm_i)
 516
 517         # TODO: issueunit.f (FP)
 518
 519         # and int function issue / busy arrays, and dest/src1/src2
 520         comb += intfus.dest_i.eq(regdecode.dest_o)
 521         comb += intfus.src1_i.eq(regdecode.src1_o)
 522         comb += intfus.src2_i.eq(regdecode.src2_o)
 523
 524         fn_issue_o = issueunit.fn_issue_o
 525
 526         comb += intfus.fn_issue_i.eq(fn_issue_o)
 527         comb += issueunit.busy_i.eq(cu.busy_o)
 528         comb += self.busy_o.eq(cu.busy_o.bool())
 529
 530         #---------
 531         # merge shadow matrices outputs
 532         #---------
 533
 534         # these are explained in ShadowMatrix docstring, and are to be
 535         # connected to the FUReg and FUFU Matrices, to get them to reset
 536         anydie = Signal(n_intfus, reset_less=True)
 537         allshadown = Signal(n_intfus, reset_less=True)
 538         shreset = Signal(n_intfus, reset_less=True)
 539         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 540         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 541         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 542
 543         #---------
 544         # connect fu-fu matrix
 545         #---------
 546
 547         # Group Picker... done manually for now.
 548         go_rd_o = intpick1.go_rd_o
 549         go_wr_o = intpick1.go_wr_o
 550         go_rd_i = intfus.go_rd_i
 551         go_wr_i = intfus.go_wr_i
 552         go_die_i = intfus.go_die_i
 553         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 554         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 555         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 556         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 557
 558         # Connect Picker
 559         #---------
 560         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 561         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 562         int_rd_o = intfus.readable_o
 563         int_wr_o = intfus.writable_o
 564         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 565         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 566
 567         #---------
 568         # Shadow Matrix
 569         #---------
 570
 571         comb += shadows.issue_i.eq(fn_issue_o)
 572         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 573         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 574         #---------
 575         # NOTE; this setup is for the instruction order preservation...
 576
 577         # connect shadows / go_dies to Computation Units
 578         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 579         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 580
 581         # ok connect first n_int_fu shadows to busy lines, to create an
 582         # instruction-order linked-list-like arrangement, using a bit-matrix
 583         # (instead of e.g. a ring buffer).
 584         # XXX TODO
 585
 586         # when written, the shadow can be cancelled (and was good)
 587         for i in range(n_intfus):
 588             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 589
 590         # *previous* instruction shadows *current* instruction, and, obviously,
 591         # if the previous is completed (!busy) don't cast the shadow!
 592         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 593         for i in range(n_intfus):
 594             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 595
 596         #---------
 597         # ... and this is for branch speculation.  it uses the extra bit
 598         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 599         # only needs to set shadow_i, s_fail_i and s_good_i
 600
 601         # issue captures shadow_i (if enabled)
 602         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 603
 604         bactive = Signal(reset_less=True)
 605         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 606
 607         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 608         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 609             comb += bshadow.issue_i.eq(fn_issue_o)
 610             for i in range(n_intfus):
 611                 with m.If(fn_issue_o & (Const(1<<i))):
 612                     comb += bshadow.shadow_i[i][0].eq(1)
 613
 614         # finally, we need an indicator to the test infrastructure as to
 615         # whether the branch succeeded or failed, plus, link up to the
 616         # "recorder" of whether the instruction was under shadow or not
 617
 618         with m.If(br1.issue_i):
 619             sync += bspec.active_i.eq(1)
 620         with m.If(self.branch_succ_i):
 621             comb += bspec.good_i.eq(fn_issue_o & 0x1f)
 622         with m.If(self.branch_fail_i):
 623             comb += bspec.fail_i.eq(fn_issue_o & 0x1f)
 624
 625         # branch is active (TODO: a better signal: this is over-using the
 626         # go_write signal - actually the branch should not be "writing")
 627         with m.If(br1.go_wr_i):
 628             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 629             sync += bspec.active_i.eq(0)
 630             comb += bspec.br_i.eq(1)
 631             # branch occurs if data == 1, failed if data == 0
 632             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 633             for i in range(n_intfus):
 634                 # *expected* direction of the branch matched against *actual*
 635                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 636                 # ... or it didn't
 637                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 638
 639         #---------
 640         # Connect Register File(s)
 641         #---------
 642         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 643         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 644         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 645
 646         # connect ALUs to regfule
 647         comb += int_dest.data_i.eq(cu.data_o)
 648         comb += cu.src1_i.eq(int_src1.data_o)
 649         comb += cu.src2_i.eq(int_src2.data_o)
 650
 651         # connect ALU Computation Units
 652         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 653         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 654         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 655
 656         return m
 657
 658     def __iter__(self):
 659         yield from self.intregs
 660         yield from self.fpregs
 661         yield self.int_dest_i
 662         yield self.int_src1_i
 663         yield self.int_src2_i
 664         yield self.issue_o
 665         yield self.branch_succ_i
 666         yield self.branch_fail_i
 667         yield self.branch_direction_o
 668
 669     def ports(self):
 670         return list(self)
 671
 672
 673 class IssueToScoreboard(Elaboratable):
 674
 675     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 676         self.qlen = qlen
 677         self.n_in = n_in
 678         self.n_out = n_out
 679         self.rwid = rwid
 680         self.opw = opwid
 681         self.n_regs = n_regs
 682
 683         mqbits = (int(log(qlen) / log(2))+2, False)
 684         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 685         self.p_ready_o = Signal() # instructions were added
 686         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 687
 688         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 689         self.qlen_o = Signal(mqbits, reset_less=True)
 690
 691     def elaborate(self, platform):
 692         m = Module()
 693         comb = m.d.comb
 694         sync = m.d.sync
 695
 696         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 697         sc = Scoreboard(self.rwid, self.n_regs)
 698         mem = TestMemory(self.rwid, 8) # not too big, takes too long
 699         m.submodules.iq = iq
 700         m.submodules.sc = sc
 701         m.submodules.mem = mem
 702
 703         # get at the regfile for testing
 704         self.intregs = sc.intregs
 705
 706         # and the "busy" signal and instruction queue length
 707         comb += self.busy_o.eq(sc.busy_o)
 708         comb += self.qlen_o.eq(iq.qlen_o)
 709
 710         # link up instruction queue
 711         comb += iq.p_add_i.eq(self.p_add_i)
 712         comb += self.p_ready_o.eq(iq.p_ready_o)
 713         for i in range(self.n_in):
 714             comb += eq(iq.data_i[i], self.data_i[i])
 715
 716         # take instruction and process it.  note that it's possible to
 717         # "inspect" the queue contents *without* actually removing the
 718         # items.  items are only removed when the
 719
 720         # in "waiting" state
 721         wait_issue_br = Signal()
 722         wait_issue_alu = Signal()
 723
 724         with m.If(wait_issue_br | wait_issue_alu):
 725             # set instruction pop length to 1 if the unit accepted
 726             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 727                 with m.If(iq.qlen_o != 0):
 728                     comb += iq.n_sub_i.eq(1)
 729             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 730                 with m.If(iq.qlen_o != 0):
 731                     comb += iq.n_sub_i.eq(1)
 732
 733         # see if some instruction(s) are here.  note that this is
 734         # "inspecting" the in-place queue.  note also that on the
 735         # cycle following "waiting" for fn_issue_o to be set, the
 736         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 737         with m.If(iq.qlen_o != 0):
 738             # get the operands and operation
 739             imm = iq.data_o[0].imm_i
 740             dest = iq.data_o[0].dest_i
 741             src1 = iq.data_o[0].src1_i
 742             src2 = iq.data_o[0].src2_i
 743             op = iq.data_o[0].oper_i
 744             opi = iq.data_o[0].opim_i # immediate set
 745
 746             # set the src/dest regs
 747             comb += sc.int_dest_i.eq(dest)
 748             comb += sc.int_src1_i.eq(src1)
 749             comb += sc.int_src2_i.eq(src2)
 750             comb += sc.reg_enable_i.eq(1) # enable the regfile
 751
 752             # choose a Function-Unit-Group
 753             with m.If((op & (0x3<<2)) != 0): # branch
 754                 comb += sc.brissue.insn_i.eq(1)
 755                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 756                 comb += sc.br_imm_i.eq(imm)
 757                 comb += wait_issue_br.eq(1)
 758             with m.Else():                   # alu
 759                 comb += sc.aluissue.insn_i.eq(1)
 760                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 761                 comb += sc.alu_imm_i.eq(imm)
 762                 comb += wait_issue_alu.eq(1)
 763
 764             # XXX TODO
 765             # these indicate that the instruction is to be made
 766             # shadow-dependent on
 767             # (either) branch success or branch fail
 768             #yield sc.branch_fail_i.eq(branch_fail)
 769             #yield sc.branch_succ_i.eq(branch_success)
 770
 771         return m
 772
 773     def __iter__(self):
 774         yield self.p_ready_o
 775         for o in self.data_i:
 776             yield from list(o)
 777         yield self.p_add_i
 778
 779     def ports(self):
 780         return list(self)
 781
 782
 783 IADD = 0
 784 ISUB = 1
 785 IMUL = 2
 786 ISHF = 3
 787 IBGT = 4
 788 IBLT = 5
 789 IBEQ = 6
 790 IBNE = 7
 791
 792 class RegSim:
 793     def __init__(self, rwidth, nregs):
 794         self.rwidth = rwidth
 795         self.regs = [0] * nregs
 796
 797     def op(self, op, op_imm, imm, src1, src2, dest):
 798         maxbits = (1 << self.rwidth) - 1
 799         src1 = self.regs[src1] & maxbits
 800         if op_imm:
 801             src2 = imm
 802         else:
 803             src2 = self.regs[src2] & maxbits
 804         if op == IADD:
 805             val = src1 + src2
 806         elif op == ISUB:
 807             val = src1 - src2
 808         elif op == IMUL:
 809             val = src1 * src2
 810         elif op == ISHF:
 811             val = src1 >> (src2 & maxbits)
 812         elif op == IBGT:
 813             val = int(src1 > src2)
 814         elif op == IBLT:
 815             val = int(src1 < src2)
 816         elif op == IBEQ:
 817             val = int(src1 == src2)
 818         elif op == IBNE:
 819             val = int(src1 != src2)
 820         val &= maxbits
 821         self.setval(dest, val)
 822         return val
 823
 824     def setval(self, dest, val):
 825         print ("sim setval", dest, hex(val))
 826         self.regs[dest] = val
 827
 828     def dump(self, dut):
 829         for i, val in enumerate(self.regs):
 830             reg = yield dut.intregs.regs[i].reg
 831             okstr = "OK" if reg == val else "!ok"
 832             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 833
 834     def check(self, dut):
 835         for i, val in enumerate(self.regs):
 836             reg = yield dut.intregs.regs[i].reg
 837             if reg != val:
 838                 print("reg %d expected %x received %x\n" % (i, val, reg))
 839                 yield from self.dump(dut)
 840                 assert False
 841
 842 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 843             branch_success, branch_fail):
 844     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 845                'src1_i': src1, 'src2_i': src2}]
 846
 847     sendlen = 1
 848     for idx in range(sendlen):
 849         yield from eq(dut.data_i[idx], instrs[idx])
 850         di = yield dut.data_i[idx]
 851         print ("senddata %d %x" % (idx, di))
 852     yield dut.p_add_i.eq(sendlen)
 853     yield
 854     o_p_ready = yield dut.p_ready_o
 855     while not o_p_ready:
 856         yield
 857         o_p_ready = yield dut.p_ready_o
 858
 859     yield dut.p_add_i.eq(0)
 860
 861
 862 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 863     yield from disable_issue(dut)
 864     yield dut.int_dest_i.eq(dest)
 865     yield dut.int_src1_i.eq(src1)
 866     yield dut.int_src2_i.eq(src2)
 867     if (op & (0x3<<2)) != 0: # branch
 868         yield dut.brissue.insn_i.eq(1)
 869         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 870         yield dut.br_imm_i.eq(imm)
 871         dut_issue = dut.brissue
 872     else:
 873         yield dut.aluissue.insn_i.eq(1)
 874         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 875         yield dut.alu_imm_i.eq(imm)
 876         dut_issue = dut.aluissue
 877     yield dut.reg_enable_i.eq(1)
 878
 879     # these indicate that the instruction is to be made shadow-dependent on
 880     # (either) branch success or branch fail
 881     yield dut.branch_fail_i.eq(branch_fail)
 882     yield dut.branch_succ_i.eq(branch_success)
 883
 884     yield
 885     yield from wait_for_issue(dut, dut_issue)
 886
 887
 888 def print_reg(dut, rnums):
 889     rs = []
 890     for rnum in rnums:
 891         reg = yield dut.intregs.regs[rnum].reg
 892         rs.append("%x" % reg)
 893     rnums = map(str, rnums)
 894     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 895
 896
 897 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 898     insts = []
 899     for i in range(n_ops):
 900         src1 = randint(1, dut.n_regs-1)
 901         src2 = randint(1, dut.n_regs-1)
 902         imm = randint(1, (1<<dut.rwid)-1)
 903         dest = randint(1, dut.n_regs-1)
 904         op = randint(0, max_opnums)
 905         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 906
 907         if shadowing:
 908             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 909         else:
 910             insts.append((src1, src2, dest, op, opi, imm))
 911     return insts
 912
 913
 914 def wait_for_busy_clear(dut):
 915     while True:
 916         busy_o = yield dut.busy_o
 917         if not busy_o:
 918             break
 919         print ("busy",)
 920         yield
 921
 922 def disable_issue(dut):
 923     yield dut.aluissue.insn_i.eq(0)
 924     yield dut.brissue.insn_i.eq(0)
 925
 926
 927 def wait_for_issue(dut, dut_issue):
 928     while True:
 929         issue_o = yield dut_issue.fn_issue_o
 930         if issue_o:
 931             yield from disable_issue(dut)
 932             yield dut.reg_enable_i.eq(0)
 933             break
 934         print ("busy",)
 935         #yield from print_reg(dut, [1,2,3])
 936         yield
 937     #yield from print_reg(dut, [1,2,3])
 938
 939 def scoreboard_branch_sim(dut, alusim):
 940
 941     iseed = 3
 942
 943     for i in range(1):
 944
 945         print ("rseed", iseed)
 946         seed(iseed)
 947         iseed += 1
 948
 949         yield dut.branch_direction_o.eq(0)
 950
 951         # set random values in the registers
 952         for i in range(1, dut.n_regs):
 953             val = 31+i*3
 954             val = randint(0, (1<<alusim.rwidth)-1)
 955             yield dut.intregs.regs[i].reg.eq(val)
 956             alusim.setval(i, val)
 957
 958         if False:
 959             # create some instructions: branches create a tree
 960             insts = create_random_ops(dut, 1, True, 1)
 961             #insts.append((6, 6, 1, 2, (0, 0)))
 962             #insts.append((4, 3, 3, 0, (0, 0)))
 963
 964             src1 = randint(1, dut.n_regs-1)
 965             src2 = randint(1, dut.n_regs-1)
 966             #op = randint(4, 7)
 967             op = 4 # only BGT at the moment
 968
 969             branch_ok = create_random_ops(dut, 1, True, 1)
 970             branch_fail = create_random_ops(dut, 1, True, 1)
 971
 972             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 973
 974         if True:
 975             insts = []
 976             insts.append( (3, 5, 2, 0, (0, 0)) )
 977             branch_ok = []
 978             branch_fail = []
 979             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
 980             branch_ok.append( None )
 981             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
 982             #branch_fail.append( None )
 983             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
 984
 985         siminsts = deepcopy(insts)
 986
 987         # issue instruction(s)
 988         i = -1
 989         instrs = insts
 990         branch_direction = 0
 991         while instrs:
 992             yield
 993             yield
 994             i += 1
 995             branch_direction = yield dut.branch_direction_o # way branch went
 996             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
 997             if branch_direction == 1 and shadow_on:
 998                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 999                 continue # branch was "success" and this is a "failed"... skip
1000             if branch_direction == 2 and shadow_off:
1001                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1002                 continue # branch was "fail" and this is a "success"... skip
1003             if branch_direction != 0:
1004                 shadow_on = 0
1005                 shadow_off = 0
1006             is_branch = op >= 4
1007             if is_branch:
1008                 branch_ok, branch_fail = dest
1009                 dest = src2
1010                 # ok zip up the branch success / fail instructions and
1011                 # drop them into the queue, one marked "to have branch success"
1012                 # the other to be marked shadow branch "fail".
1013                 # one out of each of these will be cancelled
1014                 for ok, fl in zip(branch_ok, branch_fail):
1015                     if ok:
1016                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1017                     if fl:
1018                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1019             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1020                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1021             yield from int_instr(dut, op, src1, src2, dest,
1022                                  shadow_on, shadow_off)
1023
1024         # wait for all instructions to stop before checking
1025         yield
1026         yield from wait_for_busy_clear(dut)
1027
1028         i = -1
1029         while siminsts:
1030             instr = siminsts.pop(0)
1031             if instr is None:
1032                 continue
1033             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
1034             i += 1
1035             is_branch = op >= 4
1036             if is_branch:
1037                 branch_ok, branch_fail = dest
1038                 dest = src2
1039             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
1040                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1041             branch_res = alusim.op(op, src1, src2, dest)
1042             if is_branch:
1043                 if branch_res:
1044                     siminsts += branch_ok
1045                 else:
1046                     siminsts += branch_fail
1047
1048         # check status
1049         yield from alusim.check(dut)
1050         yield from alusim.dump(dut)
1051
1052
def scoreboard_sim(dut, alusim):
    """Simulation process: run instructions through the instruction queue.

    Registers 1..n_regs-1 of both ``dut`` and ``alusim`` are loaded with
    the same random values (fixed seed 0).  Each instruction is applied
    to the ``alusim`` model and then queued on ``dut`` with instr_q().
    Once the queue is empty and the hardware is no longer busy, the
    hardware registers are checked against the model and dumped.

    Instruction tuples consumed by the issue loop have seven fields:
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``.

    dut    -- instruction-queue front end of the scoreboard under test
    alusim -- RegSim reference model
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        # NOTE(review): the "if False" regression vectors below are disabled.
        # Several use 4-, 5- or 6-field tuples, which the 7-field unpacking
        # in the issue loop would reject; they need updating if re-enabled.
        instrs = []
        if True:
            # the only active source: 15 random ops, opcodes 0..4
            instrs = create_random_ops(dut, 15, True, 4)

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # branch (op 4) on preset register values, then an op
            # shadowed on branch-fail
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # as above with a different preset value, and the following op
            # shadowed on branch-success
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # the model is updated first, in program order, as each
            # instruction is queued
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first let the instruction queue drain completely...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # (...then allow a few more clocks before polling busy_o)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1195
1196
def test_scoreboard():
    """Build the scoreboard, write its RTLIL to disk and simulate it.

    The RTLIL goes to test_scoreboard6600.il and the simulation trace to
    test_scoreboard6600.vcd, both in the current directory.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed but not used by the simulation

    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    # alternative process: scoreboard_branch_sim(dut, alusim)
    process = scoreboard_sim(dut, alusim)
    run_simulation(dut, process, vcd_name='test_scoreboard6600.vcd')
1210
1211
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()