src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class Memory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = regwid/8
  30         depth = (1<<addrw) / self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117
 118         # outputs
 119         self.busy_o = Signal(n_units, reset_less=True)
 120         self.rd_rel_o = Signal(n_units, reset_less=True)
 121         self.req_rel_o = Signal(n_units, reset_less=True)
 122         if ldstmode:
 123             self.adr_rel_o = Signal(n_units, reset_less=True)
 124             self.sto_rel_o = Signal(n_units, reset_less=True)
 125             self.req_rel_o = Signal(n_units, reset_less=True)
 126             self.load_mem_o = Signal(n_units, reset_less=True)
 127             self.stwd_mem_o = Signal(n_units, reset_less=True)
 128
 129         # in/out register data (note: not register#, actual data)
 130         self.data_o = Signal(rwid, reset_less=True)
 131         self.src1_i = Signal(rwid, reset_less=True)
 132         self.src2_i = Signal(rwid, reset_less=True)
 133         # input operand
 134
 135     def elaborate(self, platform):
 136         m = Module()
 137         comb = m.d.comb
 138
 139         for i, alu in enumerate(self.units):
 140             setattr(m.submodules, "comp%d" % i, alu)
 141
 142         go_rd_l = []
 143         go_wr_l = []
 144         issue_l = []
 145         busy_l = []
 146         req_rel_l = []
 147         rd_rel_l = []
 148         shadow_l = []
 149         godie_l = []
 150         for alu in self.units:
 151             req_rel_l.append(alu.req_rel_o)
 152             rd_rel_l.append(alu.rd_rel_o)
 153             shadow_l.append(alu.shadown_i)
 154             godie_l.append(alu.go_die_i)
 155             go_wr_l.append(alu.go_wr_i)
 156             go_rd_l.append(alu.go_rd_i)
 157             issue_l.append(alu.issue_i)
 158             busy_l.append(alu.busy_o)
 159         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 160         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 161         comb += self.busy_o.eq(Cat(*busy_l))
 162         comb += Cat(*godie_l).eq(self.go_die_i)
 163         comb += Cat(*shadow_l).eq(self.shadown_i)
 164         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 165         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 166         comb += Cat(*issue_l).eq(self.issue_i)
 167
 168         # connect data register input/output
 169
 170         # merge (OR) all integer FU / ALU outputs to a single value
 171         # bit of a hack: treereduce needs a list with an item named "data_o"
 172         if self.units:
 173             data_o = treereduce(self.units)
 174             comb += self.data_o.eq(data_o)
 175
 176         for i, alu in enumerate(self.units):
 177             comb += alu.src1_i.eq(self.src1_i)
 178             comb += alu.src2_i.eq(self.src2_i)
 179
 180         if not self.ldstmode:
 181             return m
 182
 183         ldmem_l = []
 184         stmem_l = []
 185         go_ad_l = []
 186         adr_rel_l = []
 187         sto_rel_l = []
 188         for alu in self.units:
 189             adr_rel_l.append(alu.adr_rel_o)
 190             sto_rel_l.append(alu.sto_rel_o)
 191             ldmem_l.append(alu.load_mem_o)
 192             stmem_l.append(alu.stwd_mem_o)
 193             go_ad_l.append(alu.go_ad_i)
 194         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 195         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 196         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 197         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 198         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 199
 200         return m
 201
 202
 203 class CompUnitLDSTs(CompUnitsBase):
 204
 205     def __init__(self, rwid, opwid, mem):
 206         """ Inputs:
 207
 208             * :rwid:   bit width of register file(s) - both FP and INT
 209             * :opwid:  operand bit width
 210         """
 211         self.opwid = opwid
 212
 213         # inputs
 214         self.oper_i = Signal(opwid, reset_less=True)
 215         self.imm_i = Signal(rwid, reset_less=True)
 216
 217         # Int ALUs
 218         add1 = ALU(rwid)
 219         add2 = ALU(rwid)
 220
 221         units = []
 222         for alu in [add1, add2]:
 223             aluopwid = 4 # see compldst.py for "internal" opcode
 224             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 225
 226         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 227
 228     def elaborate(self, platform):
 229         m = CompUnitsBase.elaborate(self, platform)
 230         comb = m.d.comb
 231
 232         # hand the same operation to all units, 4 lower bits though
 233         for alu in self.units:
 234             comb += alu.oper_i[0:4].eq(self.oper_i)
 235             comb += alu.imm_i.eq(self.imm_i)
 236             comb += alu.isalu_i.eq(0)
 237
 238         return m
 239
 240
 241 class CompUnitALUs(CompUnitsBase):
 242
 243     def __init__(self, rwid, opwid, n_alus):
 244         """ Inputs:
 245
 246             * :rwid:   bit width of register file(s) - both FP and INT
 247             * :opwid:  operand bit width
 248         """
 249         self.opwid = opwid
 250
 251         # inputs
 252         self.oper_i = Signal(opwid, reset_less=True)
 253         self.imm_i = Signal(rwid, reset_less=True)
 254
 255         # Int ALUs
 256         alus = []
 257         for i in range(n_alus):
 258             alus.append(ALU(rwid))
 259
 260         units = []
 261         for alu in alus:
 262             aluopwid = 3 # extra bit for immediate mode
 263             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 264
 265         CompUnitsBase.__init__(self, rwid, units)
 266
 267     def elaborate(self, platform):
 268         m = CompUnitsBase.elaborate(self, platform)
 269         comb = m.d.comb
 270
 271         # hand the same operation to all units, only lower 3 bits though
 272         for alu in self.units:
 273             comb += alu.oper_i[0:3].eq(self.oper_i)
 274             comb += alu.imm_i.eq(self.imm_i)
 275
 276         return m
 277
 278
 279 class CompUnitBR(CompUnitsBase):
 280
 281     def __init__(self, rwid, opwid):
 282         """ Inputs:
 283
 284             * :rwid:   bit width of register file(s) - both FP and INT
 285             * :opwid:  operand bit width
 286
 287             Note: bgt unit is returned so that a shadow unit can be created
 288             for it
 289         """
 290         self.opwid = opwid
 291
 292         # inputs
 293         self.oper_i = Signal(opwid, reset_less=True)
 294         self.imm_i = Signal(rwid, reset_less=True)
 295
 296         # Branch ALU and CU
 297         self.bgt = BranchALU(rwid)
 298         aluopwid = 3 # extra bit for immediate mode
 299         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 300         CompUnitsBase.__init__(self, rwid, [self.br1])
 301
 302     def elaborate(self, platform):
 303         m = CompUnitsBase.elaborate(self, platform)
 304         comb = m.d.comb
 305
 306         # hand the same operation to all units
 307         for alu in self.units:
 308             comb += alu.oper_i.eq(self.oper_i)
 309             comb += alu.imm_i.eq(self.imm_i)
 310
 311         return m
 312
 313
 314 class FunctionUnits(Elaboratable):
 315
 316     def __init__(self, n_regs, n_int_alus):
 317         self.n_regs = n_regs
 318         self.n_int_alus = n_int_alus
 319
 320         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 321         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 322         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 323
 324         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 325         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 326
 327         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 328         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 329         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 330
 331         self.readable_o = Signal(n_int_alus, reset_less=True)
 332         self.writable_o = Signal(n_int_alus, reset_less=True)
 333
 334         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 335         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 336         self.go_die_i = Signal(n_int_alus, reset_less=True)
 337         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 338
 339         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 340
 341     def elaborate(self, platform):
 342         m = Module()
 343         comb = m.d.comb
 344         sync = m.d.sync
 345
 346         n_intfus = self.n_int_alus
 347
 348         # Integer FU-FU Dep Matrix
 349         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 350         m.submodules.intfudeps = intfudeps
 351         # Integer FU-Reg Dep Matrix
 352         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 353         m.submodules.intregdeps = intregdeps
 354
 355         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 356         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 357
 358         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 359         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 360
 361         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 362         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 363         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 364
 365         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 366         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 367         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 368         comb += intfudeps.go_die_i.eq(self.go_die_i)
 369         comb += self.readable_o.eq(intfudeps.readable_o)
 370         comb += self.writable_o.eq(intfudeps.writable_o)
 371
 372         # Connect function issue / arrays, and dest/src1/src2
 373         comb += intregdeps.dest_i.eq(self.dest_i)
 374         comb += intregdeps.src_i[0].eq(self.src1_i)
 375         comb += intregdeps.src_i[1].eq(self.src2_i)
 376
 377         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 378         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 379         comb += intregdeps.go_die_i.eq(self.go_die_i)
 380         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 381
 382         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 383         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 384         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 385
 386         return m
 387
 388
class Scoreboard(Elaboratable):
    """ Top-level scoreboard: register files, Computation Units, Function
        Unit dependency matrices, group picker, issue unit, shadow
        matrices and branch speculation recorder, all wired together
        in elaborate().
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(4)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # NOTE(review): elaborate() sets branch_direction_o to the branch
        # unit's data_o + 1, and describes data_o == 1 as "branch occurs".
        # confirm that this matches the 1/2 meaning given above.
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Instantiates all submodules and connects them together.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        # (the FP ports are created but not connected anywhere below)
        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=4)
        cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs

        # LDST Comp Units
        n_ldsts = 2
        cul = CompUnitLDSTs(self.rwid, 3, None)

        # Comp Units
        # NOTE(review): this group holds 4 ALUs + 1 BR + 2 LD/ST units,
        # whereas everything below is sized by n_int_alus (5) and only
        # bits [0:n_intfus] of the cu signals are connected - confirm.
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub, cul])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Memory FUs (instantiated only: not connected anywhere below)
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 11)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: the shadow for branches is a
        # separate, 1-wide ShadowMatrix (bshadow)
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        # busy if any Computation Unit is busy
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).
        # XXX TODO

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the separate
        # 1-wide branch ShadowMatrix (bshadow), and
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # branch is "active" from the cycle the branch unit is issued
        # (or while bspec.active_i is set) until the branch unit's go_wr_i
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                # only the unit(s) selected in fn_issue_o are shadowed
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        # NOTE(review): the 0x1f mask below is 5 bits wide, i.e. it
        # hard-codes n_int_alus = 5
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f)
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f)

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ yields both register files' contents followed by the
            register-number inputs, issue_o and the branch signals
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ returns everything yielded by __iter__, as a list
        """
        return list(self)
 671
 672
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard: the instruction at the
        head of the queue is decoded into ALU or branch issue requests.
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        """ Inputs:

            * :qlen:   passed to InstructionQ; also sizes p_add_i / qlen_o
            * :n_in:   number of entries in data_i (passed to InstructionQ)
            * :n_out:  passed to InstructionQ
            * :rwid:   register bit width (passed to queue and Scoreboard)
            * :opwid:  operation bit width (passed to the queue)
            * :n_regs: number of registers (passed to Scoreboard)
        """
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # Signal shape as a (width, signed) tuple: floor(log2(qlen))+2
        # bits, unsigned
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Instantiates the queue and the scoreboard and connects the
            head of the queue to the scoreboard's issue inputs.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i is set, below
        # (presumably n_sub_i is the number of items to pop - see
        # InstructionQ)

        # in "waiting" state
        wait_issue_br = Signal()
        wait_issue_alu = Signal()

        with m.If(wait_issue_br | wait_issue_alu):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group: the operation goes to the
            # branch group if bit 2 or bit 3 of op is set, otherwise to
            # the ALU group.  the unit is given the low two bits of op
            # with the immediate flag as bit 2.
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.brissue.insn_i.eq(1)
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += wait_issue_br.eq(1)
            with m.Else():                   # alu
                comb += sc.aluissue.insn_i.eq(1)
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """ yields p_ready_o, every field of every data_i entry, then
            p_add_i
        """
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ returns everything yielded by __iter__, as a list
        """
        return list(self)
 779
 780
 781 IADD = 0
 782 ISUB = 1
 783 IMUL = 2
 784 ISHF = 3
 785 IBGT = 4
 786 IBLT = 5
 787 IBEQ = 6
 788 IBNE = 7
 789
 790 class RegSim:
 791     def __init__(self, rwidth, nregs):
 792         self.rwidth = rwidth
 793         self.regs = [0] * nregs
 794
 795     def op(self, op, op_imm, imm, src1, src2, dest):
 796         maxbits = (1 << self.rwidth) - 1
 797         src1 = self.regs[src1] & maxbits
 798         if op_imm:
 799             src2 = imm
 800         else:
 801             src2 = self.regs[src2] & maxbits
 802         if op == IADD:
 803             val = src1 + src2
 804         elif op == ISUB:
 805             val = src1 - src2
 806         elif op == IMUL:
 807             val = src1 * src2
 808         elif op == ISHF:
 809             val = src1 >> (src2 & maxbits)
 810         elif op == IBGT:
 811             val = int(src1 > src2)
 812         elif op == IBLT:
 813             val = int(src1 < src2)
 814         elif op == IBEQ:
 815             val = int(src1 == src2)
 816         elif op == IBNE:
 817             val = int(src1 != src2)
 818         val &= maxbits
 819         self.setval(dest, val)
 820         return val
 821
 822     def setval(self, dest, val):
 823         print ("sim setval", dest, hex(val))
 824         self.regs[dest] = val
 825
 826     def dump(self, dut):
 827         for i, val in enumerate(self.regs):
 828             reg = yield dut.intregs.regs[i].reg
 829             okstr = "OK" if reg == val else "!ok"
 830             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 831
 832     def check(self, dut):
 833         for i, val in enumerate(self.regs):
 834             reg = yield dut.intregs.regs[i].reg
 835             if reg != val:
 836                 print("reg %d expected %x received %x\n" % (i, val, reg))
 837                 yield from self.dump(dut)
 838                 assert False
 839
 840 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 841             branch_success, branch_fail):
 842     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 843                'src1_i': src1, 'src2_i': src2}]
 844
 845     sendlen = 1
 846     for idx in range(sendlen):
 847         yield from eq(dut.data_i[idx], instrs[idx])
 848         di = yield dut.data_i[idx]
 849         print ("senddata %d %x" % (idx, di))
 850     yield dut.p_add_i.eq(sendlen)
 851     yield
 852     o_p_ready = yield dut.p_ready_o
 853     while not o_p_ready:
 854         yield
 855         o_p_ready = yield dut.p_ready_o
 856
 857     yield dut.p_add_i.eq(0)
 858
 859
 860 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 861     yield from disable_issue(dut)
 862     yield dut.int_dest_i.eq(dest)
 863     yield dut.int_src1_i.eq(src1)
 864     yield dut.int_src2_i.eq(src2)
 865     if (op & (0x3<<2)) != 0: # branch
 866         yield dut.brissue.insn_i.eq(1)
 867         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 868         yield dut.br_imm_i.eq(imm)
 869         dut_issue = dut.brissue
 870     else:
 871         yield dut.aluissue.insn_i.eq(1)
 872         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 873         yield dut.alu_imm_i.eq(imm)
 874         dut_issue = dut.aluissue
 875     yield dut.reg_enable_i.eq(1)
 876
 877     # these indicate that the instruction is to be made shadow-dependent on
 878     # (either) branch success or branch fail
 879     yield dut.branch_fail_i.eq(branch_fail)
 880     yield dut.branch_succ_i.eq(branch_success)
 881
 882     yield
 883     yield from wait_for_issue(dut, dut_issue)
 884
 885
 886 def print_reg(dut, rnums):
 887     rs = []
 888     for rnum in rnums:
 889         reg = yield dut.intregs.regs[rnum].reg
 890         rs.append("%x" % reg)
 891     rnums = map(str, rnums)
 892     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 893
 894
 895 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 896     insts = []
 897     for i in range(n_ops):
 898         src1 = randint(1, dut.n_regs-1)
 899         src2 = randint(1, dut.n_regs-1)
 900         imm = randint(1, (1<<dut.rwid)-1)
 901         dest = randint(1, dut.n_regs-1)
 902         op = randint(0, max_opnums)
 903         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 904
 905         if shadowing:
 906             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 907         else:
 908             insts.append((src1, src2, dest, op, opi, imm))
 909     return insts
 910
 911
 912 def wait_for_busy_clear(dut):
 913     while True:
 914         busy_o = yield dut.busy_o
 915         if not busy_o:
 916             break
 917         print ("busy",)
 918         yield
 919
 920 def disable_issue(dut):
 921     yield dut.aluissue.insn_i.eq(0)
 922     yield dut.brissue.insn_i.eq(0)
 923
 924
 925 def wait_for_issue(dut, dut_issue):
 926     while True:
 927         issue_o = yield dut_issue.fn_issue_o
 928         if issue_o:
 929             yield from disable_issue(dut)
 930             yield dut.reg_enable_i.eq(0)
 931             break
 932         print ("busy",)
 933         #yield from print_reg(dut, [1,2,3])
 934         yield
 935     #yield from print_reg(dut, [1,2,3])
 936
 937 def scoreboard_branch_sim(dut, alusim):
 938
 939     iseed = 3
 940
 941     for i in range(1):
 942
 943         print ("rseed", iseed)
 944         seed(iseed)
 945         iseed += 1
 946
 947         yield dut.branch_direction_o.eq(0)
 948
 949         # set random values in the registers
 950         for i in range(1, dut.n_regs):
 951             val = 31+i*3
 952             val = randint(0, (1<<alusim.rwidth)-1)
 953             yield dut.intregs.regs[i].reg.eq(val)
 954             alusim.setval(i, val)
 955
 956         if False:
 957             # create some instructions: branches create a tree
 958             insts = create_random_ops(dut, 1, True, 1)
 959             #insts.append((6, 6, 1, 2, (0, 0)))
 960             #insts.append((4, 3, 3, 0, (0, 0)))
 961
 962             src1 = randint(1, dut.n_regs-1)
 963             src2 = randint(1, dut.n_regs-1)
 964             #op = randint(4, 7)
 965             op = 4 # only BGT at the moment
 966
 967             branch_ok = create_random_ops(dut, 1, True, 1)
 968             branch_fail = create_random_ops(dut, 1, True, 1)
 969
 970             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 971
 972         if True:
 973             insts = []
 974             insts.append( (3, 5, 2, 0, (0, 0)) )
 975             branch_ok = []
 976             branch_fail = []
 977             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
 978             branch_ok.append( None )
 979             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
 980             #branch_fail.append( None )
 981             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
 982
 983         siminsts = deepcopy(insts)
 984
 985         # issue instruction(s)
 986         i = -1
 987         instrs = insts
 988         branch_direction = 0
 989         while instrs:
 990             yield
 991             yield
 992             i += 1
 993             branch_direction = yield dut.branch_direction_o # way branch went
 994             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
 995             if branch_direction == 1 and shadow_on:
 996                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
 997                 continue # branch was "success" and this is a "failed"... skip
 998             if branch_direction == 2 and shadow_off:
 999                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1000                 continue # branch was "fail" and this is a "success"... skip
1001             if branch_direction != 0:
1002                 shadow_on = 0
1003                 shadow_off = 0
1004             is_branch = op >= 4
1005             if is_branch:
1006                 branch_ok, branch_fail = dest
1007                 dest = src2
1008                 # ok zip up the branch success / fail instructions and
1009                 # drop them into the queue, one marked "to have branch success"
1010                 # the other to be marked shadow branch "fail".
1011                 # one out of each of these will be cancelled
1012                 for ok, fl in zip(branch_ok, branch_fail):
1013                     if ok:
1014                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1015                     if fl:
1016                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1017             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1018                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1019             yield from int_instr(dut, op, src1, src2, dest,
1020                                  shadow_on, shadow_off)
1021
1022         # wait for all instructions to stop before checking
1023         yield
1024         yield from wait_for_busy_clear(dut)
1025
1026         i = -1
1027         while siminsts:
1028             instr = siminsts.pop(0)
1029             if instr is None:
1030                 continue
1031             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
1032             i += 1
1033             is_branch = op >= 4
1034             if is_branch:
1035                 branch_ok, branch_fail = dest
1036                 dest = src2
1037             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
1038                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1039             branch_res = alusim.op(op, src1, src2, dest)
1040             if is_branch:
1041                 if branch_res:
1042                     siminsts += branch_ok
1043                 else:
1044                     siminsts += branch_fail
1045
1046         # check status
1047         yield from alusim.check(dut)
1048         yield from alusim.dump(dut)
1049
1050
def scoreboard_sim(dut, alusim):
    """Randomised regression process for the instruction queue path.

    dut:    the design under test; instructions are pushed into its queue
            through ``instr_q``
    alusim: a RegSim reference model

    The random generator is seeded with 0, then 50 rounds are run.  Each
    round loads identical random values into registers 1..n_regs-1 of the
    hardware and of the model, creates 15 random instructions, applies
    each to the model and queues it on the hardware, waits for the queue
    to drain and for busy to clear, and finally compares all registers.

    The ``if False:`` sections are hand-written regression cases kept for
    manual re-enabling; several use older, shorter tuple layouts than the
    7-element form unpacked by the issue loop.
    """

    seed(0)

    for _ in range(50):

        # load the same random value into hardware and model registers
        for rnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+rnum*3
            #val = rnum
            yield dut.intregs.regs[rnum].reg.eq(val)
            alusim.setval(rnum, val)

        # create some instructions (some random, some regression tests)
        instrs = create_random_ops(dut, 15, True, 4)

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, entry in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = entry

            print("instr %d: (%d, %d, %d, %d, %d, %d)" %
                  (idx, src1, src2, dest, op, opi, imm))
            # model first, then hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first until the queue is empty ...
        while (yield dut.qlen_o) != 0:
            yield
        # ... then four more steps, then until busy clears
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1193
1194
def test_scoreboard():
    """Build the design, write its RTLIL to disk and run the simulation.

    Produces ``test_scoreboard6600.il`` (RTLIL of the design) and
    ``test_scoreboard6600.vcd`` (waveform of the ``scoreboard_sim`` run)
    in the current directory.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(rwidth=16, nregs=8)
    # constructed as in the original; not used further in this function
    memsim = MemSim(16, 16)

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1208
1209
# run the scoreboard test when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()