src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = 1 # regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117
 118         # outputs
 119         self.busy_o = Signal(n_units, reset_less=True)
 120         self.rd_rel_o = Signal(n_units, reset_less=True)
 121         self.req_rel_o = Signal(n_units, reset_less=True)
 122         if ldstmode:
 123             self.adr_rel_o = Signal(n_units, reset_less=True)
 124             self.sto_rel_o = Signal(n_units, reset_less=True)
 125             self.req_rel_o = Signal(n_units, reset_less=True)
 126             self.load_mem_o = Signal(n_units, reset_less=True)
 127             self.stwd_mem_o = Signal(n_units, reset_less=True)
 128
 129         # in/out register data (note: not register#, actual data)
 130         self.data_o = Signal(rwid, reset_less=True)
 131         self.src1_i = Signal(rwid, reset_less=True)
 132         self.src2_i = Signal(rwid, reset_less=True)
 133         # input operand
 134
 135     def elaborate(self, platform):
 136         m = Module()
 137         comb = m.d.comb
 138
 139         for i, alu in enumerate(self.units):
 140             setattr(m.submodules, "comp%d" % i, alu)
 141
 142         go_rd_l = []
 143         go_wr_l = []
 144         issue_l = []
 145         busy_l = []
 146         req_rel_l = []
 147         rd_rel_l = []
 148         shadow_l = []
 149         godie_l = []
 150         for alu in self.units:
 151             req_rel_l.append(alu.req_rel_o)
 152             rd_rel_l.append(alu.rd_rel_o)
 153             shadow_l.append(alu.shadown_i)
 154             godie_l.append(alu.go_die_i)
 155             go_wr_l.append(alu.go_wr_i)
 156             go_rd_l.append(alu.go_rd_i)
 157             issue_l.append(alu.issue_i)
 158             busy_l.append(alu.busy_o)
 159         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 160         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 161         comb += self.busy_o.eq(Cat(*busy_l))
 162         comb += Cat(*godie_l).eq(self.go_die_i)
 163         comb += Cat(*shadow_l).eq(self.shadown_i)
 164         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 165         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 166         comb += Cat(*issue_l).eq(self.issue_i)
 167
 168         # connect data register input/output
 169
 170         # merge (OR) all integer FU / ALU outputs to a single value
 171         # bit of a hack: treereduce needs a list with an item named "data_o"
 172         if self.units:
 173             data_o = treereduce(self.units)
 174             comb += self.data_o.eq(data_o)
 175
 176         for i, alu in enumerate(self.units):
 177             comb += alu.src1_i.eq(self.src1_i)
 178             comb += alu.src2_i.eq(self.src2_i)
 179
 180         if not self.ldstmode:
 181             return m
 182
 183         ldmem_l = []
 184         stmem_l = []
 185         go_ad_l = []
 186         adr_rel_l = []
 187         sto_rel_l = []
 188         for alu in self.units:
 189             adr_rel_l.append(alu.adr_rel_o)
 190             sto_rel_l.append(alu.sto_rel_o)
 191             ldmem_l.append(alu.load_mem_o)
 192             stmem_l.append(alu.stwd_mem_o)
 193             go_ad_l.append(alu.go_ad_i)
 194         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 195         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 196         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 197         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 198         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 199
 200         return m
 201
 202
 203 class CompUnitLDSTs(CompUnitsBase):
 204
 205     def __init__(self, rwid, opwid, mem):
 206         """ Inputs:
 207
 208             * :rwid:   bit width of register file(s) - both FP and INT
 209             * :opwid:  operand bit width
 210         """
 211         self.opwid = opwid
 212
 213         # inputs
 214         self.oper_i = Signal(opwid, reset_less=True)
 215         self.imm_i = Signal(rwid, reset_less=True)
 216
 217         # Int ALUs
 218         add1 = ALU(rwid)
 219         add2 = ALU(rwid)
 220
 221         units = []
 222         for alu in [add1, add2]:
 223             aluopwid = 4 # see compldst.py for "internal" opcode
 224             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 225
 226         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 227
 228     def elaborate(self, platform):
 229         m = CompUnitsBase.elaborate(self, platform)
 230         comb = m.d.comb
 231
 232         # hand the same operation to all units, 4 lower bits though
 233         for alu in self.units:
 234             comb += alu.oper_i[0:4].eq(self.oper_i)
 235             comb += alu.imm_i.eq(self.imm_i)
 236             comb += alu.isalu_i.eq(0)
 237
 238         return m
 239
 240
 241 class CompUnitALUs(CompUnitsBase):
 242
 243     def __init__(self, rwid, opwid, n_alus):
 244         """ Inputs:
 245
 246             * :rwid:   bit width of register file(s) - both FP and INT
 247             * :opwid:  operand bit width
 248         """
 249         self.opwid = opwid
 250
 251         # inputs
 252         self.oper_i = Signal(opwid, reset_less=True)
 253         self.imm_i = Signal(rwid, reset_less=True)
 254
 255         # Int ALUs
 256         alus = []
 257         for i in range(n_alus):
 258             alus.append(ALU(rwid))
 259
 260         units = []
 261         for alu in alus:
 262             aluopwid = 3 # extra bit for immediate mode
 263             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 264
 265         CompUnitsBase.__init__(self, rwid, units)
 266
 267     def elaborate(self, platform):
 268         m = CompUnitsBase.elaborate(self, platform)
 269         comb = m.d.comb
 270
 271         # hand the same operation to all units, only lower 3 bits though
 272         for alu in self.units:
 273             comb += alu.oper_i[0:3].eq(self.oper_i)
 274             comb += alu.imm_i.eq(self.imm_i)
 275
 276         return m
 277
 278
 279 class CompUnitBR(CompUnitsBase):
 280
 281     def __init__(self, rwid, opwid):
 282         """ Inputs:
 283
 284             * :rwid:   bit width of register file(s) - both FP and INT
 285             * :opwid:  operand bit width
 286
 287             Note: bgt unit is returned so that a shadow unit can be created
 288             for it
 289         """
 290         self.opwid = opwid
 291
 292         # inputs
 293         self.oper_i = Signal(opwid, reset_less=True)
 294         self.imm_i = Signal(rwid, reset_less=True)
 295
 296         # Branch ALU and CU
 297         self.bgt = BranchALU(rwid)
 298         aluopwid = 3 # extra bit for immediate mode
 299         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 300         CompUnitsBase.__init__(self, rwid, [self.br1])
 301
 302     def elaborate(self, platform):
 303         m = CompUnitsBase.elaborate(self, platform)
 304         comb = m.d.comb
 305
 306         # hand the same operation to all units
 307         for alu in self.units:
 308             comb += alu.oper_i.eq(self.oper_i)
 309             comb += alu.imm_i.eq(self.imm_i)
 310
 311         return m
 312
 313
 314 class FunctionUnits(Elaboratable):
 315
 316     def __init__(self, n_regs, n_int_alus):
 317         self.n_regs = n_regs
 318         self.n_int_alus = n_int_alus
 319
 320         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 321         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 322         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 323
 324         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 325         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 326
 327         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 328         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 329         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 330
 331         self.readable_o = Signal(n_int_alus, reset_less=True)
 332         self.writable_o = Signal(n_int_alus, reset_less=True)
 333
 334         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 335         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 336         self.go_die_i = Signal(n_int_alus, reset_less=True)
 337         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 338
 339         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 340
 341     def elaborate(self, platform):
 342         m = Module()
 343         comb = m.d.comb
 344         sync = m.d.sync
 345
 346         n_intfus = self.n_int_alus
 347
 348         # Integer FU-FU Dep Matrix
 349         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 350         m.submodules.intfudeps = intfudeps
 351         # Integer FU-Reg Dep Matrix
 352         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 353         m.submodules.intregdeps = intregdeps
 354
 355         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 356         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 357
 358         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 359         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 360
 361         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 362         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 363         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 364
 365         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 366         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 367         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 368         comb += intfudeps.go_die_i.eq(self.go_die_i)
 369         comb += self.readable_o.eq(intfudeps.readable_o)
 370         comb += self.writable_o.eq(intfudeps.writable_o)
 371
 372         # Connect function issue / arrays, and dest/src1/src2
 373         comb += intregdeps.dest_i.eq(self.dest_i)
 374         comb += intregdeps.src_i[0].eq(self.src1_i)
 375         comb += intregdeps.src_i[1].eq(self.src2_i)
 376
 377         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 378         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 379         comb += intregdeps.go_die_i.eq(self.go_die_i)
 380         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 381
 382         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 383         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 384         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 385
 386         return m
 387
 388
 389 class Scoreboard(Elaboratable):
 390     def __init__(self, rwid, n_regs):
 391         """ Inputs:
 392
 393             * :rwid:   bit width of register file(s) - both FP and INT
 394             * :n_regs: depth of register file(s) - number of FP and INT regs
 395         """
 396         self.rwid = rwid
 397         self.n_regs = n_regs
 398
 399         # Register Files
 400         self.intregs = RegFileArray(rwid, n_regs)
 401         self.fpregs = RegFileArray(rwid, n_regs)
 402
 403         # issue q needs to get at these
 404         self.aluissue = IssueUnitGroup(4)
 405         self.brissue = IssueUnitGroup(1)
 406         self.lsissue = IssueUnitGroup(1)
 407         # and these
 408         self.alu_oper_i = Signal(4, reset_less=True)
 409         self.alu_imm_i = Signal(rwid, reset_less=True)
 410         self.br_oper_i = Signal(4, reset_less=True)
 411         self.br_imm_i = Signal(rwid, reset_less=True)
 412         self.ls_oper_i = Signal(4, reset_less=True)
 413         self.ls_imm_i = Signal(rwid, reset_less=True)
 414
 415         # inputs
 416         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 417         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 418         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 419         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 420
 421         # outputs
 422         self.issue_o = Signal(reset_less=True) # instruction was accepted
 423         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 424
 425         # for branch speculation experiment.  branch_direction = 0 if
 426         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 427         # branch_succ and branch_fail are requests to have the current
 428         # instruction be dependent on the branch unit "shadow" capability.
 429         self.branch_succ_i = Signal(reset_less=True)
 430         self.branch_fail_i = Signal(reset_less=True)
 431         self.branch_direction_o = Signal(2, reset_less=True)
 432
 433     def elaborate(self, platform):
 434         m = Module()
 435         comb = m.d.comb
 436         sync = m.d.sync
 437
 438         m.submodules.intregs = self.intregs
 439         m.submodules.fpregs = self.fpregs
 440
 441         # register ports
 442         int_dest = self.intregs.write_port("dest")
 443         int_src1 = self.intregs.read_port("src1")
 444         int_src2 = self.intregs.read_port("src2")
 445
 446         fp_dest = self.fpregs.write_port("dest")
 447         fp_src1 = self.fpregs.read_port("src1")
 448         fp_src2 = self.fpregs.read_port("src2")
 449
 450         # Int ALUs and BR ALUs
 451         n_int_alus = 5
 452         cua = CompUnitALUs(self.rwid, 3, n_alus=4)
 453         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 454
 455         # LDST Comp Units
 456         n_ldsts = 2
 457         cul = CompUnitLDSTs(self.rwid, 3, None)
 458
 459         # Comp Units
 460         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cub, cul])
 461         bgt = cub.bgt # get at the branch computation unit
 462         br1 = cub.br1
 463
 464         # Int FUs
 465         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 466
 467         # Memory FUs
 468         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 469
 470         # Count of number of FUs
 471         n_intfus = n_int_alus
 472         n_fp_fus = 0 # for now
 473
 474         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 475         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 476         m.submodules.intpick1 = intpick1
 477
 478         # INT/FP Issue Unit
 479         regdecode = RegDecode(self.n_regs)
 480         m.submodules.regdecode = regdecode
 481         issueunit = IssueUnitArray([self.aluissue, self.brissue, self.lsissue])
 482         m.submodules.issueunit = issueunit
 483
 484         # Shadow Matrix.  currently n_intfus shadows, to be used for
 485         # write-after-write hazards.  NOTE: there is one extra for branches,
 486         # so the shadow width is increased by 1
 487         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 488         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 489
 490         # record previous instruction to cast shadow on current instruction
 491         prev_shadow = Signal(n_intfus)
 492
 493         # Branch Speculation recorder.  tracks the success/fail state as
 494         # each instruction is issued, so that when the branch occurs the
 495         # allow/cancel can be issued as appropriate.
 496         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 497
 498         #---------
 499         # ok start wiring things together...
 500         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 501         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 502         #---------
 503
 504         #---------
 505         # Issue Unit is where it starts.  set up some in/outs for this module
 506         #---------
 507         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 508                      regdecode.src1_i.eq(self.int_src1_i),
 509                      regdecode.src2_i.eq(self.int_src2_i),
 510                      regdecode.enable_i.eq(self.reg_enable_i),
 511                      self.issue_o.eq(issueunit.issue_o)
 512                     ]
 513
 514         # take these to outside (issue needs them)
 515         comb += cua.oper_i.eq(self.alu_oper_i)
 516         comb += cua.imm_i.eq(self.alu_imm_i)
 517         comb += cub.oper_i.eq(self.br_oper_i)
 518         comb += cub.imm_i.eq(self.br_imm_i)
 519         comb += cul.oper_i.eq(self.ls_oper_i)
 520         comb += cul.imm_i.eq(self.ls_imm_i)
 521
 522         # TODO: issueunit.f (FP)
 523
 524         # and int function issue / busy arrays, and dest/src1/src2
 525         comb += intfus.dest_i.eq(regdecode.dest_o)
 526         comb += intfus.src1_i.eq(regdecode.src1_o)
 527         comb += intfus.src2_i.eq(regdecode.src2_o)
 528
 529         fn_issue_o = issueunit.fn_issue_o
 530
 531         comb += intfus.fn_issue_i.eq(fn_issue_o)
 532         comb += issueunit.busy_i.eq(cu.busy_o)
 533         comb += self.busy_o.eq(cu.busy_o.bool())
 534
 535         #---------
 536         # merge shadow matrices outputs
 537         #---------
 538
 539         # these are explained in ShadowMatrix docstring, and are to be
 540         # connected to the FUReg and FUFU Matrices, to get them to reset
 541         anydie = Signal(n_intfus, reset_less=True)
 542         allshadown = Signal(n_intfus, reset_less=True)
 543         shreset = Signal(n_intfus, reset_less=True)
 544         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 545         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 546         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 547
 548         #---------
 549         # connect fu-fu matrix
 550         #---------
 551
 552         # Group Picker... done manually for now.
 553         go_rd_o = intpick1.go_rd_o
 554         go_wr_o = intpick1.go_wr_o
 555         go_rd_i = intfus.go_rd_i
 556         go_wr_i = intfus.go_wr_i
 557         go_die_i = intfus.go_die_i
 558         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 559         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 560         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 561         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 562
 563         # Connect Picker
 564         #---------
 565         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 566         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 567         int_rd_o = intfus.readable_o
 568         int_wr_o = intfus.writable_o
 569         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 570         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 571
 572         #---------
 573         # Shadow Matrix
 574         #---------
 575
 576         comb += shadows.issue_i.eq(fn_issue_o)
 577         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 578         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 579         #---------
 580         # NOTE; this setup is for the instruction order preservation...
 581
 582         # connect shadows / go_dies to Computation Units
 583         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 584         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 585
 586         # ok connect first n_int_fu shadows to busy lines, to create an
 587         # instruction-order linked-list-like arrangement, using a bit-matrix
 588         # (instead of e.g. a ring buffer).
 589         # XXX TODO
 590
 591         # when written, the shadow can be cancelled (and was good)
 592         for i in range(n_intfus):
 593             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 594
 595         # *previous* instruction shadows *current* instruction, and, obviously,
 596         # if the previous is completed (!busy) don't cast the shadow!
 597         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 598         for i in range(n_intfus):
 599             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 600
 601         #---------
 602         # ... and this is for branch speculation.  it uses the extra bit
 603         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 604         # only needs to set shadow_i, s_fail_i and s_good_i
 605
 606         # issue captures shadow_i (if enabled)
 607         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 608
 609         bactive = Signal(reset_less=True)
 610         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 611
 612         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 613         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 614             comb += bshadow.issue_i.eq(fn_issue_o)
 615             for i in range(n_intfus):
 616                 with m.If(fn_issue_o & (Const(1<<i))):
 617                     comb += bshadow.shadow_i[i][0].eq(1)
 618
 619         # finally, we need an indicator to the test infrastructure as to
 620         # whether the branch succeeded or failed, plus, link up to the
 621         # "recorder" of whether the instruction was under shadow or not
 622
 623         with m.If(br1.issue_i):
 624             sync += bspec.active_i.eq(1)
 625         with m.If(self.branch_succ_i):
 626             comb += bspec.good_i.eq(fn_issue_o & 0x1f)
 627         with m.If(self.branch_fail_i):
 628             comb += bspec.fail_i.eq(fn_issue_o & 0x1f)
 629
 630         # branch is active (TODO: a better signal: this is over-using the
 631         # go_write signal - actually the branch should not be "writing")
 632         with m.If(br1.go_wr_i):
 633             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 634             sync += bspec.active_i.eq(0)
 635             comb += bspec.br_i.eq(1)
 636             # branch occurs if data == 1, failed if data == 0
 637             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 638             for i in range(n_intfus):
 639                 # *expected* direction of the branch matched against *actual*
 640                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 641                 # ... or it didn't
 642                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 643
 644         #---------
 645         # Connect Register File(s)
 646         #---------
 647         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 648         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 649         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 650
 651         # connect ALUs to regfule
 652         comb += int_dest.data_i.eq(cu.data_o)
 653         comb += cu.src1_i.eq(int_src1.data_o)
 654         comb += cu.src2_i.eq(int_src2.data_o)
 655
 656         # connect ALU Computation Units
 657         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 658         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 659         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 660
 661         return m
 662
 663     def __iter__(self):
 664         yield from self.intregs
 665         yield from self.fpregs
 666         yield self.int_dest_i
 667         yield self.int_src1_i
 668         yield self.int_src2_i
 669         yield self.issue_o
 670         yield self.branch_succ_i
 671         yield self.branch_fail_i
 672         yield self.branch_direction_o
 673
 674     def ports(self):
 675         return list(self)
 676
 677
 678 class IssueToScoreboard(Elaboratable):
 679
 680     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 681         self.qlen = qlen
 682         self.n_in = n_in
 683         self.n_out = n_out
 684         self.rwid = rwid
 685         self.opw = opwid
 686         self.n_regs = n_regs
 687
 688         mqbits = (int(log(qlen) / log(2))+2, False)
 689         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 690         self.p_ready_o = Signal() # instructions were added
 691         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 692
 693         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 694         self.qlen_o = Signal(mqbits, reset_less=True)
 695
 696     def elaborate(self, platform):
 697         m = Module()
 698         comb = m.d.comb
 699         sync = m.d.sync
 700
 701         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 702         sc = Scoreboard(self.rwid, self.n_regs)
 703         mem = TestMemory(self.rwid, 8) # not too big, takes too long
 704         m.submodules.iq = iq
 705         m.submodules.sc = sc
 706         m.submodules.mem = mem
 707
 708         # get at the regfile for testing
 709         self.intregs = sc.intregs
 710
 711         # and the "busy" signal and instruction queue length
 712         comb += self.busy_o.eq(sc.busy_o)
 713         comb += self.qlen_o.eq(iq.qlen_o)
 714
 715         # link up instruction queue
 716         comb += iq.p_add_i.eq(self.p_add_i)
 717         comb += self.p_ready_o.eq(iq.p_ready_o)
 718         for i in range(self.n_in):
 719             comb += eq(iq.data_i[i], self.data_i[i])
 720
 721         # take instruction and process it.  note that it's possible to
 722         # "inspect" the queue contents *without* actually removing the
 723         # items.  items are only removed when the
 724
 725         # in "waiting" state
 726         wait_issue_br = Signal()
 727         wait_issue_alu = Signal()
 728         wait_issue_ls = Signal()
 729
 730         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 731             # set instruction pop length to 1 if the unit accepted
 732             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 733                 with m.If(iq.qlen_o != 0):
 734                     comb += iq.n_sub_i.eq(1)
 735             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 736                 with m.If(iq.qlen_o != 0):
 737                     comb += iq.n_sub_i.eq(1)
 738             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 739                 with m.If(iq.qlen_o != 0):
 740                     comb += iq.n_sub_i.eq(1)
 741
 742         # see if some instruction(s) are here.  note that this is
 743         # "inspecting" the in-place queue.  note also that on the
 744         # cycle following "waiting" for fn_issue_o to be set, the
 745         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 746         with m.If(iq.qlen_o != 0):
 747             # get the operands and operation
 748             imm = iq.data_o[0].imm_i
 749             dest = iq.data_o[0].dest_i
 750             src1 = iq.data_o[0].src1_i
 751             src2 = iq.data_o[0].src2_i
 752             op = iq.data_o[0].oper_i
 753             opi = iq.data_o[0].opim_i # immediate set
 754
 755             # set the src/dest regs
 756             comb += sc.int_dest_i.eq(dest)
 757             comb += sc.int_src1_i.eq(src1)
 758             comb += sc.int_src2_i.eq(src2)
 759             comb += sc.reg_enable_i.eq(1) # enable the regfile
 760
 761             # choose a Function-Unit-Group
 762             with m.If((op & (0x3<<2)) != 0): # branch
 763                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 764                 comb += sc.br_imm_i.eq(imm)
 765                 comb += sc.brissue.insn_i.eq(1)
 766                 comb += wait_issue_br.eq(1)
 767             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 768                 # bit 0: ADD/SUB
 769                 # bit 1: immed
 770                 # bit 4: LD
 771                 # bit 5: ST
 772                 comb += sc.ls_oper_i.eq(Cat(op[0], opi, op[4:5]))
 773                 comb += sc.ls_imm_i.eq(imm)
 774                 comb += sc.lsissue.insn_i.eq(1)
 775                 comb += wait_issue_ls.eq(1)
 776             with m.Else(): # alu
 777                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 778                 comb += sc.alu_imm_i.eq(imm)
 779                 comb += sc.aluissue.insn_i.eq(1)
 780                 comb += wait_issue_alu.eq(1)
 781
 782             # XXX TODO
 783             # these indicate that the instruction is to be made
 784             # shadow-dependent on
 785             # (either) branch success or branch fail
 786             #yield sc.branch_fail_i.eq(branch_fail)
 787             #yield sc.branch_succ_i.eq(branch_success)
 788
 789         return m
 790
 791     def __iter__(self):
 792         yield self.p_ready_o
 793         for o in self.data_i:
 794             yield from list(o)
 795         yield self.p_add_i
 796
 797     def ports(self):
 798         return list(self)
 799
 800
 801 IADD = 0
 802 ISUB = 1
 803 IMUL = 2
 804 ISHF = 3
 805 IBGT = 4
 806 IBLT = 5
 807 IBEQ = 6
 808 IBNE = 7
 809
 810 class RegSim:
 811     def __init__(self, rwidth, nregs):
 812         self.rwidth = rwidth
 813         self.regs = [0] * nregs
 814
 815     def op(self, op, op_imm, imm, src1, src2, dest):
 816         maxbits = (1 << self.rwidth) - 1
 817         src1 = self.regs[src1] & maxbits
 818         if op_imm:
 819             src2 = imm
 820         else:
 821             src2 = self.regs[src2] & maxbits
 822         if op == IADD:
 823             val = src1 + src2
 824         elif op == ISUB:
 825             val = src1 - src2
 826         elif op == IMUL:
 827             val = src1 * src2
 828         elif op == ISHF:
 829             val = src1 >> (src2 & maxbits)
 830         elif op == IBGT:
 831             val = int(src1 > src2)
 832         elif op == IBLT:
 833             val = int(src1 < src2)
 834         elif op == IBEQ:
 835             val = int(src1 == src2)
 836         elif op == IBNE:
 837             val = int(src1 != src2)
 838         val &= maxbits
 839         self.setval(dest, val)
 840         return val
 841
 842     def setval(self, dest, val):
 843         print ("sim setval", dest, hex(val))
 844         self.regs[dest] = val
 845
 846     def dump(self, dut):
 847         for i, val in enumerate(self.regs):
 848             reg = yield dut.intregs.regs[i].reg
 849             okstr = "OK" if reg == val else "!ok"
 850             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 851
 852     def check(self, dut):
 853         for i, val in enumerate(self.regs):
 854             reg = yield dut.intregs.regs[i].reg
 855             if reg != val:
 856                 print("reg %d expected %x received %x\n" % (i, val, reg))
 857                 yield from self.dump(dut)
 858                 assert False
 859
 860 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 861             branch_success, branch_fail):
 862     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 863                'src1_i': src1, 'src2_i': src2}]
 864
 865     sendlen = 1
 866     for idx in range(sendlen):
 867         yield from eq(dut.data_i[idx], instrs[idx])
 868         di = yield dut.data_i[idx]
 869         print ("senddata %d %x" % (idx, di))
 870     yield dut.p_add_i.eq(sendlen)
 871     yield
 872     o_p_ready = yield dut.p_ready_o
 873     while not o_p_ready:
 874         yield
 875         o_p_ready = yield dut.p_ready_o
 876
 877     yield dut.p_add_i.eq(0)
 878
 879
 880 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 881     yield from disable_issue(dut)
 882     yield dut.int_dest_i.eq(dest)
 883     yield dut.int_src1_i.eq(src1)
 884     yield dut.int_src2_i.eq(src2)
 885     if (op & (0x3<<2)) != 0: # branch
 886         yield dut.brissue.insn_i.eq(1)
 887         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 888         yield dut.br_imm_i.eq(imm)
 889         dut_issue = dut.brissue
 890     else:
 891         yield dut.aluissue.insn_i.eq(1)
 892         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 893         yield dut.alu_imm_i.eq(imm)
 894         dut_issue = dut.aluissue
 895     yield dut.reg_enable_i.eq(1)
 896
 897     # these indicate that the instruction is to be made shadow-dependent on
 898     # (either) branch success or branch fail
 899     yield dut.branch_fail_i.eq(branch_fail)
 900     yield dut.branch_succ_i.eq(branch_success)
 901
 902     yield
 903     yield from wait_for_issue(dut, dut_issue)
 904
 905
 906 def print_reg(dut, rnums):
 907     rs = []
 908     for rnum in rnums:
 909         reg = yield dut.intregs.regs[rnum].reg
 910         rs.append("%x" % reg)
 911     rnums = map(str, rnums)
 912     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 913
 914
 915 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 916     insts = []
 917     for i in range(n_ops):
 918         src1 = randint(1, dut.n_regs-1)
 919         src2 = randint(1, dut.n_regs-1)
 920         imm = randint(1, (1<<dut.rwid)-1)
 921         dest = randint(1, dut.n_regs-1)
 922         op = randint(0, max_opnums)
 923         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 924
 925         if shadowing:
 926             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 927         else:
 928             insts.append((src1, src2, dest, op, opi, imm))
 929     return insts
 930
 931
 932 def wait_for_busy_clear(dut):
 933     while True:
 934         busy_o = yield dut.busy_o
 935         if not busy_o:
 936             break
 937         print ("busy",)
 938         yield
 939
 940 def disable_issue(dut):
 941     yield dut.aluissue.insn_i.eq(0)
 942     yield dut.brissue.insn_i.eq(0)
 943
 944
 945 def wait_for_issue(dut, dut_issue):
 946     while True:
 947         issue_o = yield dut_issue.fn_issue_o
 948         if issue_o:
 949             yield from disable_issue(dut)
 950             yield dut.reg_enable_i.eq(0)
 951             break
 952         print ("busy",)
 953         #yield from print_reg(dut, [1,2,3])
 954         yield
 955     #yield from print_reg(dut, [1,2,3])
 956
 957 def scoreboard_branch_sim(dut, alusim):
 958
 959     iseed = 3
 960
 961     for i in range(1):
 962
 963         print ("rseed", iseed)
 964         seed(iseed)
 965         iseed += 1
 966
 967         yield dut.branch_direction_o.eq(0)
 968
 969         # set random values in the registers
 970         for i in range(1, dut.n_regs):
 971             val = 31+i*3
 972             val = randint(0, (1<<alusim.rwidth)-1)
 973             yield dut.intregs.regs[i].reg.eq(val)
 974             alusim.setval(i, val)
 975
 976         if False:
 977             # create some instructions: branches create a tree
 978             insts = create_random_ops(dut, 1, True, 1)
 979             #insts.append((6, 6, 1, 2, (0, 0)))
 980             #insts.append((4, 3, 3, 0, (0, 0)))
 981
 982             src1 = randint(1, dut.n_regs-1)
 983             src2 = randint(1, dut.n_regs-1)
 984             #op = randint(4, 7)
 985             op = 4 # only BGT at the moment
 986
 987             branch_ok = create_random_ops(dut, 1, True, 1)
 988             branch_fail = create_random_ops(dut, 1, True, 1)
 989
 990             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
 991
 992         if True:
 993             insts = []
 994             insts.append( (3, 5, 2, 0, (0, 0)) )
 995             branch_ok = []
 996             branch_fail = []
 997             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
 998             branch_ok.append( None )
 999             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
1000             #branch_fail.append( None )
1001             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
1002
1003         siminsts = deepcopy(insts)
1004
1005         # issue instruction(s)
1006         i = -1
1007         instrs = insts
1008         branch_direction = 0
1009         while instrs:
1010             yield
1011             yield
1012             i += 1
1013             branch_direction = yield dut.branch_direction_o # way branch went
1014             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
1015             if branch_direction == 1 and shadow_on:
1016                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1017                 continue # branch was "success" and this is a "failed"... skip
1018             if branch_direction == 2 and shadow_off:
1019                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1020                 continue # branch was "fail" and this is a "success"... skip
1021             if branch_direction != 0:
1022                 shadow_on = 0
1023                 shadow_off = 0
1024             is_branch = op >= 4
1025             if is_branch:
1026                 branch_ok, branch_fail = dest
1027                 dest = src2
1028                 # ok zip up the branch success / fail instructions and
1029                 # drop them into the queue, one marked "to have branch success"
1030                 # the other to be marked shadow branch "fail".
1031                 # one out of each of these will be cancelled
1032                 for ok, fl in zip(branch_ok, branch_fail):
1033                     if ok:
1034                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1035                     if fl:
1036                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1037             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1038                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1039             yield from int_instr(dut, op, src1, src2, dest,
1040                                  shadow_on, shadow_off)
1041
1042         # wait for all instructions to stop before checking
1043         yield
1044         yield from wait_for_busy_clear(dut)
1045
1046         i = -1
1047         while siminsts:
1048             instr = siminsts.pop(0)
1049             if instr is None:
1050                 continue
1051             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
1052             i += 1
1053             is_branch = op >= 4
1054             if is_branch:
1055                 branch_ok, branch_fail = dest
1056                 dest = src2
1057             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
1058                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1059             branch_res = alusim.op(op, src1, src2, dest)
1060             if is_branch:
1061                 if branch_res:
1062                     siminsts += branch_ok
1063                 else:
1064                     siminsts += branch_fail
1065
1066         # check status
1067         yield from alusim.check(dut)
1068         yield from alusim.dump(dut)
1069
1070
def scoreboard_sim(dut, alusim):
    """Simulation process: run a batch of instructions through the dut's
    instruction queue and compare the resulting registers with alusim.

    Steps: seed the random generator with 0, load registers 1..n_regs-1
    of both the dut and the model with random values, build the
    instruction list, feed each instruction to the model and then to the
    dut via instr_q, wait for the queue to drain and for busy_o to
    clear, then check and dump the registers.

    Instructions are 7-field tuples:
    (src1, src2, dest, op, opi, imm, (br_ok, br_fail)).
    """

    # fixed seed so that every run uses the same values and instructions
    seed(0)

    for i in range(1):

        # set random values in the registers
        # (register 0 is left alone; note this loop reuses the name i)
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        # the "if False" blocks below are regression cases that are
        # switched on by hand.  NOTE: several of them hold 4-, 5- or
        # 6-field tuples, which do not match the 7-field unpack in the
        # issue loop and need updating before being re-enabled.
        instrs = []
        if True:
            # 15 random instructions, opcodes 0..4, with (0, 0) shadow pair
            instrs = create_random_ops(dut, 15, True, 4)

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # presets registers 5 and 3 in both dut and model first
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # as above, with a different value in register 5
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # the model executes the instruction immediately, in list
            # order, before it is handed to the dut's queue
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # first: poll until the instruction queue reports empty
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # then four more bare yields before polling busy_o
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        # check() fails on the first mismatching register; dump() then
        # prints every register when the check has passed
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1213
1214
def test_scoreboard():
    """Build the scoreboard, write its RTLIL to test_scoreboard6600.il
    and run scoreboard_sim against it, recording a VCD trace.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # constructed but not used further here

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1228
1229
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()