src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = 1 # regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117             self.go_st_i = Signal(n_units, reset_less=True)
 118
 119         # outputs
 120         self.busy_o = Signal(n_units, reset_less=True)
 121         self.rd_rel_o = Signal(n_units, reset_less=True)
 122         self.req_rel_o = Signal(n_units, reset_less=True)
 123         if ldstmode:
 124             self.adr_rel_o = Signal(n_units, reset_less=True)
 125             self.sto_rel_o = Signal(n_units, reset_less=True)
 126             self.req_rel_o = Signal(n_units, reset_less=True)
 127             self.load_mem_o = Signal(n_units, reset_less=True)
 128             self.stwd_mem_o = Signal(n_units, reset_less=True)
 129
 130         # in/out register data (note: not register#, actual data)
 131         self.data_o = Signal(rwid, reset_less=True)
 132         self.src1_i = Signal(rwid, reset_less=True)
 133         self.src2_i = Signal(rwid, reset_less=True)
 134         # input operand
 135
 136     def elaborate(self, platform):
 137         m = Module()
 138         comb = m.d.comb
 139
 140         for i, alu in enumerate(self.units):
 141             setattr(m.submodules, "comp%d" % i, alu)
 142
 143         go_rd_l = []
 144         go_wr_l = []
 145         issue_l = []
 146         busy_l = []
 147         req_rel_l = []
 148         rd_rel_l = []
 149         shadow_l = []
 150         godie_l = []
 151         for alu in self.units:
 152             req_rel_l.append(alu.req_rel_o)
 153             rd_rel_l.append(alu.rd_rel_o)
 154             shadow_l.append(alu.shadown_i)
 155             godie_l.append(alu.go_die_i)
 156             go_wr_l.append(alu.go_wr_i)
 157             go_rd_l.append(alu.go_rd_i)
 158             issue_l.append(alu.issue_i)
 159             busy_l.append(alu.busy_o)
 160         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 161         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 162         comb += self.busy_o.eq(Cat(*busy_l))
 163         comb += Cat(*godie_l).eq(self.go_die_i)
 164         comb += Cat(*shadow_l).eq(self.shadown_i)
 165         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 166         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 167         comb += Cat(*issue_l).eq(self.issue_i)
 168
 169         # connect data register input/output
 170
 171         # merge (OR) all integer FU / ALU outputs to a single value
 172         # bit of a hack: treereduce needs a list with an item named "data_o"
 173         if self.units:
 174             data_o = treereduce(self.units)
 175             comb += self.data_o.eq(data_o)
 176
 177         for i, alu in enumerate(self.units):
 178             comb += alu.src1_i.eq(self.src1_i)
 179             comb += alu.src2_i.eq(self.src2_i)
 180
 181         if not self.ldstmode:
 182             return m
 183
 184         ldmem_l = []
 185         stmem_l = []
 186         go_ad_l = []
 187         go_st_l = []
 188         adr_rel_l = []
 189         sto_rel_l = []
 190         for alu in self.units:
 191             adr_rel_l.append(alu.adr_rel_o)
 192             sto_rel_l.append(alu.sto_rel_o)
 193             ldmem_l.append(alu.load_mem_o)
 194             stmem_l.append(alu.stwd_mem_o)
 195             go_ad_l.append(alu.go_ad_i)
 196             go_st_l.append(alu.go_st_i)
 197         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 198         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 199         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 200         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 201         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 202         comb += Cat(*go_st_l).eq(self.go_st_i)
 203
 204         return m
 205
 206
 207 class CompUnitLDSTs(CompUnitsBase):
 208
 209     def __init__(self, rwid, opwid, n_ldsts, mem):
 210         """ Inputs:
 211
 212             * :rwid:   bit width of register file(s) - both FP and INT
 213             * :opwid:  operand bit width
 214         """
 215         self.opwid = opwid
 216
 217         # inputs
 218         self.oper_i = Signal(opwid, reset_less=True)
 219         self.imm_i = Signal(rwid, reset_less=True)
 220
 221         # Int ALUs
 222         self.alus = []
 223         for i in range(n_ldsts):
 224             self.alus.append(ALU(rwid))
 225
 226         units = []
 227         for alu in self.alus:
 228             aluopwid = 4 # see compldst.py for "internal" opcode
 229             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 230
 231         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 232
 233     def elaborate(self, platform):
 234         m = CompUnitsBase.elaborate(self, platform)
 235         comb = m.d.comb
 236
 237         # hand the same operation to all units, 4 lower bits though
 238         for alu in self.units:
 239             comb += alu.oper_i[0:4].eq(self.oper_i)
 240             comb += alu.imm_i.eq(self.imm_i)
 241             comb += alu.isalu_i.eq(0)
 242
 243         return m
 244
 245
 246 class CompUnitALUs(CompUnitsBase):
 247
 248     def __init__(self, rwid, opwid, n_alus):
 249         """ Inputs:
 250
 251             * :rwid:   bit width of register file(s) - both FP and INT
 252             * :opwid:  operand bit width
 253         """
 254         self.opwid = opwid
 255
 256         # inputs
 257         self.oper_i = Signal(opwid, reset_less=True)
 258         self.imm_i = Signal(rwid, reset_less=True)
 259
 260         # Int ALUs
 261         alus = []
 262         for i in range(n_alus):
 263             alus.append(ALU(rwid))
 264
 265         units = []
 266         for alu in alus:
 267             aluopwid = 3 # extra bit for immediate mode
 268             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 269
 270         CompUnitsBase.__init__(self, rwid, units)
 271
 272     def elaborate(self, platform):
 273         m = CompUnitsBase.elaborate(self, platform)
 274         comb = m.d.comb
 275
 276         # hand the same operation to all units, only lower 3 bits though
 277         for alu in self.units:
 278             comb += alu.oper_i[0:3].eq(self.oper_i)
 279             comb += alu.imm_i.eq(self.imm_i)
 280
 281         return m
 282
 283
 284 class CompUnitBR(CompUnitsBase):
 285
 286     def __init__(self, rwid, opwid):
 287         """ Inputs:
 288
 289             * :rwid:   bit width of register file(s) - both FP and INT
 290             * :opwid:  operand bit width
 291
 292             Note: bgt unit is returned so that a shadow unit can be created
 293             for it
 294         """
 295         self.opwid = opwid
 296
 297         # inputs
 298         self.oper_i = Signal(opwid, reset_less=True)
 299         self.imm_i = Signal(rwid, reset_less=True)
 300
 301         # Branch ALU and CU
 302         self.bgt = BranchALU(rwid)
 303         aluopwid = 3 # extra bit for immediate mode
 304         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 305         CompUnitsBase.__init__(self, rwid, [self.br1])
 306
 307     def elaborate(self, platform):
 308         m = CompUnitsBase.elaborate(self, platform)
 309         comb = m.d.comb
 310
 311         # hand the same operation to all units
 312         for alu in self.units:
 313             comb += alu.oper_i.eq(self.oper_i)
 314             comb += alu.imm_i.eq(self.imm_i)
 315
 316         return m
 317
 318
 319 class FunctionUnits(Elaboratable):
 320
 321     def __init__(self, n_regs, n_int_alus):
 322         self.n_regs = n_regs
 323         self.n_int_alus = n_int_alus
 324
 325         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 326         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 327         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 328
 329         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 330         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 331
 332         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 333         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 334         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 335
 336         self.readable_o = Signal(n_int_alus, reset_less=True)
 337         self.writable_o = Signal(n_int_alus, reset_less=True)
 338
 339         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 340         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 341         self.go_die_i = Signal(n_int_alus, reset_less=True)
 342         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 343
 344         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 345
 346     def elaborate(self, platform):
 347         m = Module()
 348         comb = m.d.comb
 349         sync = m.d.sync
 350
 351         n_intfus = self.n_int_alus
 352
 353         # Integer FU-FU Dep Matrix
 354         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 355         m.submodules.intfudeps = intfudeps
 356         # Integer FU-Reg Dep Matrix
 357         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 358         m.submodules.intregdeps = intregdeps
 359
 360         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 361         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 362
 363         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 364         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 365
 366         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 367         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 368         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 369
 370         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 371         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 372         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 373         comb += intfudeps.go_die_i.eq(self.go_die_i)
 374         comb += self.readable_o.eq(intfudeps.readable_o)
 375         comb += self.writable_o.eq(intfudeps.writable_o)
 376
 377         # Connect function issue / arrays, and dest/src1/src2
 378         comb += intregdeps.dest_i.eq(self.dest_i)
 379         comb += intregdeps.src_i[0].eq(self.src1_i)
 380         comb += intregdeps.src_i[1].eq(self.src2_i)
 381
 382         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 383         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 384         comb += intregdeps.go_die_i.eq(self.go_die_i)
 385         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 386
 387         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 388         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 389         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 390
 391         return m
 392
 393
 394 class Scoreboard(Elaboratable):
 395     def __init__(self, rwid, n_regs):
 396         """ Inputs:
 397
 398             * :rwid:   bit width of register file(s) - both FP and INT
 399             * :n_regs: depth of register file(s) - number of FP and INT regs
 400         """
 401         self.rwid = rwid
 402         self.n_regs = n_regs
 403
 404         # Register Files
 405         self.intregs = RegFileArray(rwid, n_regs)
 406         self.fpregs = RegFileArray(rwid, n_regs)
 407
 408         # issue q needs to get at these
 409         self.aluissue = IssueUnitGroup(2)
 410         self.lsissue = IssueUnitGroup(2)
 411         self.brissue = IssueUnitGroup(1)
 412         # and these
 413         self.alu_oper_i = Signal(4, reset_less=True)
 414         self.alu_imm_i = Signal(rwid, reset_less=True)
 415         self.br_oper_i = Signal(4, reset_less=True)
 416         self.br_imm_i = Signal(rwid, reset_less=True)
 417         self.ls_oper_i = Signal(4, reset_less=True)
 418         self.ls_imm_i = Signal(rwid, reset_less=True)
 419
 420         # inputs
 421         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 422         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 423         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 424         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 425
 426         # outputs
 427         self.issue_o = Signal(reset_less=True) # instruction was accepted
 428         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 429
 430         # for branch speculation experiment.  branch_direction = 0 if
 431         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 432         # branch_succ and branch_fail are requests to have the current
 433         # instruction be dependent on the branch unit "shadow" capability.
 434         self.branch_succ_i = Signal(reset_less=True)
 435         self.branch_fail_i = Signal(reset_less=True)
 436         self.branch_direction_o = Signal(2, reset_less=True)
 437
 438     def elaborate(self, platform):
 439         m = Module()
 440         comb = m.d.comb
 441         sync = m.d.sync
 442
 443         m.submodules.intregs = self.intregs
 444         m.submodules.fpregs = self.fpregs
 445
 446         # register ports
 447         int_dest = self.intregs.write_port("dest")
 448         int_src1 = self.intregs.read_port("src1")
 449         int_src2 = self.intregs.read_port("src2")
 450
 451         fp_dest = self.fpregs.write_port("dest")
 452         fp_src1 = self.fpregs.read_port("src1")
 453         fp_src2 = self.fpregs.read_port("src2")
 454
 455         # Int ALUs and BR ALUs
 456         n_int_alus = 5
 457         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 458         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 459
 460         # LDST Comp Units
 461         n_ldsts = 2
 462         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
 463
 464         # Comp Units
 465         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 466         bgt = cub.bgt # get at the branch computation unit
 467         br1 = cub.br1
 468
 469         # Int FUs
 470         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 471
 472         # Memory FUs
 473         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 474
 475         # Count of number of FUs
 476         n_intfus = n_int_alus
 477         n_fp_fus = 0 # for now
 478
 479         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 480         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 481         m.submodules.intpick1 = intpick1
 482
 483         # INT/FP Issue Unit
 484         regdecode = RegDecode(self.n_regs)
 485         m.submodules.regdecode = regdecode
 486         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 487         m.submodules.issueunit = issueunit
 488
 489         # Shadow Matrix.  currently n_intfus shadows, to be used for
 490         # write-after-write hazards.  NOTE: there is one extra for branches,
 491         # so the shadow width is increased by 1
 492         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 493         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 494
 495         # record previous instruction to cast shadow on current instruction
 496         prev_shadow = Signal(n_intfus)
 497
 498         # Branch Speculation recorder.  tracks the success/fail state as
 499         # each instruction is issued, so that when the branch occurs the
 500         # allow/cancel can be issued as appropriate.
 501         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 502
 503         #---------
 504         # ok start wiring things together...
 505         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 506         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 507         #---------
 508
 509         #---------
 510         # Issue Unit is where it starts.  set up some in/outs for this module
 511         #---------
 512         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 513                      regdecode.src1_i.eq(self.int_src1_i),
 514                      regdecode.src2_i.eq(self.int_src2_i),
 515                      regdecode.enable_i.eq(self.reg_enable_i),
 516                      self.issue_o.eq(issueunit.issue_o)
 517                     ]
 518
 519         # take these to outside (issue needs them)
 520         comb += cua.oper_i.eq(self.alu_oper_i)
 521         comb += cua.imm_i.eq(self.alu_imm_i)
 522         comb += cub.oper_i.eq(self.br_oper_i)
 523         comb += cub.imm_i.eq(self.br_imm_i)
 524         comb += cul.oper_i.eq(self.ls_oper_i)
 525         comb += cul.imm_i.eq(self.ls_imm_i)
 526
 527         # TODO: issueunit.f (FP)
 528
 529         # and int function issue / busy arrays, and dest/src1/src2
 530         comb += intfus.dest_i.eq(regdecode.dest_o)
 531         comb += intfus.src1_i.eq(regdecode.src1_o)
 532         comb += intfus.src2_i.eq(regdecode.src2_o)
 533
 534         fn_issue_o = issueunit.fn_issue_o
 535
 536         comb += intfus.fn_issue_i.eq(fn_issue_o)
 537         comb += issueunit.busy_i.eq(cu.busy_o)
 538         comb += self.busy_o.eq(cu.busy_o.bool())
 539
 540         #---------
 541         # Memory Function Unit
 542         #---------
 543         reset_b = Signal(cul.n_units, reset_less=True)
 544         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 545         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 546         comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
 547         comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
 548         with m.If(self.ls_oper_i[2]): # LD bit of operand
 549             comb += memfus.ld_i.eq(cul.issue_i)
 550         with m.If(self.ls_oper_i[3]): # ST bit of operand
 551             comb += memfus.st_i.eq(cul.issue_i)
 552
 553         # connect up address data
 554         comb += memfus.addrs_i[0].eq(cul.units[0].data_o)
 555         comb += memfus.addrs_i[1].eq(cul.units[1].data_o)
 556
 557         # connect loadable / storable to go_ld/go_st.
 558         # XXX should only be done when the memory ld/st has actually happened!
 559
 560         comb += memfus.go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
 561         comb += memfus.go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o)
 562         #comb += cul.go_wr_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
 563         comb += cul.go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o)
 564
 565         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 566         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 567         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 568
 569         #---------
 570         # merge shadow matrices outputs
 571         #---------
 572
 573         # these are explained in ShadowMatrix docstring, and are to be
 574         # connected to the FUReg and FUFU Matrices, to get them to reset
 575         anydie = Signal(n_intfus, reset_less=True)
 576         allshadown = Signal(n_intfus, reset_less=True)
 577         shreset = Signal(n_intfus, reset_less=True)
 578         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 579         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 580         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 581
 582         #---------
 583         # connect fu-fu matrix
 584         #---------
 585
 586         # Group Picker... done manually for now.
 587         go_rd_o = intpick1.go_rd_o
 588         go_wr_o = intpick1.go_wr_o
 589         go_rd_i = intfus.go_rd_i
 590         go_wr_i = intfus.go_wr_i
 591         go_die_i = intfus.go_die_i
 592         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 593         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 594         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 595         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 596
 597         # Connect Picker
 598         #---------
 599         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 600         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 601         int_rd_o = intfus.readable_o
 602         int_wr_o = intfus.writable_o
 603         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 604         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 605
 606         #---------
 607         # Shadow Matrix
 608         #---------
 609
 610         comb += shadows.issue_i.eq(fn_issue_o)
 611         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 612         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 613         #---------
 614         # NOTE; this setup is for the instruction order preservation...
 615
 616         # connect shadows / go_dies to Computation Units
 617         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 618         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 619
 620         # ok connect first n_int_fu shadows to busy lines, to create an
 621         # instruction-order linked-list-like arrangement, using a bit-matrix
 622         # (instead of e.g. a ring buffer).
 623
 624         # when written, the shadow can be cancelled (and was good)
 625         for i in range(n_intfus):
 626             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 627
 628         # *previous* instruction shadows *current* instruction, and, obviously,
 629         # if the previous is completed (!busy) don't cast the shadow!
 630         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 631         for i in range(n_intfus):
 632             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 633
 634         #---------
 635         # ... and this is for branch speculation.  it uses the extra bit
 636         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 637         # only needs to set shadow_i, s_fail_i and s_good_i
 638
 639         # issue captures shadow_i (if enabled)
 640         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 641
 642         bactive = Signal(reset_less=True)
 643         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 644
 645         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 646         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 647             comb += bshadow.issue_i.eq(fn_issue_o)
 648             for i in range(n_intfus):
 649                 with m.If(fn_issue_o & (Const(1<<i))):
 650                     comb += bshadow.shadow_i[i][0].eq(1)
 651
 652         # finally, we need an indicator to the test infrastructure as to
 653         # whether the branch succeeded or failed, plus, link up to the
 654         # "recorder" of whether the instruction was under shadow or not
 655
 656         with m.If(br1.issue_i):
 657             sync += bspec.active_i.eq(1)
 658         with m.If(self.branch_succ_i):
 659             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 660         with m.If(self.branch_fail_i):
 661             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 662
 663         # branch is active (TODO: a better signal: this is over-using the
 664         # go_write signal - actually the branch should not be "writing")
 665         with m.If(br1.go_wr_i):
 666             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 667             sync += bspec.active_i.eq(0)
 668             comb += bspec.br_i.eq(1)
 669             # branch occurs if data == 1, failed if data == 0
 670             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 671             for i in range(n_intfus):
 672                 # *expected* direction of the branch matched against *actual*
 673                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 674                 # ... or it didn't
 675                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 676
 677         #---------
 678         # Connect Register File(s)
 679         #---------
 680         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 681         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 682         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 683
 684         # connect ALUs to regfule
 685         comb += int_dest.data_i.eq(cu.data_o)
 686         comb += cu.src1_i.eq(int_src1.data_o)
 687         comb += cu.src2_i.eq(int_src2.data_o)
 688
 689         # connect ALU Computation Units
 690         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 691         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 692         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 693
 694         return m
 695
 696     def __iter__(self):
 697         yield from self.intregs
 698         yield from self.fpregs
 699         yield self.int_dest_i
 700         yield self.int_src1_i
 701         yield self.int_src2_i
 702         yield self.issue_o
 703         yield self.branch_succ_i
 704         yield self.branch_fail_i
 705         yield self.branch_direction_o
 706
 707     def ports(self):
 708         return list(self)
 709
 710
 711 class IssueToScoreboard(Elaboratable):
 712
 713     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 714         self.qlen = qlen
 715         self.n_in = n_in
 716         self.n_out = n_out
 717         self.rwid = rwid
 718         self.opw = opwid
 719         self.n_regs = n_regs
 720
 721         mqbits = (int(log(qlen) / log(2))+2, False)
 722         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 723         self.p_ready_o = Signal() # instructions were added
 724         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 725
 726         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 727         self.qlen_o = Signal(mqbits, reset_less=True)
 728
 729     def elaborate(self, platform):
 730         m = Module()
 731         comb = m.d.comb
 732         sync = m.d.sync
 733
 734         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 735         sc = Scoreboard(self.rwid, self.n_regs)
 736         mem = TestMemory(self.rwid, 8) # not too big, takes too long
 737         m.submodules.iq = iq
 738         m.submodules.sc = sc
 739         m.submodules.mem = mem
 740
 741         # get at the regfile for testing
 742         self.intregs = sc.intregs
 743
 744         # and the "busy" signal and instruction queue length
 745         comb += self.busy_o.eq(sc.busy_o)
 746         comb += self.qlen_o.eq(iq.qlen_o)
 747
 748         # link up instruction queue
 749         comb += iq.p_add_i.eq(self.p_add_i)
 750         comb += self.p_ready_o.eq(iq.p_ready_o)
 751         for i in range(self.n_in):
 752             comb += eq(iq.data_i[i], self.data_i[i])
 753
 754         # take instruction and process it.  note that it's possible to
 755         # "inspect" the queue contents *without* actually removing the
 756         # items.  items are only removed when the
 757
 758         # in "waiting" state
 759         wait_issue_br = Signal()
 760         wait_issue_alu = Signal()
 761         wait_issue_ls = Signal()
 762
 763         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 764             # set instruction pop length to 1 if the unit accepted
 765             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 766                 with m.If(iq.qlen_o != 0):
 767                     comb += iq.n_sub_i.eq(1)
 768             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 769                 with m.If(iq.qlen_o != 0):
 770                     comb += iq.n_sub_i.eq(1)
 771             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 772                 with m.If(iq.qlen_o != 0):
 773                     comb += iq.n_sub_i.eq(1)
 774
 775         # see if some instruction(s) are here.  note that this is
 776         # "inspecting" the in-place queue.  note also that on the
 777         # cycle following "waiting" for fn_issue_o to be set, the
 778         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 779         with m.If(iq.qlen_o != 0):
 780             # get the operands and operation
 781             imm = iq.data_o[0].imm_i
 782             dest = iq.data_o[0].dest_i
 783             src1 = iq.data_o[0].src1_i
 784             src2 = iq.data_o[0].src2_i
 785             op = iq.data_o[0].oper_i
 786             opi = iq.data_o[0].opim_i # immediate set
 787
 788             # set the src/dest regs
 789             comb += sc.int_dest_i.eq(dest)
 790             comb += sc.int_src1_i.eq(src1)
 791             comb += sc.int_src2_i.eq(src2)
 792             comb += sc.reg_enable_i.eq(1) # enable the regfile
 793
 794             # choose a Function-Unit-Group
 795             with m.If((op & (0x3<<2)) != 0): # branch
 796                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 797                 comb += sc.br_imm_i.eq(imm)
 798                 comb += sc.brissue.insn_i.eq(1)
 799                 comb += wait_issue_br.eq(1)
 800             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 801                 # see compldst.py
 802                 # bit 0: ADD/SUB
 803                 # bit 1: immed
 804                 # bit 4: LD
 805                 # bit 5: ST
 806                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 807                 comb += sc.ls_imm_i.eq(imm)
 808                 comb += sc.lsissue.insn_i.eq(1)
 809                 comb += wait_issue_ls.eq(1)
 810             with m.Else(): # alu
 811                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 812                 comb += sc.alu_imm_i.eq(imm)
 813                 comb += sc.aluissue.insn_i.eq(1)
 814                 comb += wait_issue_alu.eq(1)
 815
 816             # XXX TODO
 817             # these indicate that the instruction is to be made
 818             # shadow-dependent on
 819             # (either) branch success or branch fail
 820             #yield sc.branch_fail_i.eq(branch_fail)
 821             #yield sc.branch_succ_i.eq(branch_success)
 822
 823         return m
 824
 825     def __iter__(self):
 826         yield self.p_ready_o
 827         for o in self.data_i:
 828             yield from list(o)
 829         yield self.p_add_i
 830
 831     def ports(self):
 832         return list(self)
 833
 834
 835 IADD = 0
 836 ISUB = 1
 837 IMUL = 2
 838 ISHF = 3
 839 IBGT = 4
 840 IBLT = 5
 841 IBEQ = 6
 842 IBNE = 7
 843
 844
 845 class RegSim:
 846     def __init__(self, rwidth, nregs):
 847         self.rwidth = rwidth
 848         self.regs = [0] * nregs
 849
 850     def op(self, op, op_imm, imm, src1, src2, dest):
 851         maxbits = (1 << self.rwidth) - 1
 852         src1 = self.regs[src1] & maxbits
 853         if op_imm:
 854             src2 = imm
 855         else:
 856             src2 = self.regs[src2] & maxbits
 857         if op == IADD:
 858             val = src1 + src2
 859         elif op == ISUB:
 860             val = src1 - src2
 861         elif op == IMUL:
 862             val = src1 * src2
 863         elif op == ISHF:
 864             val = src1 >> (src2 & maxbits)
 865         elif op == IBGT:
 866             val = int(src1 > src2)
 867         elif op == IBLT:
 868             val = int(src1 < src2)
 869         elif op == IBEQ:
 870             val = int(src1 == src2)
 871         elif op == IBNE:
 872             val = int(src1 != src2)
 873         else:
 874             return 0 # LD/ST TODO
 875         val &= maxbits
 876         self.setval(dest, val)
 877         return val
 878
 879     def setval(self, dest, val):
 880         print ("sim setval", dest, hex(val))
 881         self.regs[dest] = val
 882
 883     def dump(self, dut):
 884         for i, val in enumerate(self.regs):
 885             reg = yield dut.intregs.regs[i].reg
 886             okstr = "OK" if reg == val else "!ok"
 887             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 888
 889     def check(self, dut):
 890         for i, val in enumerate(self.regs):
 891             reg = yield dut.intregs.regs[i].reg
 892             if reg != val:
 893                 print("reg %d expected %x received %x\n" % (i, val, reg))
 894                 yield from self.dump(dut)
 895                 assert False
 896
 897 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 898             branch_success, branch_fail):
 899     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 900                'src1_i': src1, 'src2_i': src2}]
 901
 902     sendlen = 1
 903     for idx in range(sendlen):
 904         yield from eq(dut.data_i[idx], instrs[idx])
 905         di = yield dut.data_i[idx]
 906         print ("senddata %d %x" % (idx, di))
 907     yield dut.p_add_i.eq(sendlen)
 908     yield
 909     o_p_ready = yield dut.p_ready_o
 910     while not o_p_ready:
 911         yield
 912         o_p_ready = yield dut.p_ready_o
 913
 914     yield dut.p_add_i.eq(0)
 915
 916
 917 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 918     yield from disable_issue(dut)
 919     yield dut.int_dest_i.eq(dest)
 920     yield dut.int_src1_i.eq(src1)
 921     yield dut.int_src2_i.eq(src2)
 922     if (op & (0x3<<2)) != 0: # branch
 923         yield dut.brissue.insn_i.eq(1)
 924         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 925         yield dut.br_imm_i.eq(imm)
 926         dut_issue = dut.brissue
 927     else:
 928         yield dut.aluissue.insn_i.eq(1)
 929         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 930         yield dut.alu_imm_i.eq(imm)
 931         dut_issue = dut.aluissue
 932     yield dut.reg_enable_i.eq(1)
 933
 934     # these indicate that the instruction is to be made shadow-dependent on
 935     # (either) branch success or branch fail
 936     yield dut.branch_fail_i.eq(branch_fail)
 937     yield dut.branch_succ_i.eq(branch_success)
 938
 939     yield
 940     yield from wait_for_issue(dut, dut_issue)
 941
 942
 943 def print_reg(dut, rnums):
 944     rs = []
 945     for rnum in rnums:
 946         reg = yield dut.intregs.regs[rnum].reg
 947         rs.append("%x" % reg)
 948     rnums = map(str, rnums)
 949     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 950
 951
 952 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 953     insts = []
 954     for i in range(n_ops):
 955         src1 = randint(1, dut.n_regs-1)
 956         src2 = randint(1, dut.n_regs-1)
 957         imm = randint(1, (1<<dut.rwid)-1)
 958         dest = randint(1, dut.n_regs-1)
 959         op = randint(0, max_opnums)
 960         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 961
 962         if shadowing:
 963             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 964         else:
 965             insts.append((src1, src2, dest, op, opi, imm))
 966     return insts
 967
 968
 969 def wait_for_busy_clear(dut):
 970     while True:
 971         busy_o = yield dut.busy_o
 972         if not busy_o:
 973             break
 974         print ("busy",)
 975         yield
 976
 977 def disable_issue(dut):
 978     yield dut.aluissue.insn_i.eq(0)
 979     yield dut.brissue.insn_i.eq(0)
 980     yield dut.lsissue.insn_i.eq(0)
 981
 982
 983 def wait_for_issue(dut, dut_issue):
 984     while True:
 985         issue_o = yield dut_issue.fn_issue_o
 986         if issue_o:
 987             yield from disable_issue(dut)
 988             yield dut.reg_enable_i.eq(0)
 989             break
 990         print ("busy",)
 991         #yield from print_reg(dut, [1,2,3])
 992         yield
 993     #yield from print_reg(dut, [1,2,3])
 994
 995 def scoreboard_branch_sim(dut, alusim):
 996
 997     iseed = 3
 998
 999     for i in range(1):
1000
1001         print ("rseed", iseed)
1002         seed(iseed)
1003         iseed += 1
1004
1005         yield dut.branch_direction_o.eq(0)
1006
1007         # set random values in the registers
1008         for i in range(1, dut.n_regs):
1009             val = 31+i*3
1010             val = randint(0, (1<<alusim.rwidth)-1)
1011             yield dut.intregs.regs[i].reg.eq(val)
1012             alusim.setval(i, val)
1013
1014         if False:
1015             # create some instructions: branches create a tree
1016             insts = create_random_ops(dut, 1, True, 1)
1017             #insts.append((6, 6, 1, 2, (0, 0)))
1018             #insts.append((4, 3, 3, 0, (0, 0)))
1019
1020             src1 = randint(1, dut.n_regs-1)
1021             src2 = randint(1, dut.n_regs-1)
1022             #op = randint(4, 7)
1023             op = 4 # only BGT at the moment
1024
1025             branch_ok = create_random_ops(dut, 1, True, 1)
1026             branch_fail = create_random_ops(dut, 1, True, 1)
1027
1028             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
1029
1030         if True:
1031             insts = []
1032             insts.append( (3, 5, 2, 0, (0, 0)) )
1033             branch_ok = []
1034             branch_fail = []
1035             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
1036             branch_ok.append( None )
1037             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
1038             #branch_fail.append( None )
1039             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
1040
1041         siminsts = deepcopy(insts)
1042
1043         # issue instruction(s)
1044         i = -1
1045         instrs = insts
1046         branch_direction = 0
1047         while instrs:
1048             yield
1049             yield
1050             i += 1
1051             branch_direction = yield dut.branch_direction_o # way branch went
1052             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
1053             if branch_direction == 1 and shadow_on:
1054                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1055                 continue # branch was "success" and this is a "failed"... skip
1056             if branch_direction == 2 and shadow_off:
1057                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1058                 continue # branch was "fail" and this is a "success"... skip
1059             if branch_direction != 0:
1060                 shadow_on = 0
1061                 shadow_off = 0
1062             is_branch = op >= 4
1063             if is_branch:
1064                 branch_ok, branch_fail = dest
1065                 dest = src2
1066                 # ok zip up the branch success / fail instructions and
1067                 # drop them into the queue, one marked "to have branch success"
1068                 # the other to be marked shadow branch "fail".
1069                 # one out of each of these will be cancelled
1070                 for ok, fl in zip(branch_ok, branch_fail):
1071                     if ok:
1072                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1073                     if fl:
1074                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1075             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1076                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1077             yield from int_instr(dut, op, src1, src2, dest,
1078                                  shadow_on, shadow_off)
1079
1080         # wait for all instructions to stop before checking
1081         yield
1082         yield from wait_for_busy_clear(dut)
1083
1084         i = -1
1085         while siminsts:
1086             instr = siminsts.pop(0)
1087             if instr is None:
1088                 continue
1089             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
1090             i += 1
1091             is_branch = op >= 4
1092             if is_branch:
1093                 branch_ok, branch_fail = dest
1094                 dest = src2
1095             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
1096                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1097             branch_res = alusim.op(op, src1, src2, dest)
1098             if is_branch:
1099                 if branch_res:
1100                     siminsts += branch_ok
1101                 else:
1102                     siminsts += branch_fail
1103
1104         # check status
1105         yield from alusim.check(dut)
1106         yield from alusim.dump(dut)
1107
1108
def scoreboard_sim(dut, alusim):
    """Simulation generator: queue-driven scoreboard test.

    Registers are filled with random values (mirrored into alusim), the
    selected instructions are applied to alusim and pushed into the
    dut's instruction queue, and once the queue has drained and the dut
    is no longer busy the two register files are compared.

    Instructions are 7-tuples:
    (src1, src2, dest, op, opi, imm, (br_ok, br_fail)).
    Test cases are selected by hand by flipping the "if False" /
    "if True" switches below.
    NOTE(review): disabled cases with fewer than seven fields use an
    older layout and will not unpack in the issue loop as they stand.
    """

    seed(0)

    for _run in range(1):

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+regnum*3
            #val = regnum
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            instrs += [(1, 2, 2, 0x10, 1, 1, (0, 0))]
            #instrs += [(1, 2, 7, 0x10, 1, 1, (0, 0))]

        if False:
            instrs += [(1, 2, 2, 1, 1, 20, (0, 0))]

        if False:
            instrs += [(7, 3, 2, 4, (0, 0)),
                       (7, 6, 6, 2, (0, 0)),
                       (1, 7, 2, 2, (0, 0))]

        if False:
            instrs += [(2, 3, 3, 0, 0, 0, (0, 0)),
                       (5, 3, 3, 1, 0, 0, (0, 0)),
                       (3, 5, 5, 2, 0, 0, (0, 0)),
                       (5, 3, 3, 3, 0, 0, (0, 0)),
                       (3, 5, 5, 0, 0, 0, (0, 0))]

        if False:
            instrs += [(3, 3, 4, 0, 0, 13979, (0, 0)),
                       (6, 4, 1, 2, 0, 40976, (0, 0)),
                       (1, 4, 7, 4, 1, 23652, (0, 0))]

        if False:
            instrs += [(5, 6, 2, 1),
                       (2, 2, 4, 0)]
            #instrs += [(2, 2, 3, 1)]

        if False:
            instrs += [(2, 1, 2, 3)]

        if False:
            instrs += [(2, 6, 2, 1),
                       (2, 1, 2, 0)]

        if False:
            instrs += [(1, 2, 7, 2),
                       (7, 1, 5, 0),
                       (4, 4, 1, 1)]

        if False:
            instrs += [(5, 6, 2, 2),
                       (1, 1, 4, 1),
                       (6, 5, 3, 0)]

        if False:
            # Write-after-Write Hazard
            instrs += [(3, 6, 7, 2),
                       (4, 4, 7, 1)]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [(1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1)]

        if False:
            # self-read-write sandwich
            instrs += [(5, 6, 1, 2),
                       (1, 1, 1, 1),
                       (1, 5, 3, 0)]

        if False:
            # very weird failure
            instrs += [(5, 2, 5, 2),
                       (2, 6, 3, 0),
                       (4, 2, 2, 1)]

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (0, 1))]

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs += [(5, 3, 3, 4, (0, 0)),
                       (4, 2, 1, 2, (1, 0))]

        if False:
            instrs += [(4, 3, 5, 1, 0, (0, 0)),
                       (5, 2, 3, 1, 0, (0, 0)),
                       (7, 1, 5, 2, 0, (0, 0)),
                       (5, 6, 6, 4, 0, (0, 0)),
                       (7, 5, 2, 2, 0, (1, 0)),
                       (1, 7, 5, 0, 0, (0, 1)),
                       (1, 6, 1, 2, 0, (1, 0)),
                       (1, 6, 7, 3, 0, (0, 0)),
                       (6, 7, 7, 0, 0, (0, 0))]

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (idx, src1, src2, dest, op, opi, imm))
            # model first, then hand the same instruction to the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first let the queue drain...
        while (yield dut.qlen_o) != 0:
            yield
        # ...then allow a few more clocks before polling "busy"
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1255
1256
def test_scoreboard():
    """Build the dut, write its RTLIL to disk and run scoreboard_sim."""
    rwid, n_regs = 16, 8
    dut = IssueToScoreboard(2, 1, 1, rwid, 8, n_regs)
    alusim = RegSim(rwid, n_regs)
    memsim = MemSim(16, 16)

    # dump the design for inspection
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    stimulus = scoreboard_sim(dut, alusim)
    run_simulation(dut, stimulus, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')


# run the test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()