src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = 1 # regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117             self.go_st_i = Signal(n_units, reset_less=True)
 118
 119         # outputs
 120         self.busy_o = Signal(n_units, reset_less=True)
 121         self.rd_rel_o = Signal(n_units, reset_less=True)
 122         self.req_rel_o = Signal(n_units, reset_less=True)
 123         if ldstmode:
 124             self.adr_rel_o = Signal(n_units, reset_less=True)
 125             self.sto_rel_o = Signal(n_units, reset_less=True)
 126             self.req_rel_o = Signal(n_units, reset_less=True)
 127             self.load_mem_o = Signal(n_units, reset_less=True)
 128             self.stwd_mem_o = Signal(n_units, reset_less=True)
 129
 130         # in/out register data (note: not register#, actual data)
 131         self.data_o = Signal(rwid, reset_less=True)
 132         self.src1_i = Signal(rwid, reset_less=True)
 133         self.src2_i = Signal(rwid, reset_less=True)
 134         # input operand
 135
 136     def elaborate(self, platform):
 137         m = Module()
 138         comb = m.d.comb
 139
 140         for i, alu in enumerate(self.units):
 141             setattr(m.submodules, "comp%d" % i, alu)
 142
 143         go_rd_l = []
 144         go_wr_l = []
 145         issue_l = []
 146         busy_l = []
 147         req_rel_l = []
 148         rd_rel_l = []
 149         shadow_l = []
 150         godie_l = []
 151         for alu in self.units:
 152             req_rel_l.append(alu.req_rel_o)
 153             rd_rel_l.append(alu.rd_rel_o)
 154             shadow_l.append(alu.shadown_i)
 155             godie_l.append(alu.go_die_i)
 156             go_wr_l.append(alu.go_wr_i)
 157             go_rd_l.append(alu.go_rd_i)
 158             issue_l.append(alu.issue_i)
 159             busy_l.append(alu.busy_o)
 160         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 161         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 162         comb += self.busy_o.eq(Cat(*busy_l))
 163         comb += Cat(*godie_l).eq(self.go_die_i)
 164         comb += Cat(*shadow_l).eq(self.shadown_i)
 165         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 166         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 167         comb += Cat(*issue_l).eq(self.issue_i)
 168
 169         # connect data register input/output
 170
 171         # merge (OR) all integer FU / ALU outputs to a single value
 172         # bit of a hack: treereduce needs a list with an item named "data_o"
 173         if self.units:
 174             data_o = treereduce(self.units)
 175             comb += self.data_o.eq(data_o)
 176
 177         for i, alu in enumerate(self.units):
 178             comb += alu.src1_i.eq(self.src1_i)
 179             comb += alu.src2_i.eq(self.src2_i)
 180
 181         if not self.ldstmode:
 182             return m
 183
 184         ldmem_l = []
 185         stmem_l = []
 186         go_ad_l = []
 187         go_st_l = []
 188         adr_rel_l = []
 189         sto_rel_l = []
 190         for alu in self.units:
 191             adr_rel_l.append(alu.adr_rel_o)
 192             sto_rel_l.append(alu.sto_rel_o)
 193             ldmem_l.append(alu.load_mem_o)
 194             stmem_l.append(alu.stwd_mem_o)
 195             go_ad_l.append(alu.go_ad_i)
 196             go_st_l.append(alu.go_st_i)
 197         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 198         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 199         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 200         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 201         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 202         comb += Cat(*go_st_l).eq(self.go_st_i)
 203
 204         return m
 205
 206
 207 class CompUnitLDSTs(CompUnitsBase):
 208
 209     def __init__(self, rwid, opwid, n_ldsts, mem):
 210         """ Inputs:
 211
 212             * :rwid:   bit width of register file(s) - both FP and INT
 213             * :opwid:  operand bit width
 214         """
 215         self.opwid = opwid
 216
 217         # inputs
 218         self.oper_i = Signal(opwid, reset_less=True)
 219         self.imm_i = Signal(rwid, reset_less=True)
 220
 221         # Int ALUs
 222         self.alus = []
 223         for i in range(n_ldsts):
 224             self.alus.append(ALU(rwid))
 225
 226         units = []
 227         for alu in self.alus:
 228             aluopwid = 4 # see compldst.py for "internal" opcode
 229             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 230
 231         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 232
 233     def elaborate(self, platform):
 234         m = CompUnitsBase.elaborate(self, platform)
 235         comb = m.d.comb
 236
 237         # hand the same operation to all units, 4 lower bits though
 238         for alu in self.units:
 239             comb += alu.oper_i[0:4].eq(self.oper_i)
 240             comb += alu.imm_i.eq(self.imm_i)
 241             comb += alu.isalu_i.eq(0)
 242
 243         return m
 244
 245
 246 class CompUnitALUs(CompUnitsBase):
 247
 248     def __init__(self, rwid, opwid, n_alus):
 249         """ Inputs:
 250
 251             * :rwid:   bit width of register file(s) - both FP and INT
 252             * :opwid:  operand bit width
 253         """
 254         self.opwid = opwid
 255
 256         # inputs
 257         self.oper_i = Signal(opwid, reset_less=True)
 258         self.imm_i = Signal(rwid, reset_less=True)
 259
 260         # Int ALUs
 261         alus = []
 262         for i in range(n_alus):
 263             alus.append(ALU(rwid))
 264
 265         units = []
 266         for alu in alus:
 267             aluopwid = 3 # extra bit for immediate mode
 268             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 269
 270         CompUnitsBase.__init__(self, rwid, units)
 271
 272     def elaborate(self, platform):
 273         m = CompUnitsBase.elaborate(self, platform)
 274         comb = m.d.comb
 275
 276         # hand the same operation to all units, only lower 3 bits though
 277         for alu in self.units:
 278             comb += alu.oper_i[0:3].eq(self.oper_i)
 279             comb += alu.imm_i.eq(self.imm_i)
 280
 281         return m
 282
 283
 284 class CompUnitBR(CompUnitsBase):
 285
 286     def __init__(self, rwid, opwid):
 287         """ Inputs:
 288
 289             * :rwid:   bit width of register file(s) - both FP and INT
 290             * :opwid:  operand bit width
 291
 292             Note: bgt unit is returned so that a shadow unit can be created
 293             for it
 294         """
 295         self.opwid = opwid
 296
 297         # inputs
 298         self.oper_i = Signal(opwid, reset_less=True)
 299         self.imm_i = Signal(rwid, reset_less=True)
 300
 301         # Branch ALU and CU
 302         self.bgt = BranchALU(rwid)
 303         aluopwid = 3 # extra bit for immediate mode
 304         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 305         CompUnitsBase.__init__(self, rwid, [self.br1])
 306
 307     def elaborate(self, platform):
 308         m = CompUnitsBase.elaborate(self, platform)
 309         comb = m.d.comb
 310
 311         # hand the same operation to all units
 312         for alu in self.units:
 313             comb += alu.oper_i.eq(self.oper_i)
 314             comb += alu.imm_i.eq(self.imm_i)
 315
 316         return m
 317
 318
 319 class FunctionUnits(Elaboratable):
 320
 321     def __init__(self, n_regs, n_int_alus):
 322         self.n_regs = n_regs
 323         self.n_int_alus = n_int_alus
 324
 325         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 326         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 327         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 328
 329         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 330         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 331
 332         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 333         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 334         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 335
 336         self.readable_o = Signal(n_int_alus, reset_less=True)
 337         self.writable_o = Signal(n_int_alus, reset_less=True)
 338
 339         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 340         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 341         self.go_die_i = Signal(n_int_alus, reset_less=True)
 342         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 343
 344         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 345
 346     def elaborate(self, platform):
 347         m = Module()
 348         comb = m.d.comb
 349         sync = m.d.sync
 350
 351         n_intfus = self.n_int_alus
 352
 353         # Integer FU-FU Dep Matrix
 354         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 355         m.submodules.intfudeps = intfudeps
 356         # Integer FU-Reg Dep Matrix
 357         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 358         m.submodules.intregdeps = intregdeps
 359
 360         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 361         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 362
 363         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 364         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 365
 366         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 367         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 368         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 369
 370         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 371         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 372         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 373         comb += intfudeps.go_die_i.eq(self.go_die_i)
 374         comb += self.readable_o.eq(intfudeps.readable_o)
 375         comb += self.writable_o.eq(intfudeps.writable_o)
 376
 377         # Connect function issue / arrays, and dest/src1/src2
 378         comb += intregdeps.dest_i.eq(self.dest_i)
 379         comb += intregdeps.src_i[0].eq(self.src1_i)
 380         comb += intregdeps.src_i[1].eq(self.src2_i)
 381
 382         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 383         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 384         comb += intregdeps.go_die_i.eq(self.go_die_i)
 385         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 386
 387         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 388         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 389         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 390
 391         return m
 392
 393
 394 class Scoreboard(Elaboratable):
 395     def __init__(self, rwid, n_regs):
 396         """ Inputs:
 397
 398             * :rwid:   bit width of register file(s) - both FP and INT
 399             * :n_regs: depth of register file(s) - number of FP and INT regs
 400         """
 401         self.rwid = rwid
 402         self.n_regs = n_regs
 403
 404         # Register Files
 405         self.intregs = RegFileArray(rwid, n_regs)
 406         self.fpregs = RegFileArray(rwid, n_regs)
 407
 408         # issue q needs to get at these
 409         self.aluissue = IssueUnitGroup(2)
 410         self.lsissue = IssueUnitGroup(2)
 411         self.brissue = IssueUnitGroup(1)
 412         # and these
 413         self.alu_oper_i = Signal(4, reset_less=True)
 414         self.alu_imm_i = Signal(rwid, reset_less=True)
 415         self.br_oper_i = Signal(4, reset_less=True)
 416         self.br_imm_i = Signal(rwid, reset_less=True)
 417         self.ls_oper_i = Signal(4, reset_less=True)
 418         self.ls_imm_i = Signal(rwid, reset_less=True)
 419
 420         # inputs
 421         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 422         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 423         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 424         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 425
 426         # outputs
 427         self.issue_o = Signal(reset_less=True) # instruction was accepted
 428         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 429
 430         # for branch speculation experiment.  branch_direction = 0 if
 431         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 432         # branch_succ and branch_fail are requests to have the current
 433         # instruction be dependent on the branch unit "shadow" capability.
 434         self.branch_succ_i = Signal(reset_less=True)
 435         self.branch_fail_i = Signal(reset_less=True)
 436         self.branch_direction_o = Signal(2, reset_less=True)
 437
 438     def elaborate(self, platform):
 439         m = Module()
 440         comb = m.d.comb
 441         sync = m.d.sync
 442
 443         m.submodules.intregs = self.intregs
 444         m.submodules.fpregs = self.fpregs
 445
 446         # register ports
 447         int_dest = self.intregs.write_port("dest")
 448         int_src1 = self.intregs.read_port("src1")
 449         int_src2 = self.intregs.read_port("src2")
 450
 451         fp_dest = self.fpregs.write_port("dest")
 452         fp_src1 = self.fpregs.read_port("src1")
 453         fp_src2 = self.fpregs.read_port("src2")
 454
 455         # Int ALUs and BR ALUs
 456         n_int_alus = 5
 457         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 458         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 459
 460         # LDST Comp Units
 461         n_ldsts = 2
 462         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
 463
 464         # Comp Units
 465         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 466         bgt = cub.bgt # get at the branch computation unit
 467         br1 = cub.br1
 468
 469         # Int FUs
 470         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 471
 472         # Memory FUs
 473         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 474
 475         # Count of number of FUs
 476         n_intfus = n_int_alus
 477         n_fp_fus = 0 # for now
 478
 479         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 480         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 481         m.submodules.intpick1 = intpick1
 482
 483         # INT/FP Issue Unit
 484         regdecode = RegDecode(self.n_regs)
 485         m.submodules.regdecode = regdecode
 486         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 487         m.submodules.issueunit = issueunit
 488
 489         # Shadow Matrix.  currently n_intfus shadows, to be used for
 490         # write-after-write hazards.  NOTE: there is one extra for branches,
 491         # so the shadow width is increased by 1
 492         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 493         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 494
 495         # record previous instruction to cast shadow on current instruction
 496         prev_shadow = Signal(n_intfus)
 497
 498         # Branch Speculation recorder.  tracks the success/fail state as
 499         # each instruction is issued, so that when the branch occurs the
 500         # allow/cancel can be issued as appropriate.
 501         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 502
 503         #---------
 504         # ok start wiring things together...
 505         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 506         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 507         #---------
 508
 509         #---------
 510         # Issue Unit is where it starts.  set up some in/outs for this module
 511         #---------
 512         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 513                      regdecode.src1_i.eq(self.int_src1_i),
 514                      regdecode.src2_i.eq(self.int_src2_i),
 515                      regdecode.enable_i.eq(self.reg_enable_i),
 516                      self.issue_o.eq(issueunit.issue_o)
 517                     ]
 518
 519         # take these to outside (issue needs them)
 520         comb += cua.oper_i.eq(self.alu_oper_i)
 521         comb += cua.imm_i.eq(self.alu_imm_i)
 522         comb += cub.oper_i.eq(self.br_oper_i)
 523         comb += cub.imm_i.eq(self.br_imm_i)
 524         comb += cul.oper_i.eq(self.ls_oper_i)
 525         comb += cul.imm_i.eq(self.ls_imm_i)
 526
 527         # TODO: issueunit.f (FP)
 528
 529         # and int function issue / busy arrays, and dest/src1/src2
 530         comb += intfus.dest_i.eq(regdecode.dest_o)
 531         comb += intfus.src1_i.eq(regdecode.src1_o)
 532         comb += intfus.src2_i.eq(regdecode.src2_o)
 533
 534         fn_issue_o = issueunit.fn_issue_o
 535
 536         comb += intfus.fn_issue_i.eq(fn_issue_o)
 537         comb += issueunit.busy_i.eq(cu.busy_o)
 538         comb += self.busy_o.eq(cu.busy_o.bool())
 539
 540         #---------
 541         # Memory Function Unit
 542         #---------
 543         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 544         comb += memfus.addr_we_i.eq(cul.adr_rel_o) # Match enable on adr rel
 545         comb += memfus.addr_rs_i.eq(~cul.busy_o) # Match disable on busy off
 546
 547         # connect up address data
 548         comb += memfus.addrs_i[0].eq(cul.units[0].data_o)
 549         comb += memfus.addrs_i[1].eq(cul.units[1].data_o)
 550
 551         # connect loadable / storable to go_ld/go_st.
 552         # XXX should only be done when the memory ld/st has actually happened!
 553
 554         #comb += cul.go_wr_i.eq(memfus.loadable_o & memfus.addr_match_o)
 555         #comb += cul.go_st_i.eq(memfus.storable_o & memfus.addr_match_o)
 556
 557         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 558         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 559         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 560
 561         #---------
 562         # merge shadow matrices outputs
 563         #---------
 564
 565         # these are explained in ShadowMatrix docstring, and are to be
 566         # connected to the FUReg and FUFU Matrices, to get them to reset
 567         anydie = Signal(n_intfus, reset_less=True)
 568         allshadown = Signal(n_intfus, reset_less=True)
 569         shreset = Signal(n_intfus, reset_less=True)
 570         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 571         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 572         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 573
 574         #---------
 575         # connect fu-fu matrix
 576         #---------
 577
 578         # Group Picker... done manually for now.
 579         go_rd_o = intpick1.go_rd_o
 580         go_wr_o = intpick1.go_wr_o
 581         go_rd_i = intfus.go_rd_i
 582         go_wr_i = intfus.go_wr_i
 583         go_die_i = intfus.go_die_i
 584         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 585         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 586         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 587         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 588
 589         # Connect Picker
 590         #---------
 591         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 592         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 593         int_rd_o = intfus.readable_o
 594         int_wr_o = intfus.writable_o
 595         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 596         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 597
 598         #---------
 599         # Shadow Matrix
 600         #---------
 601
 602         comb += shadows.issue_i.eq(fn_issue_o)
 603         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 604         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 605         #---------
 606         # NOTE; this setup is for the instruction order preservation...
 607
 608         # connect shadows / go_dies to Computation Units
 609         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 610         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 611
 612         # ok connect first n_int_fu shadows to busy lines, to create an
 613         # instruction-order linked-list-like arrangement, using a bit-matrix
 614         # (instead of e.g. a ring buffer).
 615         # XXX TODO
 616
 617         # when written, the shadow can be cancelled (and was good)
 618         for i in range(n_intfus):
 619             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 620
 621         # *previous* instruction shadows *current* instruction, and, obviously,
 622         # if the previous is completed (!busy) don't cast the shadow!
 623         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 624         for i in range(n_intfus):
 625             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 626
 627         #---------
 628         # ... and this is for branch speculation.  it uses the extra bit
 629         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 630         # only needs to set shadow_i, s_fail_i and s_good_i
 631
 632         # issue captures shadow_i (if enabled)
 633         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 634
 635         bactive = Signal(reset_less=True)
 636         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 637
 638         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 639         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 640             comb += bshadow.issue_i.eq(fn_issue_o)
 641             for i in range(n_intfus):
 642                 with m.If(fn_issue_o & (Const(1<<i))):
 643                     comb += bshadow.shadow_i[i][0].eq(1)
 644
 645         # finally, we need an indicator to the test infrastructure as to
 646         # whether the branch succeeded or failed, plus, link up to the
 647         # "recorder" of whether the instruction was under shadow or not
 648
 649         with m.If(br1.issue_i):
 650             sync += bspec.active_i.eq(1)
 651         with m.If(self.branch_succ_i):
 652             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 653         with m.If(self.branch_fail_i):
 654             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 655
 656         # branch is active (TODO: a better signal: this is over-using the
 657         # go_write signal - actually the branch should not be "writing")
 658         with m.If(br1.go_wr_i):
 659             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 660             sync += bspec.active_i.eq(0)
 661             comb += bspec.br_i.eq(1)
 662             # branch occurs if data == 1, failed if data == 0
 663             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 664             for i in range(n_intfus):
 665                 # *expected* direction of the branch matched against *actual*
 666                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 667                 # ... or it didn't
 668                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 669
 670         #---------
 671         # Connect Register File(s)
 672         #---------
 673         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 674         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 675         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 676
 677         # connect ALUs to regfule
 678         comb += int_dest.data_i.eq(cu.data_o)
 679         comb += cu.src1_i.eq(int_src1.data_o)
 680         comb += cu.src2_i.eq(int_src2.data_o)
 681
 682         # connect ALU Computation Units
 683         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 684         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 685         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 686
 687         return m
 688
 689     def __iter__(self):
 690         yield from self.intregs
 691         yield from self.fpregs
 692         yield self.int_dest_i
 693         yield self.int_src1_i
 694         yield self.int_src2_i
 695         yield self.issue_o
 696         yield self.branch_succ_i
 697         yield self.branch_fail_i
 698         yield self.branch_direction_o
 699
 700     def ports(self):
 701         return list(self)
 702
 703
 704 class IssueToScoreboard(Elaboratable):
 705
 706     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 707         self.qlen = qlen
 708         self.n_in = n_in
 709         self.n_out = n_out
 710         self.rwid = rwid
 711         self.opw = opwid
 712         self.n_regs = n_regs
 713
 714         mqbits = (int(log(qlen) / log(2))+2, False)
 715         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 716         self.p_ready_o = Signal() # instructions were added
 717         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 718
 719         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 720         self.qlen_o = Signal(mqbits, reset_less=True)
 721
 722     def elaborate(self, platform):
 723         m = Module()
 724         comb = m.d.comb
 725         sync = m.d.sync
 726
 727         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 728         sc = Scoreboard(self.rwid, self.n_regs)
 729         mem = TestMemory(self.rwid, 8) # not too big, takes too long
 730         m.submodules.iq = iq
 731         m.submodules.sc = sc
 732         m.submodules.mem = mem
 733
 734         # get at the regfile for testing
 735         self.intregs = sc.intregs
 736
 737         # and the "busy" signal and instruction queue length
 738         comb += self.busy_o.eq(sc.busy_o)
 739         comb += self.qlen_o.eq(iq.qlen_o)
 740
 741         # link up instruction queue
 742         comb += iq.p_add_i.eq(self.p_add_i)
 743         comb += self.p_ready_o.eq(iq.p_ready_o)
 744         for i in range(self.n_in):
 745             comb += eq(iq.data_i[i], self.data_i[i])
 746
 747         # take instruction and process it.  note that it's possible to
 748         # "inspect" the queue contents *without* actually removing the
 749         # items.  items are only removed when the
 750
 751         # in "waiting" state
 752         wait_issue_br = Signal()
 753         wait_issue_alu = Signal()
 754         wait_issue_ls = Signal()
 755
 756         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 757             # set instruction pop length to 1 if the unit accepted
 758             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 759                 with m.If(iq.qlen_o != 0):
 760                     comb += iq.n_sub_i.eq(1)
 761             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 762                 with m.If(iq.qlen_o != 0):
 763                     comb += iq.n_sub_i.eq(1)
 764             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 765                 with m.If(iq.qlen_o != 0):
 766                     comb += iq.n_sub_i.eq(1)
 767
 768         # see if some instruction(s) are here.  note that this is
 769         # "inspecting" the in-place queue.  note also that on the
 770         # cycle following "waiting" for fn_issue_o to be set, the
 771         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 772         with m.If(iq.qlen_o != 0):
 773             # get the operands and operation
 774             imm = iq.data_o[0].imm_i
 775             dest = iq.data_o[0].dest_i
 776             src1 = iq.data_o[0].src1_i
 777             src2 = iq.data_o[0].src2_i
 778             op = iq.data_o[0].oper_i
 779             opi = iq.data_o[0].opim_i # immediate set
 780
 781             # set the src/dest regs
 782             comb += sc.int_dest_i.eq(dest)
 783             comb += sc.int_src1_i.eq(src1)
 784             comb += sc.int_src2_i.eq(src2)
 785             comb += sc.reg_enable_i.eq(1) # enable the regfile
 786
 787             # choose a Function-Unit-Group
 788             with m.If((op & (0x3<<2)) != 0): # branch
 789                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 790                 comb += sc.br_imm_i.eq(imm)
 791                 comb += sc.brissue.insn_i.eq(1)
 792                 comb += wait_issue_br.eq(1)
 793             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 794                 # see compldst.py
 795                 # bit 0: ADD/SUB
 796                 # bit 1: immed
 797                 # bit 4: LD
 798                 # bit 5: ST
 799                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 800                 comb += sc.ls_imm_i.eq(imm)
 801                 comb += sc.lsissue.insn_i.eq(1)
 802                 comb += wait_issue_ls.eq(1)
 803             with m.Else(): # alu
 804                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 805                 comb += sc.alu_imm_i.eq(imm)
 806                 comb += sc.aluissue.insn_i.eq(1)
 807                 comb += wait_issue_alu.eq(1)
 808
 809             # XXX TODO
 810             # these indicate that the instruction is to be made
 811             # shadow-dependent on
 812             # (either) branch success or branch fail
 813             #yield sc.branch_fail_i.eq(branch_fail)
 814             #yield sc.branch_succ_i.eq(branch_success)
 815
 816         return m
 817
 818     def __iter__(self):
 819         yield self.p_ready_o
 820         for o in self.data_i:
 821             yield from list(o)
 822         yield self.p_add_i
 823
 824     def ports(self):
 825         return list(self)
 826
 827
# opcode numbers as interpreted by RegSim.op.  values 4-7 have bit 2 set,
# which is what the issue logic tests for when routing an instruction to
# the branch function units.
IADD = 0 # src1 + src2
ISUB = 1 # src1 - src2
IMUL = 2 # src1 * src2
ISHF = 3 # src1 >> src2
IBGT = 4 # 1 if src1 > src2 else 0
IBLT = 5 # 1 if src1 < src2 else 0
IBEQ = 6 # 1 if src1 == src2 else 0
IBNE = 7 # 1 if src1 != src2 else 0
 836
 837
 838 class RegSim:
 839     def __init__(self, rwidth, nregs):
 840         self.rwidth = rwidth
 841         self.regs = [0] * nregs
 842
 843     def op(self, op, op_imm, imm, src1, src2, dest):
 844         maxbits = (1 << self.rwidth) - 1
 845         src1 = self.regs[src1] & maxbits
 846         if op_imm:
 847             src2 = imm
 848         else:
 849             src2 = self.regs[src2] & maxbits
 850         if op == IADD:
 851             val = src1 + src2
 852         elif op == ISUB:
 853             val = src1 - src2
 854         elif op == IMUL:
 855             val = src1 * src2
 856         elif op == ISHF:
 857             val = src1 >> (src2 & maxbits)
 858         elif op == IBGT:
 859             val = int(src1 > src2)
 860         elif op == IBLT:
 861             val = int(src1 < src2)
 862         elif op == IBEQ:
 863             val = int(src1 == src2)
 864         elif op == IBNE:
 865             val = int(src1 != src2)
 866         else:
 867             return 0 # LD/ST TODO
 868         val &= maxbits
 869         self.setval(dest, val)
 870         return val
 871
 872     def setval(self, dest, val):
 873         print ("sim setval", dest, hex(val))
 874         self.regs[dest] = val
 875
 876     def dump(self, dut):
 877         for i, val in enumerate(self.regs):
 878             reg = yield dut.intregs.regs[i].reg
 879             okstr = "OK" if reg == val else "!ok"
 880             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 881
 882     def check(self, dut):
 883         for i, val in enumerate(self.regs):
 884             reg = yield dut.intregs.regs[i].reg
 885             if reg != val:
 886                 print("reg %d expected %x received %x\n" % (i, val, reg))
 887                 yield from self.dump(dut)
 888                 assert False
 889
 890 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 891             branch_success, branch_fail):
 892     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 893                'src1_i': src1, 'src2_i': src2}]
 894
 895     sendlen = 1
 896     for idx in range(sendlen):
 897         yield from eq(dut.data_i[idx], instrs[idx])
 898         di = yield dut.data_i[idx]
 899         print ("senddata %d %x" % (idx, di))
 900     yield dut.p_add_i.eq(sendlen)
 901     yield
 902     o_p_ready = yield dut.p_ready_o
 903     while not o_p_ready:
 904         yield
 905         o_p_ready = yield dut.p_ready_o
 906
 907     yield dut.p_add_i.eq(0)
 908
 909
 910 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 911     yield from disable_issue(dut)
 912     yield dut.int_dest_i.eq(dest)
 913     yield dut.int_src1_i.eq(src1)
 914     yield dut.int_src2_i.eq(src2)
 915     if (op & (0x3<<2)) != 0: # branch
 916         yield dut.brissue.insn_i.eq(1)
 917         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 918         yield dut.br_imm_i.eq(imm)
 919         dut_issue = dut.brissue
 920     else:
 921         yield dut.aluissue.insn_i.eq(1)
 922         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 923         yield dut.alu_imm_i.eq(imm)
 924         dut_issue = dut.aluissue
 925     yield dut.reg_enable_i.eq(1)
 926
 927     # these indicate that the instruction is to be made shadow-dependent on
 928     # (either) branch success or branch fail
 929     yield dut.branch_fail_i.eq(branch_fail)
 930     yield dut.branch_succ_i.eq(branch_success)
 931
 932     yield
 933     yield from wait_for_issue(dut, dut_issue)
 934
 935
 936 def print_reg(dut, rnums):
 937     rs = []
 938     for rnum in rnums:
 939         reg = yield dut.intregs.regs[rnum].reg
 940         rs.append("%x" % reg)
 941     rnums = map(str, rnums)
 942     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 943
 944
 945 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 946     insts = []
 947     for i in range(n_ops):
 948         src1 = randint(1, dut.n_regs-1)
 949         src2 = randint(1, dut.n_regs-1)
 950         imm = randint(1, (1<<dut.rwid)-1)
 951         dest = randint(1, dut.n_regs-1)
 952         op = randint(0, max_opnums)
 953         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 954
 955         if shadowing:
 956             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 957         else:
 958             insts.append((src1, src2, dest, op, opi, imm))
 959     return insts
 960
 961
 962 def wait_for_busy_clear(dut):
 963     while True:
 964         busy_o = yield dut.busy_o
 965         if not busy_o:
 966             break
 967         print ("busy",)
 968         yield
 969
 970 def disable_issue(dut):
 971     yield dut.aluissue.insn_i.eq(0)
 972     yield dut.brissue.insn_i.eq(0)
 973     yield dut.lsissue.insn_i.eq(0)
 974
 975
 976 def wait_for_issue(dut, dut_issue):
 977     while True:
 978         issue_o = yield dut_issue.fn_issue_o
 979         if issue_o:
 980             yield from disable_issue(dut)
 981             yield dut.reg_enable_i.eq(0)
 982             break
 983         print ("busy",)
 984         #yield from print_reg(dut, [1,2,3])
 985         yield
 986     #yield from print_reg(dut, [1,2,3])
 987
 988 def scoreboard_branch_sim(dut, alusim):
 989
 990     iseed = 3
 991
 992     for i in range(1):
 993
 994         print ("rseed", iseed)
 995         seed(iseed)
 996         iseed += 1
 997
 998         yield dut.branch_direction_o.eq(0)
 999
1000         # set random values in the registers
1001         for i in range(1, dut.n_regs):
1002             val = 31+i*3
1003             val = randint(0, (1<<alusim.rwidth)-1)
1004             yield dut.intregs.regs[i].reg.eq(val)
1005             alusim.setval(i, val)
1006
1007         if False:
1008             # create some instructions: branches create a tree
1009             insts = create_random_ops(dut, 1, True, 1)
1010             #insts.append((6, 6, 1, 2, (0, 0)))
1011             #insts.append((4, 3, 3, 0, (0, 0)))
1012
1013             src1 = randint(1, dut.n_regs-1)
1014             src2 = randint(1, dut.n_regs-1)
1015             #op = randint(4, 7)
1016             op = 4 # only BGT at the moment
1017
1018             branch_ok = create_random_ops(dut, 1, True, 1)
1019             branch_fail = create_random_ops(dut, 1, True, 1)
1020
1021             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
1022
1023         if True:
1024             insts = []
1025             insts.append( (3, 5, 2, 0, (0, 0)) )
1026             branch_ok = []
1027             branch_fail = []
1028             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
1029             branch_ok.append( None )
1030             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
1031             #branch_fail.append( None )
1032             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
1033
1034         siminsts = deepcopy(insts)
1035
1036         # issue instruction(s)
1037         i = -1
1038         instrs = insts
1039         branch_direction = 0
1040         while instrs:
1041             yield
1042             yield
1043             i += 1
1044             branch_direction = yield dut.branch_direction_o # way branch went
1045             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
1046             if branch_direction == 1 and shadow_on:
1047                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1048                 continue # branch was "success" and this is a "failed"... skip
1049             if branch_direction == 2 and shadow_off:
1050                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1051                 continue # branch was "fail" and this is a "success"... skip
1052             if branch_direction != 0:
1053                 shadow_on = 0
1054                 shadow_off = 0
1055             is_branch = op >= 4
1056             if is_branch:
1057                 branch_ok, branch_fail = dest
1058                 dest = src2
1059                 # ok zip up the branch success / fail instructions and
1060                 # drop them into the queue, one marked "to have branch success"
1061                 # the other to be marked shadow branch "fail".
1062                 # one out of each of these will be cancelled
1063                 for ok, fl in zip(branch_ok, branch_fail):
1064                     if ok:
1065                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1066                     if fl:
1067                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1068             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1069                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1070             yield from int_instr(dut, op, src1, src2, dest,
1071                                  shadow_on, shadow_off)
1072
1073         # wait for all instructions to stop before checking
1074         yield
1075         yield from wait_for_busy_clear(dut)
1076
1077         i = -1
1078         while siminsts:
1079             instr = siminsts.pop(0)
1080             if instr is None:
1081                 continue
1082             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
1083             i += 1
1084             is_branch = op >= 4
1085             if is_branch:
1086                 branch_ok, branch_fail = dest
1087                 dest = src2
1088             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
1089                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1090             branch_res = alusim.op(op, src1, src2, dest)
1091             if is_branch:
1092                 if branch_res:
1093                     siminsts += branch_ok
1094                 else:
1095                     siminsts += branch_fail
1096
1097         # check status
1098         yield from alusim.check(dut)
1099         yield from alusim.dump(dut)
1100
1101
def scoreboard_sim(dut, alusim):
    """Simulation process: queue instructions into dut and check the result.

    Registers 1..n_regs-1 are given the same random values in dut and in
    alusim (the reference model).  Every instruction in instrs is applied
    to alusim and pushed into dut's instruction queue through instr_q.
    Once the queue has drained and busy_o has cleared, the register files
    are compared with alusim.check and then printed with alusim.dump.

    Instructions are 7-element tuples:
        (src1, src2, dest, op, opi, imm, (br_ok, br_fail))

    NOTE(review): several of the disabled ("if False") groups below hold
    4-, 5- or 6-element tuples, which do not match the 7-element unpacking
    in the issue loop; they need extending before being re-enabled.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        # the only group currently enabled.  note that RegSim.op returns 0
        # without touching any register for opcode 0x10 (LD/ST is a TODO
        # there), so the reference model expects no register change.
        if True: # LD/ST test (with immediate)
            instrs.append( (1, 2, 2, 0x10, 1, 1, (0, 0)) )
            #instrs.append( (5, 6, 7, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # update the reference model first, then queue into the dut
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # a few further steps after the queue has emptied, before busy_o
        # is polled (NOTE(review): the count of four looks empirical)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1248
1249
def test_scoreboard():
    """Build the dut, write out its RTLIL and run scoreboard_sim on it.

    Produces test_scoreboard6600.il (RTLIL) and test_scoreboard6600.vcd
    (simulation trace) in the current directory.
    """
    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16) # created, but not yet passed to the simulation

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative (currently disabled) branch-shadow test:
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1263
1264
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()