src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = 1 # regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117             self.go_st_i = Signal(n_units, reset_less=True)
 118
 119         # outputs
 120         self.busy_o = Signal(n_units, reset_less=True)
 121         self.rd_rel_o = Signal(n_units, reset_less=True)
 122         self.req_rel_o = Signal(n_units, reset_less=True)
 123         if ldstmode:
 124             self.adr_rel_o = Signal(n_units, reset_less=True)
 125             self.sto_rel_o = Signal(n_units, reset_less=True)
 126             self.req_rel_o = Signal(n_units, reset_less=True)
 127             self.load_mem_o = Signal(n_units, reset_less=True)
 128             self.stwd_mem_o = Signal(n_units, reset_less=True)
 129
 130         # in/out register data (note: not register#, actual data)
 131         self.data_o = Signal(rwid, reset_less=True)
 132         self.src1_i = Signal(rwid, reset_less=True)
 133         self.src2_i = Signal(rwid, reset_less=True)
 134         # input operand
 135
 136     def elaborate(self, platform):
 137         m = Module()
 138         comb = m.d.comb
 139
 140         for i, alu in enumerate(self.units):
 141             setattr(m.submodules, "comp%d" % i, alu)
 142
 143         go_rd_l = []
 144         go_wr_l = []
 145         issue_l = []
 146         busy_l = []
 147         req_rel_l = []
 148         rd_rel_l = []
 149         shadow_l = []
 150         godie_l = []
 151         for alu in self.units:
 152             req_rel_l.append(alu.req_rel_o)
 153             rd_rel_l.append(alu.rd_rel_o)
 154             shadow_l.append(alu.shadown_i)
 155             godie_l.append(alu.go_die_i)
 156             go_wr_l.append(alu.go_wr_i)
 157             go_rd_l.append(alu.go_rd_i)
 158             issue_l.append(alu.issue_i)
 159             busy_l.append(alu.busy_o)
 160         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 161         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 162         comb += self.busy_o.eq(Cat(*busy_l))
 163         comb += Cat(*godie_l).eq(self.go_die_i)
 164         comb += Cat(*shadow_l).eq(self.shadown_i)
 165         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 166         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 167         comb += Cat(*issue_l).eq(self.issue_i)
 168
 169         # connect data register input/output
 170
 171         # merge (OR) all integer FU / ALU outputs to a single value
 172         # bit of a hack: treereduce needs a list with an item named "data_o"
 173         if self.units:
 174             data_o = treereduce(self.units)
 175             comb += self.data_o.eq(data_o)
 176
 177         for i, alu in enumerate(self.units):
 178             comb += alu.src1_i.eq(self.src1_i)
 179             comb += alu.src2_i.eq(self.src2_i)
 180
 181         if not self.ldstmode:
 182             return m
 183
 184         ldmem_l = []
 185         stmem_l = []
 186         go_ad_l = []
 187         go_st_l = []
 188         adr_rel_l = []
 189         sto_rel_l = []
 190         for alu in self.units:
 191             adr_rel_l.append(alu.adr_rel_o)
 192             sto_rel_l.append(alu.sto_rel_o)
 193             ldmem_l.append(alu.load_mem_o)
 194             stmem_l.append(alu.stwd_mem_o)
 195             go_ad_l.append(alu.go_ad_i)
 196             go_st_l.append(alu.go_st_i)
 197         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 198         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 199         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 200         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 201         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 202         comb += Cat(*go_st_l).eq(self.go_st_i)
 203
 204         return m
 205
 206
 207 class CompUnitLDSTs(CompUnitsBase):
 208
 209     def __init__(self, rwid, opwid, n_ldsts, mem):
 210         """ Inputs:
 211
 212             * :rwid:   bit width of register file(s) - both FP and INT
 213             * :opwid:  operand bit width
 214         """
 215         self.opwid = opwid
 216
 217         # inputs
 218         self.oper_i = Signal(opwid, reset_less=True)
 219         self.imm_i = Signal(rwid, reset_less=True)
 220
 221         # Int ALUs
 222         self.alus = []
 223         for i in range(n_ldsts):
 224             self.alus.append(ALU(rwid))
 225
 226         units = []
 227         for alu in self.alus:
 228             aluopwid = 4 # see compldst.py for "internal" opcode
 229             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 230
 231         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 232
 233     def elaborate(self, platform):
 234         m = CompUnitsBase.elaborate(self, platform)
 235         comb = m.d.comb
 236
 237         # hand the same operation to all units, 4 lower bits though
 238         for alu in self.units:
 239             comb += alu.oper_i[0:4].eq(self.oper_i)
 240             comb += alu.imm_i.eq(self.imm_i)
 241             comb += alu.isalu_i.eq(0)
 242
 243         return m
 244
 245
 246 class CompUnitALUs(CompUnitsBase):
 247
 248     def __init__(self, rwid, opwid, n_alus):
 249         """ Inputs:
 250
 251             * :rwid:   bit width of register file(s) - both FP and INT
 252             * :opwid:  operand bit width
 253         """
 254         self.opwid = opwid
 255
 256         # inputs
 257         self.oper_i = Signal(opwid, reset_less=True)
 258         self.imm_i = Signal(rwid, reset_less=True)
 259
 260         # Int ALUs
 261         alus = []
 262         for i in range(n_alus):
 263             alus.append(ALU(rwid))
 264
 265         units = []
 266         for alu in alus:
 267             aluopwid = 3 # extra bit for immediate mode
 268             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 269
 270         CompUnitsBase.__init__(self, rwid, units)
 271
 272     def elaborate(self, platform):
 273         m = CompUnitsBase.elaborate(self, platform)
 274         comb = m.d.comb
 275
 276         # hand the same operation to all units, only lower 3 bits though
 277         for alu in self.units:
 278             comb += alu.oper_i[0:3].eq(self.oper_i)
 279             comb += alu.imm_i.eq(self.imm_i)
 280
 281         return m
 282
 283
 284 class CompUnitBR(CompUnitsBase):
 285
 286     def __init__(self, rwid, opwid):
 287         """ Inputs:
 288
 289             * :rwid:   bit width of register file(s) - both FP and INT
 290             * :opwid:  operand bit width
 291
 292             Note: bgt unit is returned so that a shadow unit can be created
 293             for it
 294         """
 295         self.opwid = opwid
 296
 297         # inputs
 298         self.oper_i = Signal(opwid, reset_less=True)
 299         self.imm_i = Signal(rwid, reset_less=True)
 300
 301         # Branch ALU and CU
 302         self.bgt = BranchALU(rwid)
 303         aluopwid = 3 # extra bit for immediate mode
 304         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 305         CompUnitsBase.__init__(self, rwid, [self.br1])
 306
 307     def elaborate(self, platform):
 308         m = CompUnitsBase.elaborate(self, platform)
 309         comb = m.d.comb
 310
 311         # hand the same operation to all units
 312         for alu in self.units:
 313             comb += alu.oper_i.eq(self.oper_i)
 314             comb += alu.imm_i.eq(self.imm_i)
 315
 316         return m
 317
 318
 319 class FunctionUnits(Elaboratable):
 320
 321     def __init__(self, n_regs, n_int_alus):
 322         self.n_regs = n_regs
 323         self.n_int_alus = n_int_alus
 324
 325         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 326         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 327         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 328
 329         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 330         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 331
 332         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 333         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 334         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 335
 336         self.readable_o = Signal(n_int_alus, reset_less=True)
 337         self.writable_o = Signal(n_int_alus, reset_less=True)
 338
 339         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 340         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 341         self.go_die_i = Signal(n_int_alus, reset_less=True)
 342         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 343
 344         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 345
 346     def elaborate(self, platform):
 347         m = Module()
 348         comb = m.d.comb
 349         sync = m.d.sync
 350
 351         n_intfus = self.n_int_alus
 352
 353         # Integer FU-FU Dep Matrix
 354         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 355         m.submodules.intfudeps = intfudeps
 356         # Integer FU-Reg Dep Matrix
 357         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 358         m.submodules.intregdeps = intregdeps
 359
 360         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 361         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 362
 363         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 364         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 365
 366         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 367         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 368         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 369
 370         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 371         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 372         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 373         comb += intfudeps.go_die_i.eq(self.go_die_i)
 374         comb += self.readable_o.eq(intfudeps.readable_o)
 375         comb += self.writable_o.eq(intfudeps.writable_o)
 376
 377         # Connect function issue / arrays, and dest/src1/src2
 378         comb += intregdeps.dest_i.eq(self.dest_i)
 379         comb += intregdeps.src_i[0].eq(self.src1_i)
 380         comb += intregdeps.src_i[1].eq(self.src2_i)
 381
 382         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 383         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 384         comb += intregdeps.go_die_i.eq(self.go_die_i)
 385         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 386
 387         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 388         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 389         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 390
 391         return m
 392
 393
 394 class Scoreboard(Elaboratable):
 395     def __init__(self, rwid, n_regs):
 396         """ Inputs:
 397
 398             * :rwid:   bit width of register file(s) - both FP and INT
 399             * :n_regs: depth of register file(s) - number of FP and INT regs
 400         """
 401         self.rwid = rwid
 402         self.n_regs = n_regs
 403
 404         # Register Files
 405         self.intregs = RegFileArray(rwid, n_regs)
 406         self.fpregs = RegFileArray(rwid, n_regs)
 407
 408         # issue q needs to get at these
 409         self.aluissue = IssueUnitGroup(2)
 410         self.lsissue = IssueUnitGroup(2)
 411         self.brissue = IssueUnitGroup(1)
 412         # and these
 413         self.alu_oper_i = Signal(4, reset_less=True)
 414         self.alu_imm_i = Signal(rwid, reset_less=True)
 415         self.br_oper_i = Signal(4, reset_less=True)
 416         self.br_imm_i = Signal(rwid, reset_less=True)
 417         self.ls_oper_i = Signal(4, reset_less=True)
 418         self.ls_imm_i = Signal(rwid, reset_less=True)
 419
 420         # inputs
 421         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 422         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 423         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 424         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 425
 426         # outputs
 427         self.issue_o = Signal(reset_less=True) # instruction was accepted
 428         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 429
 430         # for branch speculation experiment.  branch_direction = 0 if
 431         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 432         # branch_succ and branch_fail are requests to have the current
 433         # instruction be dependent on the branch unit "shadow" capability.
 434         self.branch_succ_i = Signal(reset_less=True)
 435         self.branch_fail_i = Signal(reset_less=True)
 436         self.branch_direction_o = Signal(2, reset_less=True)
 437
 438     def elaborate(self, platform):
 439         m = Module()
 440         comb = m.d.comb
 441         sync = m.d.sync
 442
 443         m.submodules.intregs = self.intregs
 444         m.submodules.fpregs = self.fpregs
 445
 446         # register ports
 447         int_dest = self.intregs.write_port("dest")
 448         int_src1 = self.intregs.read_port("src1")
 449         int_src2 = self.intregs.read_port("src2")
 450
 451         fp_dest = self.fpregs.write_port("dest")
 452         fp_src1 = self.fpregs.read_port("src1")
 453         fp_src2 = self.fpregs.read_port("src2")
 454
 455         # Int ALUs and BR ALUs
 456         n_int_alus = 5
 457         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 458         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 459
 460         # LDST Comp Units
 461         n_ldsts = 2
 462         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
 463
 464         # Comp Units
 465         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 466         bgt = cub.bgt # get at the branch computation unit
 467         br1 = cub.br1
 468
 469         # Int FUs
 470         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 471
 472         # Memory FUs
 473         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 474
 475         # Count of number of FUs
 476         n_intfus = n_int_alus
 477         n_fp_fus = 0 # for now
 478
 479         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 480         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 481         m.submodules.intpick1 = intpick1
 482
 483         # INT/FP Issue Unit
 484         regdecode = RegDecode(self.n_regs)
 485         m.submodules.regdecode = regdecode
 486         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 487         m.submodules.issueunit = issueunit
 488
 489         # Shadow Matrix.  currently n_intfus shadows, to be used for
 490         # write-after-write hazards.  NOTE: there is one extra for branches,
 491         # so the shadow width is increased by 1
 492         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 493         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 494
 495         # record previous instruction to cast shadow on current instruction
 496         prev_shadow = Signal(n_intfus)
 497
 498         # Branch Speculation recorder.  tracks the success/fail state as
 499         # each instruction is issued, so that when the branch occurs the
 500         # allow/cancel can be issued as appropriate.
 501         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 502
 503         #---------
 504         # ok start wiring things together...
 505         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 506         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 507         #---------
 508
 509         #---------
 510         # Issue Unit is where it starts.  set up some in/outs for this module
 511         #---------
 512         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 513                      regdecode.src1_i.eq(self.int_src1_i),
 514                      regdecode.src2_i.eq(self.int_src2_i),
 515                      regdecode.enable_i.eq(self.reg_enable_i),
 516                      self.issue_o.eq(issueunit.issue_o)
 517                     ]
 518
 519         # take these to outside (issue needs them)
 520         comb += cua.oper_i.eq(self.alu_oper_i)
 521         comb += cua.imm_i.eq(self.alu_imm_i)
 522         comb += cub.oper_i.eq(self.br_oper_i)
 523         comb += cub.imm_i.eq(self.br_imm_i)
 524         comb += cul.oper_i.eq(self.ls_oper_i)
 525         comb += cul.imm_i.eq(self.ls_imm_i)
 526
 527         # TODO: issueunit.f (FP)
 528
 529         # and int function issue / busy arrays, and dest/src1/src2
 530         comb += intfus.dest_i.eq(regdecode.dest_o)
 531         comb += intfus.src1_i.eq(regdecode.src1_o)
 532         comb += intfus.src2_i.eq(regdecode.src2_o)
 533
 534         fn_issue_o = issueunit.fn_issue_o
 535
 536         comb += intfus.fn_issue_i.eq(fn_issue_o)
 537         comb += issueunit.busy_i.eq(cu.busy_o)
 538         comb += self.busy_o.eq(cu.busy_o.bool())
 539
 540         #---------
 541         # Memory Function Unit
 542         #---------
 543         reset_b = Signal(cul.n_units, reset_less=True)
 544         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 545         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 546         comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
 547         comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
 548         with m.If(self.ls_oper_i[2]): # LD bit of operand
 549             comb += memfus.ld_i.eq(cul.issue_i)
 550         with m.If(self.ls_oper_i[3]): # ST bit of operand
 551             comb += memfus.st_i.eq(cul.issue_i)
 552
 553         # connect up address data
 554         comb += memfus.addrs_i[0].eq(cul.units[0].data_o)
 555         comb += memfus.addrs_i[1].eq(cul.units[1].data_o)
 556
 557         # connect loadable / storable to go_ld/go_st.
 558         # XXX should only be done when the memory ld/st has actually happened!
 559
 560         comb += memfus.go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
 561         comb += memfus.go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o)
 562         #comb += cul.go_wr_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
 563         comb += cul.go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o)
 564
 565         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 566         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 567         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 568
 569         #---------
 570         # merge shadow matrices outputs
 571         #---------
 572
 573         # these are explained in ShadowMatrix docstring, and are to be
 574         # connected to the FUReg and FUFU Matrices, to get them to reset
 575         anydie = Signal(n_intfus, reset_less=True)
 576         allshadown = Signal(n_intfus, reset_less=True)
 577         shreset = Signal(n_intfus, reset_less=True)
 578         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 579         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 580         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 581
 582         #---------
 583         # connect fu-fu matrix
 584         #---------
 585
 586         # Group Picker... done manually for now.
 587         go_rd_o = intpick1.go_rd_o
 588         go_wr_o = intpick1.go_wr_o
 589         go_rd_i = intfus.go_rd_i
 590         go_wr_i = intfus.go_wr_i
 591         go_die_i = intfus.go_die_i
 592         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 593         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 594         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 595         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 596
 597         # Connect Picker
 598         #---------
 599         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 600         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 601         int_rd_o = intfus.readable_o
 602         int_wr_o = intfus.writable_o
 603         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 604         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 605
 606         #---------
 607         # Shadow Matrix
 608         #---------
 609
 610         comb += shadows.issue_i.eq(fn_issue_o)
 611         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 612         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 613         #---------
 614         # NOTE; this setup is for the instruction order preservation...
 615
 616         # connect shadows / go_dies to Computation Units
 617         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 618         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 619
 620         # ok connect first n_int_fu shadows to busy lines, to create an
 621         # instruction-order linked-list-like arrangement, using a bit-matrix
 622         # (instead of e.g. a ring buffer).
 623         # XXX TODO
 624
 625         # when written, the shadow can be cancelled (and was good)
 626         for i in range(n_intfus):
 627             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 628
 629         # *previous* instruction shadows *current* instruction, and, obviously,
 630         # if the previous is completed (!busy) don't cast the shadow!
 631         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 632         for i in range(n_intfus):
 633             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 634
 635         #---------
 636         # ... and this is for branch speculation.  it uses the extra bit
 637         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 638         # only needs to set shadow_i, s_fail_i and s_good_i
 639
 640         # issue captures shadow_i (if enabled)
 641         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 642
 643         bactive = Signal(reset_less=True)
 644         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 645
 646         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 647         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 648             comb += bshadow.issue_i.eq(fn_issue_o)
 649             for i in range(n_intfus):
 650                 with m.If(fn_issue_o & (Const(1<<i))):
 651                     comb += bshadow.shadow_i[i][0].eq(1)
 652
 653         # finally, we need an indicator to the test infrastructure as to
 654         # whether the branch succeeded or failed, plus, link up to the
 655         # "recorder" of whether the instruction was under shadow or not
 656
 657         with m.If(br1.issue_i):
 658             sync += bspec.active_i.eq(1)
 659         with m.If(self.branch_succ_i):
 660             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 661         with m.If(self.branch_fail_i):
 662             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 663
 664         # branch is active (TODO: a better signal: this is over-using the
 665         # go_write signal - actually the branch should not be "writing")
 666         with m.If(br1.go_wr_i):
 667             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 668             sync += bspec.active_i.eq(0)
 669             comb += bspec.br_i.eq(1)
 670             # branch occurs if data == 1, failed if data == 0
 671             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 672             for i in range(n_intfus):
 673                 # *expected* direction of the branch matched against *actual*
 674                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 675                 # ... or it didn't
 676                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 677
 678         #---------
 679         # Connect Register File(s)
 680         #---------
 681         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 682         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 683         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 684
 685         # connect ALUs to regfule
 686         comb += int_dest.data_i.eq(cu.data_o)
 687         comb += cu.src1_i.eq(int_src1.data_o)
 688         comb += cu.src2_i.eq(int_src2.data_o)
 689
 690         # connect ALU Computation Units
 691         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 692         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 693         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 694
 695         return m
 696
 697     def __iter__(self):
 698         yield from self.intregs
 699         yield from self.fpregs
 700         yield self.int_dest_i
 701         yield self.int_src1_i
 702         yield self.int_src2_i
 703         yield self.issue_o
 704         yield self.branch_succ_i
 705         yield self.branch_fail_i
 706         yield self.branch_direction_o
 707
 708     def ports(self):
 709         return list(self)
 710
 711
class IssueToScoreboard(Elaboratable):
    """Test harness joining an InstructionQ to a Scoreboard.

    Instructions presented on data_i / p_add_i are placed in an
    InstructionQ.  The entry at the head of the queue (data_o[0]) is
    decoded combinatorially and offered to one of the Scoreboard's three
    issue groups (branch, LD/ST or ALU); it is popped from the queue once
    the selected group raises fn_issue_o.

    Constructor arguments:
    * qlen   - passed to InstructionQ; also sizes p_add_i and qlen_o
    * n_in   - number of data_i instruction records
    * n_out  - passed to InstructionQ
    * rwid   - passed to Instruction.nq, InstructionQ, Scoreboard and
               TestMemory (presumably the register width - TODO confirm)
    * opwid  - passed to Instruction.nq and InstructionQ, stored as self.opw
    * n_regs - passed to Scoreboard
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # shape tuple (width, signed): unsigned, int(log2(qlen))+2 bits wide
        mqbits = (int(log(qlen) / log(2))+2, False)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """Build the queue, scoreboard and memory and wire up issue logic.

        Side effect: sets self.intregs to the scoreboard's register file
        so that test code can inspect it.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync # NOTE: not used anywhere in this method

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        # NOTE: mem is added as a submodule but nothing else in this
        # method connects to it.
        mem = TestMemory(self.rwid, 8) # not too big, takes too long
        m.submodules.iq = iq
        m.submodules.sc = sc
        m.submodules.mem = mem

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when iq.n_sub_i (the pop length)
        # is set non-zero, which is done below once the issue unit that
        # was offered the instruction raises fn_issue_o.

        # in "waiting" state: each flag is driven (combinatorially) by the
        # decode further down when its issue group has been offered the
        # head-of-queue instruction.
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.
        # NOTE(review): an earlier version of this comment referred to
        # insn_i being reset to 0 "above"; no such reset is visible in
        # this method - insn_i is only ever driven to 1 here, and only
        # while the queue is non-empty.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group
            # op bits 2-3 non-zero selects the branch group; the unit is
            # given the low two op bits with the immediate flag above them
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            # otherwise op bits 4-5 non-zero selects the LD/ST group
            with m.Elif((op & (0x3<<4)) != 0): # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)
            # everything else goes to the ALU group, encoded like a branch
            with m.Else(): # alu
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        """Yield p_ready_o, every field of each data_i record, then p_add_i."""
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """Return the signals from __iter__ as a list (used for rtlil export)."""
        return list(self)
 834
 835
 836 IADD = 0
 837 ISUB = 1
 838 IMUL = 2
 839 ISHF = 3
 840 IBGT = 4
 841 IBLT = 5
 842 IBEQ = 6
 843 IBNE = 7
 844
 845
 846 class RegSim:
 847     def __init__(self, rwidth, nregs):
 848         self.rwidth = rwidth
 849         self.regs = [0] * nregs
 850
 851     def op(self, op, op_imm, imm, src1, src2, dest):
 852         maxbits = (1 << self.rwidth) - 1
 853         src1 = self.regs[src1] & maxbits
 854         if op_imm:
 855             src2 = imm
 856         else:
 857             src2 = self.regs[src2] & maxbits
 858         if op == IADD:
 859             val = src1 + src2
 860         elif op == ISUB:
 861             val = src1 - src2
 862         elif op == IMUL:
 863             val = src1 * src2
 864         elif op == ISHF:
 865             val = src1 >> (src2 & maxbits)
 866         elif op == IBGT:
 867             val = int(src1 > src2)
 868         elif op == IBLT:
 869             val = int(src1 < src2)
 870         elif op == IBEQ:
 871             val = int(src1 == src2)
 872         elif op == IBNE:
 873             val = int(src1 != src2)
 874         else:
 875             return 0 # LD/ST TODO
 876         val &= maxbits
 877         self.setval(dest, val)
 878         return val
 879
 880     def setval(self, dest, val):
 881         print ("sim setval", dest, hex(val))
 882         self.regs[dest] = val
 883
 884     def dump(self, dut):
 885         for i, val in enumerate(self.regs):
 886             reg = yield dut.intregs.regs[i].reg
 887             okstr = "OK" if reg == val else "!ok"
 888             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 889
 890     def check(self, dut):
 891         for i, val in enumerate(self.regs):
 892             reg = yield dut.intregs.regs[i].reg
 893             if reg != val:
 894                 print("reg %d expected %x received %x\n" % (i, val, reg))
 895                 yield from self.dump(dut)
 896                 assert False
 897
 898 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 899             branch_success, branch_fail):
 900     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 901                'src1_i': src1, 'src2_i': src2}]
 902
 903     sendlen = 1
 904     for idx in range(sendlen):
 905         yield from eq(dut.data_i[idx], instrs[idx])
 906         di = yield dut.data_i[idx]
 907         print ("senddata %d %x" % (idx, di))
 908     yield dut.p_add_i.eq(sendlen)
 909     yield
 910     o_p_ready = yield dut.p_ready_o
 911     while not o_p_ready:
 912         yield
 913         o_p_ready = yield dut.p_ready_o
 914
 915     yield dut.p_add_i.eq(0)
 916
 917
 918 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 919     yield from disable_issue(dut)
 920     yield dut.int_dest_i.eq(dest)
 921     yield dut.int_src1_i.eq(src1)
 922     yield dut.int_src2_i.eq(src2)
 923     if (op & (0x3<<2)) != 0: # branch
 924         yield dut.brissue.insn_i.eq(1)
 925         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 926         yield dut.br_imm_i.eq(imm)
 927         dut_issue = dut.brissue
 928     else:
 929         yield dut.aluissue.insn_i.eq(1)
 930         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 931         yield dut.alu_imm_i.eq(imm)
 932         dut_issue = dut.aluissue
 933     yield dut.reg_enable_i.eq(1)
 934
 935     # these indicate that the instruction is to be made shadow-dependent on
 936     # (either) branch success or branch fail
 937     yield dut.branch_fail_i.eq(branch_fail)
 938     yield dut.branch_succ_i.eq(branch_success)
 939
 940     yield
 941     yield from wait_for_issue(dut, dut_issue)
 942
 943
 944 def print_reg(dut, rnums):
 945     rs = []
 946     for rnum in rnums:
 947         reg = yield dut.intregs.regs[rnum].reg
 948         rs.append("%x" % reg)
 949     rnums = map(str, rnums)
 950     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 951
 952
 953 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 954     insts = []
 955     for i in range(n_ops):
 956         src1 = randint(1, dut.n_regs-1)
 957         src2 = randint(1, dut.n_regs-1)
 958         imm = randint(1, (1<<dut.rwid)-1)
 959         dest = randint(1, dut.n_regs-1)
 960         op = randint(0, max_opnums)
 961         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 962
 963         if shadowing:
 964             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 965         else:
 966             insts.append((src1, src2, dest, op, opi, imm))
 967     return insts
 968
 969
 970 def wait_for_busy_clear(dut):
 971     while True:
 972         busy_o = yield dut.busy_o
 973         if not busy_o:
 974             break
 975         print ("busy",)
 976         yield
 977
 978 def disable_issue(dut):
 979     yield dut.aluissue.insn_i.eq(0)
 980     yield dut.brissue.insn_i.eq(0)
 981     yield dut.lsissue.insn_i.eq(0)
 982
 983
 984 def wait_for_issue(dut, dut_issue):
 985     while True:
 986         issue_o = yield dut_issue.fn_issue_o
 987         if issue_o:
 988             yield from disable_issue(dut)
 989             yield dut.reg_enable_i.eq(0)
 990             break
 991         print ("busy",)
 992         #yield from print_reg(dut, [1,2,3])
 993         yield
 994     #yield from print_reg(dut, [1,2,3])
 995
 996 def scoreboard_branch_sim(dut, alusim):
 997
 998     iseed = 3
 999
1000     for i in range(1):
1001
1002         print ("rseed", iseed)
1003         seed(iseed)
1004         iseed += 1
1005
1006         yield dut.branch_direction_o.eq(0)
1007
1008         # set random values in the registers
1009         for i in range(1, dut.n_regs):
1010             val = 31+i*3
1011             val = randint(0, (1<<alusim.rwidth)-1)
1012             yield dut.intregs.regs[i].reg.eq(val)
1013             alusim.setval(i, val)
1014
1015         if False:
1016             # create some instructions: branches create a tree
1017             insts = create_random_ops(dut, 1, True, 1)
1018             #insts.append((6, 6, 1, 2, (0, 0)))
1019             #insts.append((4, 3, 3, 0, (0, 0)))
1020
1021             src1 = randint(1, dut.n_regs-1)
1022             src2 = randint(1, dut.n_regs-1)
1023             #op = randint(4, 7)
1024             op = 4 # only BGT at the moment
1025
1026             branch_ok = create_random_ops(dut, 1, True, 1)
1027             branch_fail = create_random_ops(dut, 1, True, 1)
1028
1029             insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))
1030
1031         if True:
1032             insts = []
1033             insts.append( (3, 5, 2, 0, (0, 0)) )
1034             branch_ok = []
1035             branch_fail = []
1036             #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
1037             branch_ok.append( None )
1038             branch_fail.append( (1, 1, 2, 0, (0, 1)) )
1039             #branch_fail.append( None )
1040             insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )
1041
1042         siminsts = deepcopy(insts)
1043
1044         # issue instruction(s)
1045         i = -1
1046         instrs = insts
1047         branch_direction = 0
1048         while instrs:
1049             yield
1050             yield
1051             i += 1
1052             branch_direction = yield dut.branch_direction_o # way branch went
1053             (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
1054             if branch_direction == 1 and shadow_on:
1055                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1056                 continue # branch was "success" and this is a "failed"... skip
1057             if branch_direction == 2 and shadow_off:
1058                 print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
1059                 continue # branch was "fail" and this is a "success"... skip
1060             if branch_direction != 0:
1061                 shadow_on = 0
1062                 shadow_off = 0
1063             is_branch = op >= 4
1064             if is_branch:
1065                 branch_ok, branch_fail = dest
1066                 dest = src2
1067                 # ok zip up the branch success / fail instructions and
1068                 # drop them into the queue, one marked "to have branch success"
1069                 # the other to be marked shadow branch "fail".
1070                 # one out of each of these will be cancelled
1071                 for ok, fl in zip(branch_ok, branch_fail):
1072                     if ok:
1073                         instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
1074                     if fl:
1075                         instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
1076             print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
1077                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1078             yield from int_instr(dut, op, src1, src2, dest,
1079                                  shadow_on, shadow_off)
1080
1081         # wait for all instructions to stop before checking
1082         yield
1083         yield from wait_for_busy_clear(dut)
1084
1085         i = -1
1086         while siminsts:
1087             instr = siminsts.pop(0)
1088             if instr is None:
1089                 continue
1090             (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
1091             i += 1
1092             is_branch = op >= 4
1093             if is_branch:
1094                 branch_ok, branch_fail = dest
1095                 dest = src2
1096             print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
1097                             (i, src1, src2, dest, op, shadow_on, shadow_off))
1098             branch_res = alusim.op(op, src1, src2, dest)
1099             if is_branch:
1100                 if branch_res:
1101                     siminsts += branch_ok
1102                 else:
1103                     siminsts += branch_fail
1104
1105         # check status
1106         yield from alusim.check(dut)
1107         yield from alusim.dump(dut)
1108
1109
def scoreboard_sim(dut, alusim):
    """Simulation generator: run a list of instructions through the queue.

    Registers 1..dut.n_regs-1 are loaded with random values (seed 0) in
    both *dut* and *alusim*.  Each instruction is applied to *alusim*
    and then pushed into dut's instruction queue with instr_q.  When the
    queue has drained and dut is no longer busy, the register files are
    compared with alusim.check and printed with alusim.dump.

    The issue loop expects instruction tuples of the form
        (src1, src2, dest, op, opi, imm, (br_ok, br_fail))

    The "if True:" / "if False:" blocks are hand-toggled regression
    cases.
    NOTE(review): several of the disabled blocks still use 4-, 5- or
    6-field tuples, which will not unpack in the issue loop; bring them
    to the 7-field form above before enabling them.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        # (this inner loop reuses the name "i" of the outer loop)
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            instrs.append( (1, 2, 2, 0x10, 1, 1, (0, 0)) )
            #instrs.append( (1, 2, 7, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # update the reference model first, in program order, then
            # hand the instruction to the hardware queue
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first until the instruction queue reports empty...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # ...then four more bare yields before polling busy_o
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1256
1257
def test_scoreboard():
    """Build the IssueToScoreboard harness, export it and simulate it.

    Writes the RTLIL to test_scoreboard6600.il and runs scoreboard_sim,
    dumping waveforms to test_scoreboard6600.vcd.
    """
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=16, opwid=8, n_regs=8)
    alusim = RegSim(rwidth=16, nregs=8)
    memsim = MemSim(16, 16)

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    # alternative process, currently disabled:
    #sim_process = scoreboard_branch_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')
1271
1272
# Run the scoreboard test when this file is executed as a script.
if __name__ == '__main__':
    test_scoreboard()