src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  32
  33     def elaborate(self, platform):
  34         m = Module()
  35         m.submodules.rdport = self.rdport = self.mem.read_port()
  36         m.submodules.wrport = self.wrport = self.mem.write_port()
  37         return m
  38
  39
  40 class MemSim:
  41     def __init__(self, regwid, addrw):
  42         self.regwid = regwid
  43         self.ddepth = 1 # regwid//8
  44         depth = (1<<addrw) // self.ddepth
  45         self.mem = list(range(0, depth))
  46
  47     def ld(self, addr):
  48         return self.mem[addr>>self.ddepth]
  49
  50     def st(self, addr, data):
  51         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  52
  53
  54 class CompUnitsBase(Elaboratable):
  55     """ Computation Unit Base class.
  56
  57         Amazingly, this class works recursively.  It's supposed to just
  58         look after some ALUs (that can handle the same operations),
  59         grouping them together, however it turns out that the same code
  60         can also group *groups* of Computation Units together as well.
  61
  62         Basically it was intended just to concatenate the ALU's issue,
  63         go_rd etc. signals together, which start out as bits and become
  64         sequences.  Turns out that the same trick works just as well
  65         on Computation Units!
  66
  67         So this class may be used recursively to present a top-level
  68         sequential concatenation of all the signals in and out of
  69         ALUs, whilst at the same time making it convenient to group
  70         ALUs together.
  71
  72         At the lower level, the intent is that groups of (identical)
  73         ALUs may be passed the same operation.  Even beyond that,
  74         the intent is that that group of (identical) ALUs actually
  75         share the *same pipeline* and as such become a "Concurrent
  76         Computation Unit" as defined by Mitch Alsup (see section
  77         11.4.9.3)
  78     """
  79     def __init__(self, rwid, units, ldstmode=False):
  80         """ Inputs:
  81
  82             * :rwid:   bit width of register file(s) - both FP and INT
  83             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  84         """
  85         self.units = units
  86         self.ldstmode = ldstmode
  87         self.rwid = rwid
  88         self.rwid = rwid
  89         if units and isinstance(units[0], CompUnitsBase):
  90             self.n_units = 0
  91             for u in self.units:
  92                 self.n_units += u.n_units
  93         else:
  94             self.n_units = len(units)
  95
  96         n_units = self.n_units
  97
  98         # inputs
  99         self.issue_i = Signal(n_units, reset_less=True)
 100         self.go_rd_i = Signal(n_units, reset_less=True)
 101         self.go_wr_i = Signal(n_units, reset_less=True)
 102         self.shadown_i = Signal(n_units, reset_less=True)
 103         self.go_die_i = Signal(n_units, reset_less=True)
 104         if ldstmode:
 105             self.go_ad_i = Signal(n_units, reset_less=True)
 106             self.go_st_i = Signal(n_units, reset_less=True)
 107
 108         # outputs
 109         self.busy_o = Signal(n_units, reset_less=True)
 110         self.rd_rel_o = Signal(n_units, reset_less=True)
 111         self.req_rel_o = Signal(n_units, reset_less=True)
 112         if ldstmode:
 113             self.ld_o = Signal(n_units, reset_less=True) # op is LD
 114             self.st_o = Signal(n_units, reset_less=True) # op is ST
 115             self.adr_rel_o = Signal(n_units, reset_less=True)
 116             self.sto_rel_o = Signal(n_units, reset_less=True)
 117             self.req_rel_o = Signal(n_units, reset_less=True)
 118             self.load_mem_o = Signal(n_units, reset_less=True)
 119             self.stwd_mem_o = Signal(n_units, reset_less=True)
 120             self.addr_o = Signal(rwid, reset_less=True)
 121
 122         # in/out register data (note: not register#, actual data)
 123         self.data_o = Signal(rwid, reset_less=True)
 124         self.src1_i = Signal(rwid, reset_less=True)
 125         self.src2_i = Signal(rwid, reset_less=True)
 126         # input operand
 127
 128     def elaborate(self, platform):
 129         m = Module()
 130         comb = m.d.comb
 131
 132         for i, alu in enumerate(self.units):
 133             setattr(m.submodules, "comp%d" % i, alu)
 134
 135         go_rd_l = []
 136         go_wr_l = []
 137         issue_l = []
 138         busy_l = []
 139         req_rel_l = []
 140         rd_rel_l = []
 141         shadow_l = []
 142         godie_l = []
 143         for alu in self.units:
 144             req_rel_l.append(alu.req_rel_o)
 145             rd_rel_l.append(alu.rd_rel_o)
 146             shadow_l.append(alu.shadown_i)
 147             godie_l.append(alu.go_die_i)
 148             go_wr_l.append(alu.go_wr_i)
 149             go_rd_l.append(alu.go_rd_i)
 150             issue_l.append(alu.issue_i)
 151             busy_l.append(alu.busy_o)
 152         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 153         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 154         comb += self.busy_o.eq(Cat(*busy_l))
 155         comb += Cat(*godie_l).eq(self.go_die_i)
 156         comb += Cat(*shadow_l).eq(self.shadown_i)
 157         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 158         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 159         comb += Cat(*issue_l).eq(self.issue_i)
 160
 161         # connect data register input/output
 162
 163         # merge (OR) all integer FU / ALU outputs to a single value
 164         if self.units:
 165             data_o = treereduce(self.units, "data_o")
 166             comb += self.data_o.eq(data_o)
 167             if self.ldstmode:
 168                 addr_o = treereduce(self.units, "addr_o")
 169                 comb += self.addr_o.eq(addr_o)
 170
 171         for i, alu in enumerate(self.units):
 172             comb += alu.src1_i.eq(self.src1_i)
 173             comb += alu.src2_i.eq(self.src2_i)
 174
 175         if not self.ldstmode:
 176             return m
 177
 178         ldmem_l = []
 179         stmem_l = []
 180         go_ad_l = []
 181         go_st_l = []
 182         ld_l = []
 183         st_l = []
 184         adr_rel_l = []
 185         sto_rel_l = []
 186         for alu in self.units:
 187             ld_l.append(alu.ld_o)
 188             st_l.append(alu.st_o)
 189             adr_rel_l.append(alu.adr_rel_o)
 190             sto_rel_l.append(alu.sto_rel_o)
 191             ldmem_l.append(alu.load_mem_o)
 192             stmem_l.append(alu.stwd_mem_o)
 193             go_ad_l.append(alu.go_ad_i)
 194             go_st_l.append(alu.go_st_i)
 195         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 196         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 197         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 198         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 199         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 200         comb += Cat(*go_st_l).eq(self.go_st_i)
 201
 202         return m
 203
 204
 205 class CompUnitLDSTs(CompUnitsBase):
 206
 207     def __init__(self, rwid, opwid, n_ldsts, mem):
 208         """ Inputs:
 209
 210             * :rwid:   bit width of register file(s) - both FP and INT
 211             * :opwid:  operand bit width
 212         """
 213         self.opwid = opwid
 214
 215         # inputs
 216         self.oper_i = Signal(opwid, reset_less=True)
 217         self.imm_i = Signal(rwid, reset_less=True)
 218
 219         # Int ALUs
 220         self.alus = []
 221         for i in range(n_ldsts):
 222             self.alus.append(ALU(rwid))
 223
 224         units = []
 225         for alu in self.alus:
 226             aluopwid = 4 # see compldst.py for "internal" opcode
 227             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 228
 229         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 230
 231     def elaborate(self, platform):
 232         m = CompUnitsBase.elaborate(self, platform)
 233         comb = m.d.comb
 234
 235         # hand the same operation to all units, 4 lower bits though
 236         for alu in self.units:
 237             comb += alu.oper_i[0:4].eq(self.oper_i)
 238             comb += alu.imm_i.eq(self.imm_i)
 239             comb += alu.isalu_i.eq(0)
 240
 241         return m
 242
 243
 244 class CompUnitALUs(CompUnitsBase):
 245
 246     def __init__(self, rwid, opwid, n_alus):
 247         """ Inputs:
 248
 249             * :rwid:   bit width of register file(s) - both FP and INT
 250             * :opwid:  operand bit width
 251         """
 252         self.opwid = opwid
 253
 254         # inputs
 255         self.oper_i = Signal(opwid, reset_less=True)
 256         self.imm_i = Signal(rwid, reset_less=True)
 257
 258         # Int ALUs
 259         alus = []
 260         for i in range(n_alus):
 261             alus.append(ALU(rwid))
 262
 263         units = []
 264         for alu in alus:
 265             aluopwid = 3 # extra bit for immediate mode
 266             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 267
 268         CompUnitsBase.__init__(self, rwid, units)
 269
 270     def elaborate(self, platform):
 271         m = CompUnitsBase.elaborate(self, platform)
 272         comb = m.d.comb
 273
 274         # hand the same operation to all units, only lower 3 bits though
 275         for alu in self.units:
 276             comb += alu.oper_i[0:3].eq(self.oper_i)
 277             comb += alu.imm_i.eq(self.imm_i)
 278
 279         return m
 280
 281
 282 class CompUnitBR(CompUnitsBase):
 283
 284     def __init__(self, rwid, opwid):
 285         """ Inputs:
 286
 287             * :rwid:   bit width of register file(s) - both FP and INT
 288             * :opwid:  operand bit width
 289
 290             Note: bgt unit is returned so that a shadow unit can be created
 291             for it
 292         """
 293         self.opwid = opwid
 294
 295         # inputs
 296         self.oper_i = Signal(opwid, reset_less=True)
 297         self.imm_i = Signal(rwid, reset_less=True)
 298
 299         # Branch ALU and CU
 300         self.bgt = BranchALU(rwid)
 301         aluopwid = 3 # extra bit for immediate mode
 302         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 303         CompUnitsBase.__init__(self, rwid, [self.br1])
 304
 305     def elaborate(self, platform):
 306         m = CompUnitsBase.elaborate(self, platform)
 307         comb = m.d.comb
 308
 309         # hand the same operation to all units
 310         for alu in self.units:
 311             comb += alu.oper_i.eq(self.oper_i)
 312             comb += alu.imm_i.eq(self.imm_i)
 313
 314         return m
 315
 316
 317 class FunctionUnits(Elaboratable):
 318
 319     def __init__(self, n_regs, n_int_alus):
 320         self.n_regs = n_regs
 321         self.n_int_alus = n_int_alus
 322
 323         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 324         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 325         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 326
 327         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 328         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 329
 330         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 331         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 332         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 333
 334         self.readable_o = Signal(n_int_alus, reset_less=True)
 335         self.writable_o = Signal(n_int_alus, reset_less=True)
 336
 337         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 338         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 339         self.go_die_i = Signal(n_int_alus, reset_less=True)
 340         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 341
 342         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 343
 344     def elaborate(self, platform):
 345         m = Module()
 346         comb = m.d.comb
 347         sync = m.d.sync
 348
 349         n_intfus = self.n_int_alus
 350
 351         # Integer FU-FU Dep Matrix
 352         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 353         m.submodules.intfudeps = intfudeps
 354         # Integer FU-Reg Dep Matrix
 355         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 356         m.submodules.intregdeps = intregdeps
 357
 358         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 359         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 360
 361         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 362         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 363
 364         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 365         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 366         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 367
 368         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 369         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 370         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 371         comb += intfudeps.go_die_i.eq(self.go_die_i)
 372         comb += self.readable_o.eq(intfudeps.readable_o)
 373         comb += self.writable_o.eq(intfudeps.writable_o)
 374
 375         # Connect function issue / arrays, and dest/src1/src2
 376         comb += intregdeps.dest_i.eq(self.dest_i)
 377         comb += intregdeps.src_i[0].eq(self.src1_i)
 378         comb += intregdeps.src_i[1].eq(self.src2_i)
 379
 380         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 381         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 382         comb += intregdeps.go_die_i.eq(self.go_die_i)
 383         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 384
 385         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 386         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 387         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 388
 389         return m
 390
 391
 392 class Scoreboard(Elaboratable):
 393     def __init__(self, rwid, n_regs):
 394         """ Inputs:
 395
 396             * :rwid:   bit width of register file(s) - both FP and INT
 397             * :n_regs: depth of register file(s) - number of FP and INT regs
 398         """
 399         self.rwid = rwid
 400         self.n_regs = n_regs
 401
 402         # Register Files
 403         self.intregs = RegFileArray(rwid, n_regs)
 404         self.fpregs = RegFileArray(rwid, n_regs)
 405
 406         # Memory (test for now)
 407         self.mem = TestMemory(self.rwid, 8) # not too big, takes too long
 408
 409         # issue q needs to get at these
 410         self.aluissue = IssueUnitGroup(2)
 411         self.lsissue = IssueUnitGroup(2)
 412         self.brissue = IssueUnitGroup(1)
 413         # and these
 414         self.alu_oper_i = Signal(4, reset_less=True)
 415         self.alu_imm_i = Signal(rwid, reset_less=True)
 416         self.br_oper_i = Signal(4, reset_less=True)
 417         self.br_imm_i = Signal(rwid, reset_less=True)
 418         self.ls_oper_i = Signal(4, reset_less=True)
 419         self.ls_imm_i = Signal(rwid, reset_less=True)
 420
 421         # inputs
 422         self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
 423         self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
 424         self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
 425         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 426
 427         # outputs
 428         self.issue_o = Signal(reset_less=True) # instruction was accepted
 429         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 430
 431         # for branch speculation experiment.  branch_direction = 0 if
 432         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 433         # branch_succ and branch_fail are requests to have the current
 434         # instruction be dependent on the branch unit "shadow" capability.
 435         self.branch_succ_i = Signal(reset_less=True)
 436         self.branch_fail_i = Signal(reset_less=True)
 437         self.branch_direction_o = Signal(2, reset_less=True)
 438
 439     def elaborate(self, platform):
 440         m = Module()
 441         comb = m.d.comb
 442         sync = m.d.sync
 443
 444         m.submodules.intregs = self.intregs
 445         m.submodules.fpregs = self.fpregs
 446         m.submodules.mem = mem = self.mem
 447
 448         # register ports
 449         int_dest = self.intregs.write_port("dest")
 450         int_src1 = self.intregs.read_port("src1")
 451         int_src2 = self.intregs.read_port("src2")
 452
 453         fp_dest = self.fpregs.write_port("dest")
 454         fp_src1 = self.fpregs.read_port("src1")
 455         fp_src2 = self.fpregs.read_port("src2")
 456
 457         # Int ALUs and BR ALUs
 458         n_int_alus = 5
 459         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 460         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 461
 462         # LDST Comp Units
 463         n_ldsts = 2
 464         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
 465
 466         # Comp Units
 467         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 468         bgt = cub.bgt # get at the branch computation unit
 469         br1 = cub.br1
 470
 471         # Int FUs
 472         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 473
 474         # Memory FUs
 475         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 476
 477         # Memory Priority Picker 1: one gateway per memory port
 478         mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
 479         m.submodules.mempick1 = mempick1
 480
 481         # Count of number of FUs
 482         n_intfus = n_int_alus
 483         n_fp_fus = 0 # for now
 484
 485         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 486         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 487         m.submodules.intpick1 = intpick1
 488
 489         # INT/FP Issue Unit
 490         regdecode = RegDecode(self.n_regs)
 491         m.submodules.regdecode = regdecode
 492         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 493         m.submodules.issueunit = issueunit
 494
 495         # Shadow Matrix.  currently n_intfus shadows, to be used for
 496         # write-after-write hazards.  NOTE: there is one extra for branches,
 497         # so the shadow width is increased by 1
 498         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 499         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 500
 501         # record previous instruction to cast shadow on current instruction
 502         prev_shadow = Signal(n_intfus)
 503
 504         # Branch Speculation recorder.  tracks the success/fail state as
 505         # each instruction is issued, so that when the branch occurs the
 506         # allow/cancel can be issued as appropriate.
 507         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 508
 509         #---------
 510         # ok start wiring things together...
 511         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 512         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 513         #---------
 514
 515         #---------
 516         # Issue Unit is where it starts.  set up some in/outs for this module
 517         #---------
 518         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 519                      regdecode.src1_i.eq(self.int_src1_i),
 520                      regdecode.src2_i.eq(self.int_src2_i),
 521                      regdecode.enable_i.eq(self.reg_enable_i),
 522                      self.issue_o.eq(issueunit.issue_o)
 523                     ]
 524
 525         # take these to outside (issue needs them)
 526         comb += cua.oper_i.eq(self.alu_oper_i)
 527         comb += cua.imm_i.eq(self.alu_imm_i)
 528         comb += cub.oper_i.eq(self.br_oper_i)
 529         comb += cub.imm_i.eq(self.br_imm_i)
 530         comb += cul.oper_i.eq(self.ls_oper_i)
 531         comb += cul.imm_i.eq(self.ls_imm_i)
 532
 533         # TODO: issueunit.f (FP)
 534
 535         # and int function issue / busy arrays, and dest/src1/src2
 536         comb += intfus.dest_i.eq(regdecode.dest_o)
 537         comb += intfus.src1_i.eq(regdecode.src1_o)
 538         comb += intfus.src2_i.eq(regdecode.src2_o)
 539
 540         fn_issue_o = issueunit.fn_issue_o
 541
 542         comb += intfus.fn_issue_i.eq(fn_issue_o)
 543         comb += issueunit.busy_i.eq(cu.busy_o)
 544         comb += self.busy_o.eq(cu.busy_o.bool())
 545
 546         #---------
 547         # Memory Function Unit
 548         #---------
 549         reset_b = Signal(cul.n_units, reset_less=True)
 550         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 551
 552         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 553         comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
 554         comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
 555
 556         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 557         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 558         # issue_i.  multi-issue gets a bit more complex but not a lot.
 559         prior_ldsts = Signal(cul.n_units, reset_less=True)
 560         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 561         with m.If(self.ls_oper_i[2]): # LD bit of operand
 562             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 563         with m.If(self.ls_oper_i[3]): # ST bit of operand
 564             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 565
 566         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 567         # just immediately activate go_adr
 568         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 569
 570         # connect up address data
 571         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 572         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 573
 574         # connect loadable / storable to go_ld/go_st.
 575         # XXX should only be done when the memory ld/st has actually happened!
 576         go_st_i = Signal(cul.n_units, reset_less=True)
 577         go_ld_i = Signal(cul.n_units, reset_less=True)
 578         comb += go_ld_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
 579                                   cul.req_rel_o & cul.ld_o)
 580         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
 581                                   cul.sto_rel_o & cul.st_o)
 582         comb += memfus.go_ld_i.eq(go_ld_i)
 583         comb += memfus.go_st_i.eq(go_st_i)
 584         #comb += cul.go_wr_i.eq(go_ld_i)
 585         comb += cul.go_st_i.eq(go_st_i)
 586
 587         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 588         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 589         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 590
 591         #---------
 592         # merge shadow matrices outputs
 593         #---------
 594
 595         # these are explained in ShadowMatrix docstring, and are to be
 596         # connected to the FUReg and FUFU Matrices, to get them to reset
 597         anydie = Signal(n_intfus, reset_less=True)
 598         allshadown = Signal(n_intfus, reset_less=True)
 599         shreset = Signal(n_intfus, reset_less=True)
 600         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 601         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 602         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 603
 604         #---------
 605         # connect fu-fu matrix
 606         #---------
 607
 608         # Group Picker... done manually for now.
 609         go_rd_o = intpick1.go_rd_o
 610         go_wr_o = intpick1.go_wr_o
 611         go_rd_i = intfus.go_rd_i
 612         go_wr_i = intfus.go_wr_i
 613         go_die_i = intfus.go_die_i
 614         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 615         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 616         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 617         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 618
 619         # Connect Picker
 620         #---------
 621         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 622         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 623         int_rd_o = intfus.readable_o
 624         int_wr_o = intfus.writable_o
 625         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 626         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 627
 628         #---------
 629         # Shadow Matrix
 630         #---------
 631
 632         comb += shadows.issue_i.eq(fn_issue_o)
 633         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 634         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 635         #---------
 636         # NOTE; this setup is for the instruction order preservation...
 637
 638         # connect shadows / go_dies to Computation Units
 639         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 640         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 641
 642         # ok connect first n_int_fu shadows to busy lines, to create an
 643         # instruction-order linked-list-like arrangement, using a bit-matrix
 644         # (instead of e.g. a ring buffer).
 645
 646         # when written, the shadow can be cancelled (and was good)
 647         for i in range(n_intfus):
 648             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 649
 650         # *previous* instruction shadows *current* instruction, and, obviously,
 651         # if the previous is completed (!busy) don't cast the shadow!
 652         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 653         for i in range(n_intfus):
 654             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 655
 656         #---------
 657         # ... and this is for branch speculation.  it uses the extra bit
 658         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 659         # only needs to set shadow_i, s_fail_i and s_good_i
 660
 661         # issue captures shadow_i (if enabled)
 662         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 663
 664         bactive = Signal(reset_less=True)
 665         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 666
 667         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 668         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 669             comb += bshadow.issue_i.eq(fn_issue_o)
 670             for i in range(n_intfus):
 671                 with m.If(fn_issue_o & (Const(1<<i))):
 672                     comb += bshadow.shadow_i[i][0].eq(1)
 673
 674         # finally, we need an indicator to the test infrastructure as to
 675         # whether the branch succeeded or failed, plus, link up to the
 676         # "recorder" of whether the instruction was under shadow or not
 677
 678         with m.If(br1.issue_i):
 679             sync += bspec.active_i.eq(1)
 680         with m.If(self.branch_succ_i):
 681             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 682         with m.If(self.branch_fail_i):
 683             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 684
 685         # branch is active (TODO: a better signal: this is over-using the
 686         # go_write signal - actually the branch should not be "writing")
 687         with m.If(br1.go_wr_i):
 688             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 689             sync += bspec.active_i.eq(0)
 690             comb += bspec.br_i.eq(1)
 691             # branch occurs if data == 1, failed if data == 0
 692             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 693             for i in range(n_intfus):
 694                 # *expected* direction of the branch matched against *actual*
 695                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 696                 # ... or it didn't
 697                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 698
 699         #---------
 700         # Connect Register File(s)
 701         #---------
 702         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 703         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 704         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 705
 706         # connect ALUs to regfule
 707         comb += int_dest.data_i.eq(cu.data_o)
 708         comb += cu.src1_i.eq(int_src1.data_o)
 709         comb += cu.src2_i.eq(int_src2.data_o)
 710
 711         # connect ALU Computation Units
 712         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 713         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 714         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 715
 716         return m
 717
 718     def __iter__(self):
 719         yield from self.intregs
 720         yield from self.fpregs
 721         yield self.int_dest_i
 722         yield self.int_src1_i
 723         yield self.int_src2_i
 724         yield self.issue_o
 725         yield self.branch_succ_i
 726         yield self.branch_fail_i
 727         yield self.branch_direction_o
 728
 729     def ports(self):
 730         return list(self)
 731
 732
 733 class IssueToScoreboard(Elaboratable):
 734
 735     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 736         self.qlen = qlen
 737         self.n_in = n_in
 738         self.n_out = n_out
 739         self.rwid = rwid
 740         self.opw = opwid
 741         self.n_regs = n_regs
 742
 743         mqbits = (int(log(qlen) / log(2))+2, False)
 744         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 745         self.p_ready_o = Signal() # instructions were added
 746         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 747
 748         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 749         self.qlen_o = Signal(mqbits, reset_less=True)
 750
 751     def elaborate(self, platform):
 752         m = Module()
 753         comb = m.d.comb
 754         sync = m.d.sync
 755
 756         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 757         sc = Scoreboard(self.rwid, self.n_regs)
 758         m.submodules.iq = iq
 759         m.submodules.sc = sc
 760
 761         # get at the regfile for testing
 762         self.intregs = sc.intregs
 763
 764         # and the "busy" signal and instruction queue length
 765         comb += self.busy_o.eq(sc.busy_o)
 766         comb += self.qlen_o.eq(iq.qlen_o)
 767
 768         # link up instruction queue
 769         comb += iq.p_add_i.eq(self.p_add_i)
 770         comb += self.p_ready_o.eq(iq.p_ready_o)
 771         for i in range(self.n_in):
 772             comb += eq(iq.data_i[i], self.data_i[i])
 773
 774         # take instruction and process it.  note that it's possible to
 775         # "inspect" the queue contents *without* actually removing the
 776         # items.  items are only removed when the
 777
 778         # in "waiting" state
 779         wait_issue_br = Signal()
 780         wait_issue_alu = Signal()
 781         wait_issue_ls = Signal()
 782
 783         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 784             # set instruction pop length to 1 if the unit accepted
 785             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 786                 with m.If(iq.qlen_o != 0):
 787                     comb += iq.n_sub_i.eq(1)
 788             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 789                 with m.If(iq.qlen_o != 0):
 790                     comb += iq.n_sub_i.eq(1)
 791             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 792                 with m.If(iq.qlen_o != 0):
 793                     comb += iq.n_sub_i.eq(1)
 794
 795         # see if some instruction(s) are here.  note that this is
 796         # "inspecting" the in-place queue.  note also that on the
 797         # cycle following "waiting" for fn_issue_o to be set, the
 798         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 799         with m.If(iq.qlen_o != 0):
 800             # get the operands and operation
 801             imm = iq.data_o[0].imm_i
 802             dest = iq.data_o[0].dest_i
 803             src1 = iq.data_o[0].src1_i
 804             src2 = iq.data_o[0].src2_i
 805             op = iq.data_o[0].oper_i
 806             opi = iq.data_o[0].opim_i # immediate set
 807
 808             # set the src/dest regs
 809             comb += sc.int_dest_i.eq(dest)
 810             comb += sc.int_src1_i.eq(src1)
 811             comb += sc.int_src2_i.eq(src2)
 812             comb += sc.reg_enable_i.eq(1) # enable the regfile
 813
 814             # choose a Function-Unit-Group
 815             with m.If((op & (0x3<<2)) != 0): # branch
 816                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 817                 comb += sc.br_imm_i.eq(imm)
 818                 comb += sc.brissue.insn_i.eq(1)
 819                 comb += wait_issue_br.eq(1)
 820             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 821                 # see compldst.py
 822                 # bit 0: ADD/SUB
 823                 # bit 1: immed
 824                 # bit 4: LD
 825                 # bit 5: ST
 826                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 827                 comb += sc.ls_imm_i.eq(imm)
 828                 comb += sc.lsissue.insn_i.eq(1)
 829                 comb += wait_issue_ls.eq(1)
 830             with m.Else(): # alu
 831                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 832                 comb += sc.alu_imm_i.eq(imm)
 833                 comb += sc.aluissue.insn_i.eq(1)
 834                 comb += wait_issue_alu.eq(1)
 835
 836             # XXX TODO
 837             # these indicate that the instruction is to be made
 838             # shadow-dependent on
 839             # (either) branch success or branch fail
 840             #yield sc.branch_fail_i.eq(branch_fail)
 841             #yield sc.branch_succ_i.eq(branch_success)
 842
 843         return m
 844
 845     def __iter__(self):
 846         yield self.p_ready_o
 847         for o in self.data_i:
 848             yield from list(o)
 849         yield self.p_add_i
 850
 851     def ports(self):
 852         return list(self)
 853
 854
 855 IADD = 0
 856 ISUB = 1
 857 IMUL = 2
 858 ISHF = 3
 859 IBGT = 4
 860 IBLT = 5
 861 IBEQ = 6
 862 IBNE = 7
 863
 864
 865 class RegSim:
 866     def __init__(self, rwidth, nregs):
 867         self.rwidth = rwidth
 868         self.regs = [0] * nregs
 869
 870     def op(self, op, op_imm, imm, src1, src2, dest):
 871         maxbits = (1 << self.rwidth) - 1
 872         src1 = self.regs[src1] & maxbits
 873         if op_imm:
 874             src2 = imm
 875         else:
 876             src2 = self.regs[src2] & maxbits
 877         if op == IADD:
 878             val = src1 + src2
 879         elif op == ISUB:
 880             val = src1 - src2
 881         elif op == IMUL:
 882             val = src1 * src2
 883         elif op == ISHF:
 884             val = src1 >> (src2 & maxbits)
 885         elif op == IBGT:
 886             val = int(src1 > src2)
 887         elif op == IBLT:
 888             val = int(src1 < src2)
 889         elif op == IBEQ:
 890             val = int(src1 == src2)
 891         elif op == IBNE:
 892             val = int(src1 != src2)
 893         else:
 894             return 0 # LD/ST TODO
 895         val &= maxbits
 896         self.setval(dest, val)
 897         return val
 898
 899     def setval(self, dest, val):
 900         print ("sim setval", dest, hex(val))
 901         self.regs[dest] = val
 902
 903     def dump(self, dut):
 904         for i, val in enumerate(self.regs):
 905             reg = yield dut.intregs.regs[i].reg
 906             okstr = "OK" if reg == val else "!ok"
 907             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 908
 909     def check(self, dut):
 910         for i, val in enumerate(self.regs):
 911             reg = yield dut.intregs.regs[i].reg
 912             if reg != val:
 913                 print("reg %d expected %x received %x\n" % (i, val, reg))
 914                 yield from self.dump(dut)
 915                 assert False
 916
 917 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 918             branch_success, branch_fail):
 919     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 920                'src1_i': src1, 'src2_i': src2}]
 921
 922     sendlen = 1
 923     for idx in range(sendlen):
 924         yield from eq(dut.data_i[idx], instrs[idx])
 925         di = yield dut.data_i[idx]
 926         print ("senddata %d %x" % (idx, di))
 927     yield dut.p_add_i.eq(sendlen)
 928     yield
 929     o_p_ready = yield dut.p_ready_o
 930     while not o_p_ready:
 931         yield
 932         o_p_ready = yield dut.p_ready_o
 933
 934     yield dut.p_add_i.eq(0)
 935
 936
 937 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 938     yield from disable_issue(dut)
 939     yield dut.int_dest_i.eq(dest)
 940     yield dut.int_src1_i.eq(src1)
 941     yield dut.int_src2_i.eq(src2)
 942     if (op & (0x3<<2)) != 0: # branch
 943         yield dut.brissue.insn_i.eq(1)
 944         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 945         yield dut.br_imm_i.eq(imm)
 946         dut_issue = dut.brissue
 947     else:
 948         yield dut.aluissue.insn_i.eq(1)
 949         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 950         yield dut.alu_imm_i.eq(imm)
 951         dut_issue = dut.aluissue
 952     yield dut.reg_enable_i.eq(1)
 953
 954     # these indicate that the instruction is to be made shadow-dependent on
 955     # (either) branch success or branch fail
 956     yield dut.branch_fail_i.eq(branch_fail)
 957     yield dut.branch_succ_i.eq(branch_success)
 958
 959     yield
 960     yield from wait_for_issue(dut, dut_issue)
 961
 962
 963 def print_reg(dut, rnums):
 964     rs = []
 965     for rnum in rnums:
 966         reg = yield dut.intregs.regs[rnum].reg
 967         rs.append("%x" % reg)
 968     rnums = map(str, rnums)
 969     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 970
 971
 972 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 973     insts = []
 974     for i in range(n_ops):
 975         src1 = randint(1, dut.n_regs-1)
 976         src2 = randint(1, dut.n_regs-1)
 977         imm = randint(1, (1<<dut.rwid)-1)
 978         dest = randint(1, dut.n_regs-1)
 979         op = randint(0, max_opnums)
 980         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 981
 982         if shadowing:
 983             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 984         else:
 985             insts.append((src1, src2, dest, op, opi, imm))
 986     return insts
 987
 988
 989 def wait_for_busy_clear(dut):
 990     while True:
 991         busy_o = yield dut.busy_o
 992         if not busy_o:
 993             break
 994         print ("busy",)
 995         yield
 996
 997 def disable_issue(dut):
 998     yield dut.aluissue.insn_i.eq(0)
 999     yield dut.brissue.insn_i.eq(0)
1000     yield dut.lsissue.insn_i.eq(0)
1001
1002
def wait_for_issue(dut, dut_issue):
    """Simulation generator: clock until ``dut_issue.fn_issue_o`` is set.

    Prints "busy" for every cycle spent waiting.  Once the issue group has
    accepted, all issue groups are de-selected and the regfile enable is
    dropped.
    """
    while not (yield dut_issue.fn_issue_o):
        print("busy")
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1014
def scoreboard_branch_sim(dut, alusim):
    """Simulation generator exercising branch shadowing on a scoreboard.

    ``dut`` is driven directly through ``int_instr`` and must therefore
    expose the attributes that function uses, plus ``branch_direction_o``,
    ``intregs``, ``n_regs`` and ``busy_o``.  ``alusim`` is the reference
    model (see ``RegSim``) whose registers are compared with the dut's at
    the end.

    Instructions are 5-tuples ``(src1, src2, dest, op, (shadow_on,
    shadow_off))``.  For a branch (``op >= 4``) the ``dest`` slot instead
    holds a ``(branch_ok, branch_fail)`` pair of instruction lists, whose
    entries may be ``None`` for "no instruction".
    """

    iseed = 3

    for _ in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers (register 0 is left alone)
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # NOTE: disabled.  create_random_ops() with shadowing returns
            # 7-tuples, which the 5-tuple unpacking below does not accept,
            # so this block needs updating before it can be re-enabled.

            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the issue loop below consumes (and extends) insts, so keep an
        # untouched copy for replaying through the reference model
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        while insts:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: nothing left to be shadowed by
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        insts.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        insts.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # these instructions carry no immediate: int_instr() takes one
            # right after the opcode, so pass 0 (it only sends the low two
            # opcode bits, without an immediate-select flag)
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay the same program through the reference model, following
        # only the side of each branch that the model says was taken
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # register-register operation: op_imm=0, imm=0
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1127
1128
def scoreboard_sim(dut, alusim):
    """Simulation generator: run a list of instructions through the dut.

    Registers 1..n_regs-1 of both the dut and the reference model
    ``alusim`` are loaded with the same random values (fixed seed 0).  Each
    instruction is then applied to the reference model and pushed into the
    dut's instruction queue via ``instr_q``.  Once the queue has drained
    and the dut is no longer busy, the dut's registers are checked against
    the model and dumped.

    The ``if True`` / ``if False`` blocks are hand-toggled test cases.
    NOTE: the issue loop unpacks 7 elements per instruction, ``(src1, src2,
    dest, op, opi, imm, (br_ok, br_fail))``; several disabled blocks still
    append shorter tuples and would fail to unpack if enabled as they are.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            # op 0x30 has bits 4 and 5 set; opi=1, imm=1
            instrs.append( (1, 2, 2, 0x30, 1, 1, (0, 0)) )
            #instrs.append( (1, 2, 7, 0x10, 1, 1, (0, 0)) )

        if False:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if False:
            # 5-element tuples (no opi / imm)
            instrs.append( (7, 3, 2, 4, (0, 0)) )
            instrs.append( (7, 6, 6, 2, (0, 0)) )
            instrs.append( (1, 7, 2, 2, (0, 0)) )

        if False:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        # the blocks from here down to "very weird failure" use 4-element
        # tuples (presumably src1, src2, dest, op -- TODO confirm)
        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # op 4 on regs 5 and 3 with reg5=4, reg3=5, followed by an
            # instruction whose shadow pair is (0, 1)
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # same as above but with reg5=6 and shadow pair (1, 0)
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            # 6-element tuples
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # update the reference model first, then queue the instruction
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first until the instruction queue is empty ...
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # ... then four more clock cycles before polling busy_o
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1275
1276
def test_scoreboard():
    """Build the issue-queue + scoreboard design, emit RTLIL, and simulate.

    Writes ``test_scoreboard6600.il`` and ``test_scoreboard6600.vcd`` to
    the current directory.
    """
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=16, opwid=8, n_regs=8)
    alusim = RegSim(rwidth=16, nregs=8)
    memsim = MemSim(regwid=16, addrw=16)  # created but not used yet

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    # alternative (disabled): the branch-shadowing simulation
    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1290
1291
# Run the scoreboard test when this file is executed as a script.
if __name__ == '__main__':
    test_scoreboard()