src/soc/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5
   6 from soc.regfile.regfile import RegFileArray, treereduce
   7 from soc.scoreboard.fu_fu_matrix import FUFUDepMatrix
   8 from soc.scoreboard.fu_reg_matrix import FURegDepMatrix
   9 from soc.scoreboard.global_pending import GlobalPending
  10 from soc.scoreboard.group_picker import GroupPicker
  11 from soc.scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  12 from soc.scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  13 from soc.scoreboard.instruction_q import Instruction, InstructionQ
  14 from soc.scoreboard.memfu import MemFunctionUnits
  15
  16 from compalu import ComputationUnitNoDelay
  17 from compldst import LDSTCompUnit
  18 from testmem import TestMemory
  19
  20 from alu_hier import ALU, BranchALU
  21 from nmutil.latch import SRLatch
  22 from nmutil.nmoperator import eq
  23
  24 from random import randint, seed
  25 from copy import deepcopy
  26 from math import log
  27
  28
  29 class MemSim:
  30     def __init__(self, regwid, addrw):
  31         self.regwid = regwid
  32         self.ddepth = 1 # regwid//8
  33         depth = (1<<addrw) // self.ddepth
  34         self.mem = list(range(0, depth))
  35
  36     def ld(self, addr):
  37         return self.mem[addr>>self.ddepth]
  38
  39     def st(self, addr, data):
  40         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  41
  42
  43 class CompUnitsBase(Elaboratable):
  44     """ Computation Unit Base class.
  45
  46         Amazingly, this class works recursively.  It's supposed to just
  47         look after some ALUs (that can handle the same operations),
  48         grouping them together, however it turns out that the same code
  49         can also group *groups* of Computation Units together as well.
  50
  51         Basically it was intended just to concatenate the ALU's issue,
  52         go_rd etc. signals together, which start out as bits and become
  53         sequences.  Turns out that the same trick works just as well
  54         on Computation Units!
  55
  56         So this class may be used recursively to present a top-level
  57         sequential concatenation of all the signals in and out of
  58         ALUs, whilst at the same time making it convenient to group
  59         ALUs together.
  60
  61         At the lower level, the intent is that groups of (identical)
  62         ALUs may be passed the same operation.  Even beyond that,
  63         the intent is that that group of (identical) ALUs actually
  64         share the *same pipeline* and as such become a "Concurrent
  65         Computation Unit" as defined by Mitch Alsup (see section
  66         11.4.9.3)
  67     """
  68     def __init__(self, rwid, units, ldstmode=False):
  69         """ Inputs:
  70
  71             * :rwid:   bit width of register file(s) - both FP and INT
  72             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  73         """
  74         self.units = units
  75         self.ldstmode = ldstmode
  76         self.rwid = rwid
  77         self.rwid = rwid
  78         if units and isinstance(units[0], CompUnitsBase):
  79             self.n_units = 0
  80             for u in self.units:
  81                 self.n_units += u.n_units
  82         else:
  83             self.n_units = len(units)
  84
  85         n_units = self.n_units
  86
  87         # inputs
  88         self.issue_i = Signal(n_units, reset_less=True)
  89         self.go_rd_i = Signal(n_units, reset_less=True)
  90         self.go_wr_i = Signal(n_units, reset_less=True)
  91         self.shadown_i = Signal(n_units, reset_less=True)
  92         self.go_die_i = Signal(n_units, reset_less=True)
  93         if ldstmode:
  94             self.go_ad_i = Signal(n_units, reset_less=True)
  95             self.go_st_i = Signal(n_units, reset_less=True)
  96
  97         # outputs
  98         self.busy_o = Signal(n_units, reset_less=True)
  99         self.rd_rel_o = Signal(n_units, reset_less=True)
 100         self.req_rel_o = Signal(n_units, reset_less=True)
 101         if ldstmode:
 102             self.ld_o = Signal(n_units, reset_less=True) # op is LD
 103             self.st_o = Signal(n_units, reset_less=True) # op is ST
 104             self.adr_rel_o = Signal(n_units, reset_less=True)
 105             self.sto_rel_o = Signal(n_units, reset_less=True)
 106             self.req_rel_o = Signal(n_units, reset_less=True)
 107             self.load_mem_o = Signal(n_units, reset_less=True)
 108             self.stwd_mem_o = Signal(n_units, reset_less=True)
 109             self.addr_o = Signal(rwid, reset_less=True)
 110
 111         # in/out register data (note: not register#, actual data)
 112         self.data_o = Signal(rwid, reset_less=True)
 113         self.src1_i = Signal(rwid, reset_less=True)
 114         self.src2_i = Signal(rwid, reset_less=True)
 115         # input operand
 116
 117     def elaborate(self, platform):
 118         m = Module()
 119         comb = m.d.comb
 120
 121         for i, alu in enumerate(self.units):
 122             setattr(m.submodules, "comp%d" % i, alu)
 123
 124         go_rd_l = []
 125         go_wr_l = []
 126         issue_l = []
 127         busy_l = []
 128         req_rel_l = []
 129         rd_rel_l = []
 130         shadow_l = []
 131         godie_l = []
 132         for alu in self.units:
 133             req_rel_l.append(alu.req_rel_o)
 134             rd_rel_l.append(alu.rd_rel_o)
 135             shadow_l.append(alu.shadown_i)
 136             godie_l.append(alu.go_die_i)
 137             go_wr_l.append(alu.go_wr_i)
 138             go_rd_l.append(alu.go_rd_i)
 139             issue_l.append(alu.issue_i)
 140             busy_l.append(alu.busy_o)
 141         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 142         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 143         comb += self.busy_o.eq(Cat(*busy_l))
 144         comb += Cat(*godie_l).eq(self.go_die_i)
 145         comb += Cat(*shadow_l).eq(self.shadown_i)
 146         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 147         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 148         comb += Cat(*issue_l).eq(self.issue_i)
 149
 150         # connect data register input/output
 151
 152         # merge (OR) all integer FU / ALU outputs to a single value
 153         if self.units:
 154             data_o = treereduce(self.units, "data_o")
 155             comb += self.data_o.eq(data_o)
 156             if self.ldstmode:
 157                 addr_o = treereduce(self.units, "addr_o")
 158                 comb += self.addr_o.eq(addr_o)
 159
 160         for i, alu in enumerate(self.units):
 161             comb += alu.src1_i.eq(self.src1_i)
 162             comb += alu.src2_i.eq(self.src2_i)
 163
 164         if not self.ldstmode:
 165             return m
 166
 167         ldmem_l = []
 168         stmem_l = []
 169         go_ad_l = []
 170         go_st_l = []
 171         ld_l = []
 172         st_l = []
 173         adr_rel_l = []
 174         sto_rel_l = []
 175         for alu in self.units:
 176             ld_l.append(alu.ld_o)
 177             st_l.append(alu.st_o)
 178             adr_rel_l.append(alu.adr_rel_o)
 179             sto_rel_l.append(alu.sto_rel_o)
 180             ldmem_l.append(alu.load_mem_o)
 181             stmem_l.append(alu.stwd_mem_o)
 182             go_ad_l.append(alu.go_ad_i)
 183             go_st_l.append(alu.go_st_i)
 184         comb += self.ld_o.eq(Cat(*ld_l))
 185         comb += self.st_o.eq(Cat(*st_l))
 186         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 187         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 188         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 189         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 190         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 191         comb += Cat(*go_st_l).eq(self.go_st_i)
 192
 193         return m
 194
 195
 196 class CompUnitLDSTs(CompUnitsBase):
 197
 198     def __init__(self, rwid, opwid, n_ldsts, mem):
 199         """ Inputs:
 200
 201             * :rwid:   bit width of register file(s) - both FP and INT
 202             * :opwid:  operand bit width
 203         """
 204         self.opwid = opwid
 205
 206         # inputs
 207         self.oper_i = Signal(opwid, reset_less=True)
 208         self.imm_i = Signal(rwid, reset_less=True)
 209
 210         # Int ALUs
 211         self.alus = []
 212         for i in range(n_ldsts):
 213             self.alus.append(ALU(rwid))
 214
 215         units = []
 216         for alu in self.alus:
 217             aluopwid = 4 # see compldst.py for "internal" opcode
 218             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 219
 220         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 221
 222     def elaborate(self, platform):
 223         m = CompUnitsBase.elaborate(self, platform)
 224         comb = m.d.comb
 225
 226         # hand the same operation to all units, 4 lower bits though
 227         for alu in self.units:
 228             comb += alu.oper_i[0:4].eq(self.oper_i)
 229             comb += alu.imm_i.eq(self.imm_i)
 230             comb += alu.isalu_i.eq(0)
 231
 232         return m
 233
 234
 235 class CompUnitALUs(CompUnitsBase):
 236
 237     def __init__(self, rwid, opwid, n_alus):
 238         """ Inputs:
 239
 240             * :rwid:   bit width of register file(s) - both FP and INT
 241             * :opwid:  operand bit width
 242         """
 243         self.opwid = opwid
 244
 245         # inputs
 246         self.oper_i = Signal(opwid, reset_less=True)
 247         self.imm_i = Signal(rwid, reset_less=True)
 248
 249         # Int ALUs
 250         alus = []
 251         for i in range(n_alus):
 252             alus.append(ALU(rwid))
 253
 254         units = []
 255         for alu in alus:
 256             aluopwid = 3 # extra bit for immediate mode
 257             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 258
 259         CompUnitsBase.__init__(self, rwid, units)
 260
 261     def elaborate(self, platform):
 262         m = CompUnitsBase.elaborate(self, platform)
 263         comb = m.d.comb
 264
 265         # hand the same operation to all units, only lower 3 bits though
 266         for alu in self.units:
 267             comb += alu.oper_i[0:3].eq(self.oper_i)
 268             comb += alu.imm_i.eq(self.imm_i)
 269
 270         return m
 271
 272
 273 class CompUnitBR(CompUnitsBase):
 274
 275     def __init__(self, rwid, opwid):
 276         """ Inputs:
 277
 278             * :rwid:   bit width of register file(s) - both FP and INT
 279             * :opwid:  operand bit width
 280
 281             Note: bgt unit is returned so that a shadow unit can be created
 282             for it
 283         """
 284         self.opwid = opwid
 285
 286         # inputs
 287         self.oper_i = Signal(opwid, reset_less=True)
 288         self.imm_i = Signal(rwid, reset_less=True)
 289
 290         # Branch ALU and CU
 291         self.bgt = BranchALU(rwid)
 292         aluopwid = 3 # extra bit for immediate mode
 293         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 294         CompUnitsBase.__init__(self, rwid, [self.br1])
 295
 296     def elaborate(self, platform):
 297         m = CompUnitsBase.elaborate(self, platform)
 298         comb = m.d.comb
 299
 300         # hand the same operation to all units
 301         for alu in self.units:
 302             comb += alu.oper_i.eq(self.oper_i)
 303             comb += alu.imm_i.eq(self.imm_i)
 304
 305         return m
 306
 307
 308 class FunctionUnits(Elaboratable):
 309
 310     def __init__(self, n_regs, n_int_alus):
 311         self.n_regs = n_regs
 312         self.n_int_alus = n_int_alus
 313
 314         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 315         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 316         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 317
 318         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 319         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 320
 321         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 322         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 323         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 324
 325         self.readable_o = Signal(n_int_alus, reset_less=True)
 326         self.writable_o = Signal(n_int_alus, reset_less=True)
 327
 328         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 329         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 330         self.go_die_i = Signal(n_int_alus, reset_less=True)
 331         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 332
 333         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 334
 335     def elaborate(self, platform):
 336         m = Module()
 337         comb = m.d.comb
 338         sync = m.d.sync
 339
 340         n_intfus = self.n_int_alus
 341
 342         # Integer FU-FU Dep Matrix
 343         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 344         m.submodules.intfudeps = intfudeps
 345         # Integer FU-Reg Dep Matrix
 346         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 347         m.submodules.intregdeps = intregdeps
 348
 349         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 350         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 351
 352         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 353         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 354
 355         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 356         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 357         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 358
 359         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 360         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 361         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 362         comb += intfudeps.go_die_i.eq(self.go_die_i)
 363         comb += self.readable_o.eq(intfudeps.readable_o)
 364         comb += self.writable_o.eq(intfudeps.writable_o)
 365
 366         # Connect function issue / arrays, and dest/src1/src2
 367         comb += intregdeps.dest_i.eq(self.dest_i)
 368         comb += intregdeps.src_i[0].eq(self.src1_i)
 369         comb += intregdeps.src_i[1].eq(self.src2_i)
 370
 371         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 372         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 373         comb += intregdeps.go_die_i.eq(self.go_die_i)
 374         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 375
 376         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 377         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 378         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 379
 380         return m
 381
 382
 383 class Scoreboard(Elaboratable):
 384     def __init__(self, rwid, n_regs):
 385         """ Inputs:
 386
 387             * :rwid:   bit width of register file(s) - both FP and INT
 388             * :n_regs: depth of register file(s) - number of FP and INT regs
 389         """
 390         self.rwid = rwid
 391         self.n_regs = n_regs
 392
 393         # Register Files
 394         self.intregs = RegFileArray(rwid, n_regs)
 395         self.fpregs = RegFileArray(rwid, n_regs)
 396
 397         # Memory (test for now)
 398         self.mem = TestMemory(self.rwid, 8) # not too big, takes too long
 399
 400         # issue q needs to get at these
 401         self.aluissue = IssueUnitGroup(2)
 402         self.lsissue = IssueUnitGroup(2)
 403         self.brissue = IssueUnitGroup(1)
 404         # and these
 405         self.alu_oper_i = Signal(4, reset_less=True)
 406         self.alu_imm_i = Signal(rwid, reset_less=True)
 407         self.br_oper_i = Signal(4, reset_less=True)
 408         self.br_imm_i = Signal(rwid, reset_less=True)
 409         self.ls_oper_i = Signal(4, reset_less=True)
 410         self.ls_imm_i = Signal(rwid, reset_less=True)
 411
 412         # inputs
 413         self.int_dest_i = Signal(range(n_regs), reset_less=True) # Dest R# in
 414         self.int_src1_i = Signal(range(n_regs), reset_less=True) # oper1 R# in
 415         self.int_src2_i = Signal(range(n_regs), reset_less=True) # oper2 R# in
 416         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 417
 418         # outputs
 419         self.issue_o = Signal(reset_less=True) # instruction was accepted
 420         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 421
 422         # for branch speculation experiment.  branch_direction = 0 if
 423         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 424         # branch_succ and branch_fail are requests to have the current
 425         # instruction be dependent on the branch unit "shadow" capability.
 426         self.branch_succ_i = Signal(reset_less=True)
 427         self.branch_fail_i = Signal(reset_less=True)
 428         self.branch_direction_o = Signal(2, reset_less=True)
 429
 430     def elaborate(self, platform):
 431         m = Module()
 432         comb = m.d.comb
 433         sync = m.d.sync
 434
 435         m.submodules.intregs = self.intregs
 436         m.submodules.fpregs = self.fpregs
 437         m.submodules.mem = mem = self.mem
 438
 439         # register ports
 440         int_dest = self.intregs.write_port("dest")
 441         int_src1 = self.intregs.read_port("src1")
 442         int_src2 = self.intregs.read_port("src2")
 443
 444         fp_dest = self.fpregs.write_port("dest")
 445         fp_src1 = self.fpregs.read_port("src1")
 446         fp_src2 = self.fpregs.read_port("src2")
 447
 448         # Int ALUs and BR ALUs
 449         n_int_alus = 5
 450         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 451         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 452
 453         # LDST Comp Units
 454         n_ldsts = 2
 455         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, self.mem)
 456
 457         # Comp Units
 458         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 459         bgt = cub.bgt # get at the branch computation unit
 460         br1 = cub.br1
 461
 462         # Int FUs
 463         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 464
 465         # Memory FUs
 466         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 467
 468         # Memory Priority Picker 1: one gateway per memory port
 469         mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
 470         m.submodules.mempick1 = mempick1
 471
 472         # Count of number of FUs
 473         n_intfus = n_int_alus
 474         n_fp_fus = 0 # for now
 475
 476         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 477         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 478         m.submodules.intpick1 = intpick1
 479
 480         # INT/FP Issue Unit
 481         regdecode = RegDecode(self.n_regs)
 482         m.submodules.regdecode = regdecode
 483         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 484         m.submodules.issueunit = issueunit
 485
 486         # Shadow Matrix.  currently n_intfus shadows, to be used for
 487         # write-after-write hazards.  NOTE: there is one extra for branches,
 488         # so the shadow width is increased by 1
 489         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 490         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 491
 492         # record previous instruction to cast shadow on current instruction
 493         prev_shadow = Signal(n_intfus)
 494
 495         # Branch Speculation recorder.  tracks the success/fail state as
 496         # each instruction is issued, so that when the branch occurs the
 497         # allow/cancel can be issued as appropriate.
 498         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 499
 500         #---------
 501         # ok start wiring things together...
 502         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 503         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 504         #---------
 505
 506         #---------
 507         # Issue Unit is where it starts.  set up some in/outs for this module
 508         #---------
 509         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 510                      regdecode.src1_i.eq(self.int_src1_i),
 511                      regdecode.src2_i.eq(self.int_src2_i),
 512                      regdecode.enable_i.eq(self.reg_enable_i),
 513                      self.issue_o.eq(issueunit.issue_o)
 514                     ]
 515
 516         # take these to outside (issue needs them)
 517         comb += cua.oper_i.eq(self.alu_oper_i)
 518         comb += cua.imm_i.eq(self.alu_imm_i)
 519         comb += cub.oper_i.eq(self.br_oper_i)
 520         comb += cub.imm_i.eq(self.br_imm_i)
 521         comb += cul.oper_i.eq(self.ls_oper_i)
 522         comb += cul.imm_i.eq(self.ls_imm_i)
 523
 524         # TODO: issueunit.f (FP)
 525
 526         # and int function issue / busy arrays, and dest/src1/src2
 527         comb += intfus.dest_i.eq(regdecode.dest_o)
 528         comb += intfus.src1_i.eq(regdecode.src1_o)
 529         comb += intfus.src2_i.eq(regdecode.src2_o)
 530
 531         fn_issue_o = issueunit.fn_issue_o
 532
 533         comb += intfus.fn_issue_i.eq(fn_issue_o)
 534         comb += issueunit.busy_i.eq(cu.busy_o)
 535         comb += self.busy_o.eq(cu.busy_o.bool())
 536
 537         #---------
 538         # Memory Function Unit
 539         #---------
 540         reset_b = Signal(cul.n_units, reset_less=True)
 541         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 542
 543         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 544         comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
 545         comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
 546
 547         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 548         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 549         # issue_i.  multi-issue gets a bit more complex but not a lot.
 550         prior_ldsts = Signal(cul.n_units, reset_less=True)
 551         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 552         with m.If(self.ls_oper_i[3]): # LD bit of operand
 553             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 554         with m.If(self.ls_oper_i[2]): # ST bit of operand
 555             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 556
 557         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 558         # just immediately activate go_adr
 559         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 560
 561         # connect up address data
 562         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 563         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 564
 565         # connect loadable / storable to go_ld/go_st.
 566         # XXX should only be done when the memory ld/st has actually happened!
 567         go_st_i = Signal(cul.n_units, reset_less=True)
 568         go_ld_i = Signal(cul.n_units, reset_less=True)
 569         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &\
 570                                   cul.adr_rel_o & cul.ld_o)
 571         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
 572                                   cul.sto_rel_o & cul.st_o)
 573         comb += memfus.go_ld_i.eq(go_ld_i)
 574         comb += memfus.go_st_i.eq(go_st_i)
 575         #comb += cul.go_wr_i.eq(go_ld_i)
 576         comb += cul.go_st_i.eq(go_st_i)
 577
 578         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 579         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 580         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 581
 582         #---------
 583         # merge shadow matrices outputs
 584         #---------
 585
 586         # these are explained in ShadowMatrix docstring, and are to be
 587         # connected to the FUReg and FUFU Matrices, to get them to reset
 588         anydie = Signal(n_intfus, reset_less=True)
 589         allshadown = Signal(n_intfus, reset_less=True)
 590         shreset = Signal(n_intfus, reset_less=True)
 591         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 592         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 593         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 594
 595         #---------
 596         # connect fu-fu matrix
 597         #---------
 598
 599         # Group Picker... done manually for now.
 600         go_rd_o = intpick1.go_rd_o
 601         go_wr_o = intpick1.go_wr_o
 602         go_rd_i = intfus.go_rd_i
 603         go_wr_i = intfus.go_wr_i
 604         go_die_i = intfus.go_die_i
 605         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 606         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 607         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 608         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 609
 610         # Connect Picker
 611         #---------
 612         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 613         #comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 614         # HACK for now: connect LD/ST request release to *address* release
 615         comb += intpick1.req_rel_i[0].eq(cu.req_rel_o[0]) # ALU 0
 616         comb += intpick1.req_rel_i[1].eq(cu.req_rel_o[1]) # ALU 1
 617         comb += intpick1.req_rel_i[2].eq(cul.adr_rel_o[0]) # LD/ST 0
 618         comb += intpick1.req_rel_i[3].eq(cul.adr_rel_o[1]) # LD/ST 1
 619         comb += intpick1.req_rel_i[4].eq(cu.req_rel_o[4])  # BR 0
 620         int_rd_o = intfus.readable_o
 621         int_wr_o = intfus.writable_o
 622         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 623         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 624
 625         #---------
 626         # Shadow Matrix
 627         #---------
 628
 629         comb += shadows.issue_i.eq(fn_issue_o)
 630         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 631         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 632         #---------
 633         # NOTE; this setup is for the instruction order preservation...
 634
 635         # connect shadows / go_dies to Computation Units
 636         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 637         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 638
 639         # ok connect first n_int_fu shadows to busy lines, to create an
 640         # instruction-order linked-list-like arrangement, using a bit-matrix
 641         # (instead of e.g. a ring buffer).
 642
 643         # when written, the shadow can be cancelled (and was good)
 644         for i in range(n_intfus):
 645             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 646
 647         # *previous* instruction shadows *current* instruction, and, obviously,
 648         # if the previous is completed (!busy) don't cast the shadow!
 649         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 650         for i in range(n_intfus):
 651             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 652
 653         #---------
 654         # ... and this is for branch speculation.  it uses the extra bit
 655         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 656         # only needs to set shadow_i, s_fail_i and s_good_i
 657
 658         # issue captures shadow_i (if enabled)
 659         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 660
 661         bactive = Signal(reset_less=True)
 662         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 663
 664         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 665         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 666             comb += bshadow.issue_i.eq(fn_issue_o)
 667             for i in range(n_intfus):
 668                 with m.If(fn_issue_o & (Const(1<<i))):
 669                     comb += bshadow.shadow_i[i][0].eq(1)
 670
 671         # finally, we need an indicator to the test infrastructure as to
 672         # whether the branch succeeded or failed, plus, link up to the
 673         # "recorder" of whether the instruction was under shadow or not
 674
 675         with m.If(br1.issue_i):
 676             sync += bspec.active_i.eq(1)
 677         with m.If(self.branch_succ_i):
 678             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 679         with m.If(self.branch_fail_i):
 680             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 681
 682         # branch is active (TODO: a better signal: this is over-using the
 683         # go_write signal - actually the branch should not be "writing")
 684         with m.If(br1.go_wr_i):
 685             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 686             sync += bspec.active_i.eq(0)
 687             comb += bspec.br_i.eq(1)
 688             # branch occurs if data == 1, failed if data == 0
 689             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 690             for i in range(n_intfus):
 691                 # *expected* direction of the branch matched against *actual*
 692                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 693                 # ... or it didn't
 694                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 695
 696         #---------
 697         # Connect Register File(s)
 698         #---------
 699         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 700         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 701         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 702
 703         # connect ALUs to regfule
 704         comb += int_dest.data_i.eq(cu.data_o)
 705         comb += cu.src1_i.eq(int_src1.data_o)
 706         comb += cu.src2_i.eq(int_src2.data_o)
 707
 708         # connect ALU Computation Units
 709         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 710         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 711         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 712
 713         return m
 714
 715     def __iter__(self):
 716         yield from self.intregs
 717         yield from self.fpregs
 718         yield self.int_dest_i
 719         yield self.int_src1_i
 720         yield self.int_src2_i
 721         yield self.issue_o
 722         yield self.branch_succ_i
 723         yield self.branch_fail_i
 724         yield self.branch_direction_o
 725
 726     def ports(self):
 727         return list(self)
 728
 729
class IssueToScoreboard(Elaboratable):
    """ Connects an InstructionQ to a Scoreboard.

        Instructions presented on data_i are added to the queue (p_add_i /
        p_ready_o).  The entry at the head of the queue (iq.data_o[0]) is
        inspected in-place, its register numbers and operation are passed
        to the Scoreboard, and one of the branch, LD/ST or ALU issue units
        is selected from the opcode bits.  The entry is only popped from
        the queue once the selected issue unit reports fn_issue_o != 0.

        Arguments:
        * qlen   - passed to InstructionQ; also used to size the
                   p_add_i / qlen_o signals
        * n_in   - passed to InstructionQ; number of data_i records
        * n_out  - passed to InstructionQ
        * rwid   - passed to both InstructionQ and Scoreboard
        * opwid  - passed to InstructionQ (stored as self.opw)
        * n_regs - passed to Scoreboard
    """

    def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
        self.qlen = qlen
        self.n_in = n_in
        self.n_out = n_out
        self.rwid = rwid
        self.opw = opwid
        self.n_regs = n_regs

        # width used for both the "number to add" and "queue length"
        # signals: int(log2(qlen)) + 2 bits
        mqbits = unsigned(int(log(qlen) / log(2))+2)
        self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
        self.p_ready_o = Signal() # instructions were added
        self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)

        self.busy_o = Signal(reset_less=True) # at least one CU is busy
        self.qlen_o = Signal(mqbits, reset_less=True)

    def elaborate(self, platform):
        """ Builds the queue + scoreboard and the issue-selection logic.

            Side effect: sets self.intregs (taken from the Scoreboard), so
            that attribute only exists after elaborate() has been called.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync # not used below: all logic here is combinatorial

        iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
        sc = Scoreboard(self.rwid, self.n_regs)
        m.submodules.iq = iq
        m.submodules.sc = sc

        # get at the regfile for testing
        self.intregs = sc.intregs

        # and the "busy" signal and instruction queue length
        comb += self.busy_o.eq(sc.busy_o)
        comb += self.qlen_o.eq(iq.qlen_o)

        # link up instruction queue
        comb += iq.p_add_i.eq(self.p_add_i)
        comb += self.p_ready_o.eq(iq.p_ready_o)
        for i in range(self.n_in):
            comb += eq(iq.data_i[i], self.data_i[i])

        # take instruction and process it.  note that it's possible to
        # "inspect" the queue contents *without* actually removing the
        # items.  items are only removed when the issue unit selected
        # for the instruction reports fn_issue_o != 0: at that point
        # iq.n_sub_i is set to 1 (see below).

        # in "waiting" state: one flag per Function-Unit-Group, set (further
        # down) when the head-of-queue instruction targets that group
        wait_issue_br = Signal()
        wait_issue_alu = Signal()
        wait_issue_ls = Signal()

        with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
            # set instruction pop length to 1 if the unit accepted
            with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)
            with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
                with m.If(iq.qlen_o != 0):
                    comb += iq.n_sub_i.eq(1)

        # see if some instruction(s) are here.  note that this is
        # "inspecting" the in-place queue.  note also that on the
        # cycle following "waiting" for fn_issue_o to be set, the
        # "resetting" done above (insn_i=0) could be re-ASSERTed.
        with m.If(iq.qlen_o != 0):
            # get the operands and operation
            imm = iq.data_o[0].imm_i
            dest = iq.data_o[0].dest_i
            src1 = iq.data_o[0].src1_i
            src2 = iq.data_o[0].src2_i
            op = iq.data_o[0].oper_i
            opi = iq.data_o[0].opim_i # immediate set

            # set the src/dest regs
            comb += sc.int_dest_i.eq(dest)
            comb += sc.int_src1_i.eq(src1)
            comb += sc.int_src2_i.eq(src2)
            comb += sc.reg_enable_i.eq(1) # enable the regfile

            # choose a Function-Unit-Group.  opcode bits 2-3 select the
            # branch group, otherwise bits 4-5 select LD/ST, otherwise ALU.
            with m.If((op & (0x3<<2)) != 0): # branch
                comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.br_imm_i.eq(imm)
                comb += sc.brissue.insn_i.eq(1)
                comb += wait_issue_br.eq(1)
            with m.Elif((op & (0x3<<4)) != 0): # ld/st
                # see compldst.py
                # bit 0: ADD/SUB
                # bit 1: immed
                # bit 4: LD
                # bit 5: ST
                comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
                comb += sc.ls_imm_i.eq(imm)
                comb += sc.lsissue.insn_i.eq(1)
                comb += wait_issue_ls.eq(1)
            with m.Else(): # alu
                comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
                comb += sc.alu_imm_i.eq(imm)
                comb += sc.aluissue.insn_i.eq(1)
                comb += wait_issue_alu.eq(1)

            # XXX TODO
            # these indicate that the instruction is to be made
            # shadow-dependent on
            # (either) branch success or branch fail
            #yield sc.branch_fail_i.eq(branch_fail)
            #yield sc.branch_succ_i.eq(branch_success)

        return m

    def __iter__(self):
        # NOTE(review): busy_o and qlen_o are not yielded here, so they
        # are not part of ports() - confirm whether that is intentional.
        yield self.p_ready_o
        for o in self.data_i:
            yield from list(o)
        yield self.p_add_i

    def ports(self):
        """ Returns the signals produced by __iter__, as a list.
        """
        return list(self)
 850
 851
 852 IADD = 0
 853 ISUB = 1
 854 IMUL = 2
 855 ISHF = 3
 856 IBGT = 4
 857 IBLT = 5
 858 IBEQ = 6
 859 IBNE = 7
 860
 861
 862 class RegSim:
 863     def __init__(self, rwidth, nregs):
 864         self.rwidth = rwidth
 865         self.regs = [0] * nregs
 866
 867     def op(self, op, op_imm, imm, src1, src2, dest):
 868         maxbits = (1 << self.rwidth) - 1
 869         src1 = self.regs[src1] & maxbits
 870         if op_imm:
 871             src2 = imm
 872         else:
 873             src2 = self.regs[src2] & maxbits
 874         if op == IADD:
 875             val = src1 + src2
 876         elif op == ISUB:
 877             val = src1 - src2
 878         elif op == IMUL:
 879             val = src1 * src2
 880         elif op == ISHF:
 881             val = src1 >> (src2 & maxbits)
 882         elif op == IBGT:
 883             val = int(src1 > src2)
 884         elif op == IBLT:
 885             val = int(src1 < src2)
 886         elif op == IBEQ:
 887             val = int(src1 == src2)
 888         elif op == IBNE:
 889             val = int(src1 != src2)
 890         else:
 891             return 0 # LD/ST TODO
 892         val &= maxbits
 893         self.setval(dest, val)
 894         return val
 895
 896     def setval(self, dest, val):
 897         print ("sim setval", dest, hex(val))
 898         self.regs[dest] = val
 899
 900     def dump(self, dut):
 901         for i, val in enumerate(self.regs):
 902             reg = yield dut.intregs.regs[i].reg
 903             okstr = "OK" if reg == val else "!ok"
 904             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 905
 906     def check(self, dut):
 907         for i, val in enumerate(self.regs):
 908             reg = yield dut.intregs.regs[i].reg
 909             if reg != val:
 910                 print("reg %d expected %x received %x\n" % (i, val, reg))
 911                 yield from self.dump(dut)
 912                 assert False
 913
 914 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 915             branch_success, branch_fail):
 916     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 917                'src1_i': src1, 'src2_i': src2}]
 918
 919     sendlen = 1
 920     for idx in range(sendlen):
 921         yield from eq(dut.data_i[idx], instrs[idx])
 922         di = yield dut.data_i[idx]
 923         print ("senddata %d %x" % (idx, di))
 924     yield dut.p_add_i.eq(sendlen)
 925     yield
 926     o_p_ready = yield dut.p_ready_o
 927     while not o_p_ready:
 928         yield
 929         o_p_ready = yield dut.p_ready_o
 930
 931     yield dut.p_add_i.eq(0)
 932
 933
 934 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 935     yield from disable_issue(dut)
 936     yield dut.int_dest_i.eq(dest)
 937     yield dut.int_src1_i.eq(src1)
 938     yield dut.int_src2_i.eq(src2)
 939     if (op & (0x3<<2)) != 0: # branch
 940         yield dut.brissue.insn_i.eq(1)
 941         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 942         yield dut.br_imm_i.eq(imm)
 943         dut_issue = dut.brissue
 944     else:
 945         yield dut.aluissue.insn_i.eq(1)
 946         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 947         yield dut.alu_imm_i.eq(imm)
 948         dut_issue = dut.aluissue
 949     yield dut.reg_enable_i.eq(1)
 950
 951     # these indicate that the instruction is to be made shadow-dependent on
 952     # (either) branch success or branch fail
 953     yield dut.branch_fail_i.eq(branch_fail)
 954     yield dut.branch_succ_i.eq(branch_success)
 955
 956     yield
 957     yield from wait_for_issue(dut, dut_issue)
 958
 959
 960 def print_reg(dut, rnums):
 961     rs = []
 962     for rnum in rnums:
 963         reg = yield dut.intregs.regs[rnum].reg
 964         rs.append("%x" % reg)
 965     rnums = map(str, rnums)
 966     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 967
 968
 969 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 970     insts = []
 971     for i in range(n_ops):
 972         src1 = randint(1, dut.n_regs-1)
 973         src2 = randint(1, dut.n_regs-1)
 974         imm = randint(1, (1<<dut.rwid)-1)
 975         dest = randint(1, dut.n_regs-1)
 976         op = randint(0, max_opnums)
 977         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 978
 979         if shadowing:
 980             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 981         else:
 982             insts.append((src1, src2, dest, op, opi, imm))
 983     return insts
 984
 985
 986 def wait_for_busy_clear(dut):
 987     while True:
 988         busy_o = yield dut.busy_o
 989         if not busy_o:
 990             break
 991         print ("busy",)
 992         yield
 993
 994 def disable_issue(dut):
 995     yield dut.aluissue.insn_i.eq(0)
 996     yield dut.brissue.insn_i.eq(0)
 997     yield dut.lsissue.insn_i.eq(0)
 998
 999
def wait_for_issue(dut, dut_issue):
    """ Simulation generator: clocks the simulation, printing "busy" each
        cycle, until dut_issue.fn_issue_o is non-zero.  Once it is, all
        issue units are disabled and dut.reg_enable_i is cleared.
    """
    issued = yield dut_issue.fn_issue_o
    while not issued:
        print ("busy")
        #yield from print_reg(dut, [1,2,3])
        yield
        issued = yield dut_issue.fn_issue_o
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    #yield from print_reg(dut, [1,2,3])
1011
def scoreboard_branch_sim(dut, alusim):
    """ Simulation generator: issues a short program containing a branch
        and its two shadowed continuations, then checks the registers.

        Instructions are 5-tuples (src1, src2, dest, op, (shadow_on,
        shadow_off)).  For a branch (op >= 4) the "dest" slot instead
        holds a pair of lists (branch_ok, branch_fail): the instructions
        to issue under the "success" and "fail" shadow respectively.
        Entries in those lists may be None (nothing to issue).

        The same program is replayed afterwards on alusim (following
        only the side the modelled branch selects) and the result is
        compared with dut's registers.

        None of the test vectors carries an immediate, so imm=0 (and
        op_imm=0 for the model) is passed to int_instr / alusim.op.
    """

    iseed = 3

    for _run in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # NOTE(review): disabled.  create_random_ops returns 7-field
            # tuples when shadowing is set, which the 5-field unpacking
            # further down cannot handle - needs adapting before enabling.
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # untouched copy for the software model: the issue loop below
        # consumes (and extends) insts
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        branch_direction = 0
        while insts:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without any shadow
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        insts.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        insts.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # int_instr takes (dut, op, imm, src1, src2, dest, succ, fail):
            # the immediate is explicitly zero here
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the software model, following only the branch side
        # that the model itself selects
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # RegSim.op takes (op, op_imm, imm, src1, src2, dest):
            # register-register operation, no immediate
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1124
1125
def scoreboard_sim(dut, alusim):
    """ Simulation generator: queues a list of instructions on dut and
        checks the resulting register contents against alusim.

        Registers 1..n_regs-1 are initialised to their own number in both
        dut and alusim.  Each instruction is a 7-field tuple
        (src1, src2, dest, op, opi, imm, (br_ok, br_fail)): it is applied
        to alusim first, then pushed into dut's instruction queue with
        instr_q.  Once the queue has drained and dut is no longer busy,
        the registers are compared (alusim.check) and printed
        (alusim.dump).

        The "if True" / "if False" sections select which groups of test
        vectors are active.
    """

    seed(0)

    for i in range(1):

        # set random values in the registers
        # (currently not random: each register is set to its own index)
        for i in range(1, dut.n_regs):
            #val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            # NOTE(review): 0x20 sets opcode bit 5, which the decode
            # comment in IssueToScoreboard.elaborate labels ST rather
            # than LD - confirm against compldst.py.
            instrs.append( (1, 2, 0, 0x20, 1, 1, (0, 0)) ) # LD
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        if True:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        if True:
            instrs.append( (7, 3, 2, 4, 0, 0, (0, 0)) )
            instrs.append( (7, 6, 6, 2, 0, 0, (0, 0)) )
            instrs.append( (1, 7, 2, 2, 0, 0, (0, 0)) )

        if True:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        # NOTE(review): the disabled vectors from here on have 4, 5 or 6
        # fields, not the 7 that the issue loop below unpacks; they need
        # updating to the 7-field layout before being re-enabled.
        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # update the software model first, then queue on the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first for the instruction queue to empty ...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # (... then a few extra clocks, then for busy_o to clear)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1272
1273
def test_scoreboard():
    """ Builds an IssueToScoreboard, writes its RTLIL to
        test_scoreboard6600.il, then runs scoreboard_sim against it,
        recording a VCD trace in test_scoreboard6600.vcd.
    """
    regwidth, n_regs = 16, 8

    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=regwidth, opwid=8, n_regs=n_regs)
    alusim = RegSim(regwidth, n_regs)
    memsim = MemSim(regwidth, 8) # created, but not used by the test below

    # convert first, so that no file is created if conversion fails
    rtlil_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(rtlil_text)

    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1287
1288
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()