src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen.hdl.ast import unsigned
   4 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   5
   6 from regfile.regfile import RegFileArray, treereduce
   7 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   8 from scoreboard.fu_reg_matrix import FURegDepMatrix
   9 from scoreboard.global_pending import GlobalPending
  10 from scoreboard.group_picker import GroupPicker
  11 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  12 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  13 from scoreboard.instruction_q import Instruction, InstructionQ
  14 from scoreboard.memfu import MemFunctionUnits
  15
  16 from compalu import ComputationUnitNoDelay
  17 from compldst import LDSTCompUnit
  18
  19 from alu_hier import ALU, BranchALU
  20 from nmutil.latch import SRLatch
  21 from nmutil.nmoperator import eq
  22
  23 from random import randint, seed
  24 from copy import deepcopy
  25 from math import log
  26
  27
  28 class TestMemory(Elaboratable):
  29     def __init__(self, regwid, addrw):
  30         self.ddepth = 1 # regwid //8
  31         depth = (1<<addrw) // self.ddepth
  32         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  33
  34     def elaborate(self, platform):
  35         m = Module()
  36         m.submodules.rdport = self.rdport = self.mem.read_port()
  37         m.submodules.wrport = self.wrport = self.mem.write_port()
  38         return m
  39
  40
  41 class MemSim:
  42     def __init__(self, regwid, addrw):
  43         self.regwid = regwid
  44         self.ddepth = 1 # regwid//8
  45         depth = (1<<addrw) // self.ddepth
  46         self.mem = list(range(0, depth))
  47
  48     def ld(self, addr):
  49         return self.mem[addr>>self.ddepth]
  50
  51     def st(self, addr, data):
  52         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  53
  54
  55 class CompUnitsBase(Elaboratable):
  56     """ Computation Unit Base class.
  57
  58         Amazingly, this class works recursively.  It's supposed to just
  59         look after some ALUs (that can handle the same operations),
  60         grouping them together, however it turns out that the same code
  61         can also group *groups* of Computation Units together as well.
  62
  63         Basically it was intended just to concatenate the ALU's issue,
  64         go_rd etc. signals together, which start out as bits and become
  65         sequences.  Turns out that the same trick works just as well
  66         on Computation Units!
  67
  68         So this class may be used recursively to present a top-level
  69         sequential concatenation of all the signals in and out of
  70         ALUs, whilst at the same time making it convenient to group
  71         ALUs together.
  72
  73         At the lower level, the intent is that groups of (identical)
  74         ALUs may be passed the same operation.  Even beyond that,
  75         the intent is that that group of (identical) ALUs actually
  76         share the *same pipeline* and as such become a "Concurrent
  77         Computation Unit" as defined by Mitch Alsup (see section
  78         11.4.9.3)
  79     """
  80     def __init__(self, rwid, units, ldstmode=False):
  81         """ Inputs:
  82
  83             * :rwid:   bit width of register file(s) - both FP and INT
  84             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  85         """
  86         self.units = units
  87         self.ldstmode = ldstmode
  88         self.rwid = rwid
  89         self.rwid = rwid
  90         if units and isinstance(units[0], CompUnitsBase):
  91             self.n_units = 0
  92             for u in self.units:
  93                 self.n_units += u.n_units
  94         else:
  95             self.n_units = len(units)
  96
  97         n_units = self.n_units
  98
  99         # inputs
 100         self.issue_i = Signal(n_units, reset_less=True)
 101         self.go_rd_i = Signal(n_units, reset_less=True)
 102         self.go_wr_i = Signal(n_units, reset_less=True)
 103         self.shadown_i = Signal(n_units, reset_less=True)
 104         self.go_die_i = Signal(n_units, reset_less=True)
 105         if ldstmode:
 106             self.go_ad_i = Signal(n_units, reset_less=True)
 107             self.go_st_i = Signal(n_units, reset_less=True)
 108
 109         # outputs
 110         self.busy_o = Signal(n_units, reset_less=True)
 111         self.rd_rel_o = Signal(n_units, reset_less=True)
 112         self.req_rel_o = Signal(n_units, reset_less=True)
 113         if ldstmode:
 114             self.ld_o = Signal(n_units, reset_less=True) # op is LD
 115             self.st_o = Signal(n_units, reset_less=True) # op is ST
 116             self.adr_rel_o = Signal(n_units, reset_less=True)
 117             self.sto_rel_o = Signal(n_units, reset_less=True)
 118             self.req_rel_o = Signal(n_units, reset_less=True)
 119             self.load_mem_o = Signal(n_units, reset_less=True)
 120             self.stwd_mem_o = Signal(n_units, reset_less=True)
 121             self.addr_o = Signal(rwid, reset_less=True)
 122
 123         # in/out register data (note: not register#, actual data)
 124         self.data_o = Signal(rwid, reset_less=True)
 125         self.src1_i = Signal(rwid, reset_less=True)
 126         self.src2_i = Signal(rwid, reset_less=True)
 127         # input operand
 128
 129     def elaborate(self, platform):
 130         m = Module()
 131         comb = m.d.comb
 132
 133         for i, alu in enumerate(self.units):
 134             setattr(m.submodules, "comp%d" % i, alu)
 135
 136         go_rd_l = []
 137         go_wr_l = []
 138         issue_l = []
 139         busy_l = []
 140         req_rel_l = []
 141         rd_rel_l = []
 142         shadow_l = []
 143         godie_l = []
 144         for alu in self.units:
 145             req_rel_l.append(alu.req_rel_o)
 146             rd_rel_l.append(alu.rd_rel_o)
 147             shadow_l.append(alu.shadown_i)
 148             godie_l.append(alu.go_die_i)
 149             go_wr_l.append(alu.go_wr_i)
 150             go_rd_l.append(alu.go_rd_i)
 151             issue_l.append(alu.issue_i)
 152             busy_l.append(alu.busy_o)
 153         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 154         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 155         comb += self.busy_o.eq(Cat(*busy_l))
 156         comb += Cat(*godie_l).eq(self.go_die_i)
 157         comb += Cat(*shadow_l).eq(self.shadown_i)
 158         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 159         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 160         comb += Cat(*issue_l).eq(self.issue_i)
 161
 162         # connect data register input/output
 163
 164         # merge (OR) all integer FU / ALU outputs to a single value
 165         if self.units:
 166             data_o = treereduce(self.units, "data_o")
 167             comb += self.data_o.eq(data_o)
 168             if self.ldstmode:
 169                 addr_o = treereduce(self.units, "addr_o")
 170                 comb += self.addr_o.eq(addr_o)
 171
 172         for i, alu in enumerate(self.units):
 173             comb += alu.src1_i.eq(self.src1_i)
 174             comb += alu.src2_i.eq(self.src2_i)
 175
 176         if not self.ldstmode:
 177             return m
 178
 179         ldmem_l = []
 180         stmem_l = []
 181         go_ad_l = []
 182         go_st_l = []
 183         ld_l = []
 184         st_l = []
 185         adr_rel_l = []
 186         sto_rel_l = []
 187         for alu in self.units:
 188             ld_l.append(alu.ld_o)
 189             st_l.append(alu.st_o)
 190             adr_rel_l.append(alu.adr_rel_o)
 191             sto_rel_l.append(alu.sto_rel_o)
 192             ldmem_l.append(alu.load_mem_o)
 193             stmem_l.append(alu.stwd_mem_o)
 194             go_ad_l.append(alu.go_ad_i)
 195             go_st_l.append(alu.go_st_i)
 196         comb += self.ld_o.eq(Cat(*ld_l))
 197         comb += self.st_o.eq(Cat(*st_l))
 198         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 199         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 200         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 201         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 202         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 203         comb += Cat(*go_st_l).eq(self.go_st_i)
 204
 205         return m
 206
 207
 208 class CompUnitLDSTs(CompUnitsBase):
 209
 210     def __init__(self, rwid, opwid, n_ldsts, mem):
 211         """ Inputs:
 212
 213             * :rwid:   bit width of register file(s) - both FP and INT
 214             * :opwid:  operand bit width
 215         """
 216         self.opwid = opwid
 217
 218         # inputs
 219         self.oper_i = Signal(opwid, reset_less=True)
 220         self.imm_i = Signal(rwid, reset_less=True)
 221
 222         # Int ALUs
 223         self.alus = []
 224         for i in range(n_ldsts):
 225             self.alus.append(ALU(rwid))
 226
 227         units = []
 228         for alu in self.alus:
 229             aluopwid = 4 # see compldst.py for "internal" opcode
 230             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 231
 232         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 233
 234     def elaborate(self, platform):
 235         m = CompUnitsBase.elaborate(self, platform)
 236         comb = m.d.comb
 237
 238         # hand the same operation to all units, 4 lower bits though
 239         for alu in self.units:
 240             comb += alu.oper_i[0:4].eq(self.oper_i)
 241             comb += alu.imm_i.eq(self.imm_i)
 242             comb += alu.isalu_i.eq(0)
 243
 244         return m
 245
 246
 247 class CompUnitALUs(CompUnitsBase):
 248
 249     def __init__(self, rwid, opwid, n_alus):
 250         """ Inputs:
 251
 252             * :rwid:   bit width of register file(s) - both FP and INT
 253             * :opwid:  operand bit width
 254         """
 255         self.opwid = opwid
 256
 257         # inputs
 258         self.oper_i = Signal(opwid, reset_less=True)
 259         self.imm_i = Signal(rwid, reset_less=True)
 260
 261         # Int ALUs
 262         alus = []
 263         for i in range(n_alus):
 264             alus.append(ALU(rwid))
 265
 266         units = []
 267         for alu in alus:
 268             aluopwid = 3 # extra bit for immediate mode
 269             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 270
 271         CompUnitsBase.__init__(self, rwid, units)
 272
 273     def elaborate(self, platform):
 274         m = CompUnitsBase.elaborate(self, platform)
 275         comb = m.d.comb
 276
 277         # hand the same operation to all units, only lower 3 bits though
 278         for alu in self.units:
 279             comb += alu.oper_i[0:3].eq(self.oper_i)
 280             comb += alu.imm_i.eq(self.imm_i)
 281
 282         return m
 283
 284
 285 class CompUnitBR(CompUnitsBase):
 286
 287     def __init__(self, rwid, opwid):
 288         """ Inputs:
 289
 290             * :rwid:   bit width of register file(s) - both FP and INT
 291             * :opwid:  operand bit width
 292
 293             Note: bgt unit is returned so that a shadow unit can be created
 294             for it
 295         """
 296         self.opwid = opwid
 297
 298         # inputs
 299         self.oper_i = Signal(opwid, reset_less=True)
 300         self.imm_i = Signal(rwid, reset_less=True)
 301
 302         # Branch ALU and CU
 303         self.bgt = BranchALU(rwid)
 304         aluopwid = 3 # extra bit for immediate mode
 305         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 306         CompUnitsBase.__init__(self, rwid, [self.br1])
 307
 308     def elaborate(self, platform):
 309         m = CompUnitsBase.elaborate(self, platform)
 310         comb = m.d.comb
 311
 312         # hand the same operation to all units
 313         for alu in self.units:
 314             comb += alu.oper_i.eq(self.oper_i)
 315             comb += alu.imm_i.eq(self.imm_i)
 316
 317         return m
 318
 319
 320 class FunctionUnits(Elaboratable):
 321
 322     def __init__(self, n_regs, n_int_alus):
 323         self.n_regs = n_regs
 324         self.n_int_alus = n_int_alus
 325
 326         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 327         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 328         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 329
 330         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 331         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 332
 333         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 334         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 335         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 336
 337         self.readable_o = Signal(n_int_alus, reset_less=True)
 338         self.writable_o = Signal(n_int_alus, reset_less=True)
 339
 340         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 341         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 342         self.go_die_i = Signal(n_int_alus, reset_less=True)
 343         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 344
 345         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 346
 347     def elaborate(self, platform):
 348         m = Module()
 349         comb = m.d.comb
 350         sync = m.d.sync
 351
 352         n_intfus = self.n_int_alus
 353
 354         # Integer FU-FU Dep Matrix
 355         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 356         m.submodules.intfudeps = intfudeps
 357         # Integer FU-Reg Dep Matrix
 358         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 359         m.submodules.intregdeps = intregdeps
 360
 361         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 362         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 363
 364         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 365         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 366
 367         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 368         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 369         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 370
 371         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 372         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 373         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 374         comb += intfudeps.go_die_i.eq(self.go_die_i)
 375         comb += self.readable_o.eq(intfudeps.readable_o)
 376         comb += self.writable_o.eq(intfudeps.writable_o)
 377
 378         # Connect function issue / arrays, and dest/src1/src2
 379         comb += intregdeps.dest_i.eq(self.dest_i)
 380         comb += intregdeps.src_i[0].eq(self.src1_i)
 381         comb += intregdeps.src_i[1].eq(self.src2_i)
 382
 383         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 384         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 385         comb += intregdeps.go_die_i.eq(self.go_die_i)
 386         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 387
 388         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 389         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 390         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 391
 392         return m
 393
 394
 395 class Scoreboard(Elaboratable):
 396     def __init__(self, rwid, n_regs):
 397         """ Inputs:
 398
 399             * :rwid:   bit width of register file(s) - both FP and INT
 400             * :n_regs: depth of register file(s) - number of FP and INT regs
 401         """
 402         self.rwid = rwid
 403         self.n_regs = n_regs
 404
 405         # Register Files
 406         self.intregs = RegFileArray(rwid, n_regs)
 407         self.fpregs = RegFileArray(rwid, n_regs)
 408
 409         # Memory (test for now)
 410         self.mem = TestMemory(self.rwid, 8) # not too big, takes too long
 411
 412         # issue q needs to get at these
 413         self.aluissue = IssueUnitGroup(2)
 414         self.lsissue = IssueUnitGroup(2)
 415         self.brissue = IssueUnitGroup(1)
 416         # and these
 417         self.alu_oper_i = Signal(4, reset_less=True)
 418         self.alu_imm_i = Signal(rwid, reset_less=True)
 419         self.br_oper_i = Signal(4, reset_less=True)
 420         self.br_imm_i = Signal(rwid, reset_less=True)
 421         self.ls_oper_i = Signal(4, reset_less=True)
 422         self.ls_imm_i = Signal(rwid, reset_less=True)
 423
 424         # inputs
 425         self.int_dest_i = Signal(range(n_regs), reset_less=True) # Dest R# in
 426         self.int_src1_i = Signal(range(n_regs), reset_less=True) # oper1 R# in
 427         self.int_src2_i = Signal(range(n_regs), reset_less=True) # oper2 R# in
 428         self.reg_enable_i = Signal(reset_less=True) # enable reg decode
 429
 430         # outputs
 431         self.issue_o = Signal(reset_less=True) # instruction was accepted
 432         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 433
 434         # for branch speculation experiment.  branch_direction = 0 if
 435         # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
 436         # branch_succ and branch_fail are requests to have the current
 437         # instruction be dependent on the branch unit "shadow" capability.
 438         self.branch_succ_i = Signal(reset_less=True)
 439         self.branch_fail_i = Signal(reset_less=True)
 440         self.branch_direction_o = Signal(2, reset_less=True)
 441
 442     def elaborate(self, platform):
 443         m = Module()
 444         comb = m.d.comb
 445         sync = m.d.sync
 446
 447         m.submodules.intregs = self.intregs
 448         m.submodules.fpregs = self.fpregs
 449         m.submodules.mem = mem = self.mem
 450
 451         # register ports
 452         int_dest = self.intregs.write_port("dest")
 453         int_src1 = self.intregs.read_port("src1")
 454         int_src2 = self.intregs.read_port("src2")
 455
 456         fp_dest = self.fpregs.write_port("dest")
 457         fp_src1 = self.fpregs.read_port("src1")
 458         fp_src2 = self.fpregs.read_port("src2")
 459
 460         # Int ALUs and BR ALUs
 461         n_int_alus = 5
 462         cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
 463         cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs
 464
 465         # LDST Comp Units
 466         n_ldsts = 2
 467         cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)
 468
 469         # Comp Units
 470         m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
 471         bgt = cub.bgt # get at the branch computation unit
 472         br1 = cub.br1
 473
 474         # Int FUs
 475         m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)
 476
 477         # Memory FUs
 478         m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)
 479
 480         # Memory Priority Picker 1: one gateway per memory port
 481         mempick1 = GroupPicker(n_ldsts) # picks 1 reader and 1 writer to intreg
 482         m.submodules.mempick1 = mempick1
 483
 484         # Count of number of FUs
 485         n_intfus = n_int_alus
 486         n_fp_fus = 0 # for now
 487
 488         # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
 489         intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
 490         m.submodules.intpick1 = intpick1
 491
 492         # INT/FP Issue Unit
 493         regdecode = RegDecode(self.n_regs)
 494         m.submodules.regdecode = regdecode
 495         issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
 496         m.submodules.issueunit = issueunit
 497
 498         # Shadow Matrix.  currently n_intfus shadows, to be used for
 499         # write-after-write hazards.  NOTE: there is one extra for branches,
 500         # so the shadow width is increased by 1
 501         m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
 502         m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)
 503
 504         # record previous instruction to cast shadow on current instruction
 505         prev_shadow = Signal(n_intfus)
 506
 507         # Branch Speculation recorder.  tracks the success/fail state as
 508         # each instruction is issued, so that when the branch occurs the
 509         # allow/cancel can be issued as appropriate.
 510         m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)
 511
 512         #---------
 513         # ok start wiring things together...
 514         # "now hear de word of de looord... dem bones dem bones dem dryy bones"
 515         # https://www.youtube.com/watch?v=pYb8Wm6-QfA
 516         #---------
 517
 518         #---------
 519         # Issue Unit is where it starts.  set up some in/outs for this module
 520         #---------
 521         comb += [    regdecode.dest_i.eq(self.int_dest_i),
 522                      regdecode.src1_i.eq(self.int_src1_i),
 523                      regdecode.src2_i.eq(self.int_src2_i),
 524                      regdecode.enable_i.eq(self.reg_enable_i),
 525                      self.issue_o.eq(issueunit.issue_o)
 526                     ]
 527
 528         # take these to outside (issue needs them)
 529         comb += cua.oper_i.eq(self.alu_oper_i)
 530         comb += cua.imm_i.eq(self.alu_imm_i)
 531         comb += cub.oper_i.eq(self.br_oper_i)
 532         comb += cub.imm_i.eq(self.br_imm_i)
 533         comb += cul.oper_i.eq(self.ls_oper_i)
 534         comb += cul.imm_i.eq(self.ls_imm_i)
 535
 536         # TODO: issueunit.f (FP)
 537
 538         # and int function issue / busy arrays, and dest/src1/src2
 539         comb += intfus.dest_i.eq(regdecode.dest_o)
 540         comb += intfus.src1_i.eq(regdecode.src1_o)
 541         comb += intfus.src2_i.eq(regdecode.src2_o)
 542
 543         fn_issue_o = issueunit.fn_issue_o
 544
 545         comb += intfus.fn_issue_i.eq(fn_issue_o)
 546         comb += issueunit.busy_i.eq(cu.busy_o)
 547         comb += self.busy_o.eq(cu.busy_o.bool())
 548
 549         #---------
 550         # Memory Function Unit
 551         #---------
 552         reset_b = Signal(cul.n_units, reset_less=True)
 553         sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)
 554
 555         comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
 556         comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
 557         comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit
 558
 559         # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
 560         # in a transitive fashion).  This cycle activates based on LDSTCompUnit
 561         # issue_i.  multi-issue gets a bit more complex but not a lot.
 562         prior_ldsts = Signal(cul.n_units, reset_less=True)
 563         sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
 564         with m.If(self.ls_oper_i[2]): # LD bit of operand
 565             comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
 566         with m.If(self.ls_oper_i[3]): # ST bit of operand
 567             comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)
 568
 569         # TODO: adr_rel_o needs to go into L1 Cache.  for now,
 570         # just immediately activate go_adr
 571         comb += cul.go_ad_i.eq(cul.adr_rel_o)
 572
 573         # connect up address data
 574         comb += memfus.addrs_i[0].eq(cul.units[0].addr_o)
 575         comb += memfus.addrs_i[1].eq(cul.units[1].addr_o)
 576
 577         # connect loadable / storable to go_ld/go_st.
 578         # XXX should only be done when the memory ld/st has actually happened!
 579         go_st_i = Signal(cul.n_units, reset_less=True)
 580         go_ld_i = Signal(cul.n_units, reset_less=True)
 581         comb += go_ld_i.eq(memfus.loadable_o & memfus.addr_nomatch_o &\
 582                                   cul.req_rel_o & cul.ld_o)
 583         comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
 584                                   cul.sto_rel_o & cul.st_o)
 585         comb += memfus.go_ld_i.eq(go_ld_i)
 586         comb += memfus.go_st_i.eq(go_st_i)
 587         #comb += cul.go_wr_i.eq(go_ld_i)
 588         comb += cul.go_st_i.eq(go_st_i)
 589
 590         #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 591         #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 592         #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 593
 594         #---------
 595         # merge shadow matrices outputs
 596         #---------
 597
 598         # these are explained in ShadowMatrix docstring, and are to be
 599         # connected to the FUReg and FUFU Matrices, to get them to reset
 600         anydie = Signal(n_intfus, reset_less=True)
 601         allshadown = Signal(n_intfus, reset_less=True)
 602         shreset = Signal(n_intfus, reset_less=True)
 603         comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
 604         comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
 605         comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)
 606
 607         #---------
 608         # connect fu-fu matrix
 609         #---------
 610
 611         # Group Picker... done manually for now.
 612         go_rd_o = intpick1.go_rd_o
 613         go_wr_o = intpick1.go_wr_o
 614         go_rd_i = intfus.go_rd_i
 615         go_wr_i = intfus.go_wr_i
 616         go_die_i = intfus.go_die_i
 617         # NOTE: connect to the shadowed versions so that they can "die" (reset)
 618         comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
 619         comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
 620         comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die
 621
 622         # Connect Picker
 623         #---------
 624         comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
 625         comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
 626         int_rd_o = intfus.readable_o
 627         int_wr_o = intfus.writable_o
 628         comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
 629         comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])
 630
 631         #---------
 632         # Shadow Matrix
 633         #---------
 634
 635         comb += shadows.issue_i.eq(fn_issue_o)
 636         #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 637         comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
 638         #---------
 639         # NOTE; this setup is for the instruction order preservation...
 640
 641         # connect shadows / go_dies to Computation Units
 642         comb += cu.shadown_i[0:n_intfus].eq(allshadown)
 643         comb += cu.go_die_i[0:n_intfus].eq(anydie)
 644
 645         # ok connect first n_int_fu shadows to busy lines, to create an
 646         # instruction-order linked-list-like arrangement, using a bit-matrix
 647         # (instead of e.g. a ring buffer).
 648
 649         # when written, the shadow can be cancelled (and was good)
 650         for i in range(n_intfus):
 651             comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])
 652
 653         # *previous* instruction shadows *current* instruction, and, obviously,
 654         # if the previous is completed (!busy) don't cast the shadow!
 655         comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
 656         for i in range(n_intfus):
 657             comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)
 658
 659         #---------
 660         # ... and this is for branch speculation.  it uses the extra bit
 661         # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
 662         # only needs to set shadow_i, s_fail_i and s_good_i
 663
 664         # issue captures shadow_i (if enabled)
 665         comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])
 666
 667         bactive = Signal(reset_less=True)
 668         comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)
 669
 670         # instruction being issued (fn_issue_o) has a shadow cast by the branch
 671         with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
 672             comb += bshadow.issue_i.eq(fn_issue_o)
 673             for i in range(n_intfus):
 674                 with m.If(fn_issue_o & (Const(1<<i))):
 675                     comb += bshadow.shadow_i[i][0].eq(1)
 676
 677         # finally, we need an indicator to the test infrastructure as to
 678         # whether the branch succeeded or failed, plus, link up to the
 679         # "recorder" of whether the instruction was under shadow or not
 680
 681         with m.If(br1.issue_i):
 682             sync += bspec.active_i.eq(1)
 683         with m.If(self.branch_succ_i):
 684             comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 685         with m.If(self.branch_fail_i):
 686             comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
 687
 688         # branch is active (TODO: a better signal: this is over-using the
 689         # go_write signal - actually the branch should not be "writing")
 690         with m.If(br1.go_wr_i):
 691             sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
 692             sync += bspec.active_i.eq(0)
 693             comb += bspec.br_i.eq(1)
 694             # branch occurs if data == 1, failed if data == 0
 695             comb += bspec.br_ok_i.eq(br1.data_o == 1)
 696             for i in range(n_intfus):
 697                 # *expected* direction of the branch matched against *actual*
 698                 comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
 699                 # ... or it didn't
 700                 comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])
 701
 702         #---------
 703         # Connect Register File(s)
 704         #---------
 705         comb += int_dest.wen.eq(intfus.dest_rsel_o)
 706         comb += int_src1.ren.eq(intfus.src1_rsel_o)
 707         comb += int_src2.ren.eq(intfus.src2_rsel_o)
 708
 709         # connect ALUs to regfule
 710         comb += int_dest.data_i.eq(cu.data_o)
 711         comb += cu.src1_i.eq(int_src1.data_o)
 712         comb += cu.src2_i.eq(int_src2.data_o)
 713
 714         # connect ALU Computation Units
 715         comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
 716         comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
 717         comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])
 718
 719         return m
 720
 721     def __iter__(self):
 722         yield from self.intregs
 723         yield from self.fpregs
 724         yield self.int_dest_i
 725         yield self.int_src1_i
 726         yield self.int_src2_i
 727         yield self.issue_o
 728         yield self.branch_succ_i
 729         yield self.branch_fail_i
 730         yield self.branch_direction_o
 731
 732     def ports(self):
 733         return list(self)
 734
 735
 736 class IssueToScoreboard(Elaboratable):
 737
 738     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 739         self.qlen = qlen
 740         self.n_in = n_in
 741         self.n_out = n_out
 742         self.rwid = rwid
 743         self.opw = opwid
 744         self.n_regs = n_regs
 745
 746         mqbits = unsigned(int(log(qlen) / log(2))+2)
 747         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 748         self.p_ready_o = Signal() # instructions were added
 749         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 750
 751         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 752         self.qlen_o = Signal(mqbits, reset_less=True)
 753
 754     def elaborate(self, platform):
 755         m = Module()
 756         comb = m.d.comb
 757         sync = m.d.sync
 758
 759         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 760         sc = Scoreboard(self.rwid, self.n_regs)
 761         m.submodules.iq = iq
 762         m.submodules.sc = sc
 763
 764         # get at the regfile for testing
 765         self.intregs = sc.intregs
 766
 767         # and the "busy" signal and instruction queue length
 768         comb += self.busy_o.eq(sc.busy_o)
 769         comb += self.qlen_o.eq(iq.qlen_o)
 770
 771         # link up instruction queue
 772         comb += iq.p_add_i.eq(self.p_add_i)
 773         comb += self.p_ready_o.eq(iq.p_ready_o)
 774         for i in range(self.n_in):
 775             comb += eq(iq.data_i[i], self.data_i[i])
 776
 777         # take instruction and process it.  note that it's possible to
 778         # "inspect" the queue contents *without* actually removing the
 779         # items.  items are only removed when the
 780
 781         # in "waiting" state
 782         wait_issue_br = Signal()
 783         wait_issue_alu = Signal()
 784         wait_issue_ls = Signal()
 785
 786         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 787             # set instruction pop length to 1 if the unit accepted
 788             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 789                 with m.If(iq.qlen_o != 0):
 790                     comb += iq.n_sub_i.eq(1)
 791             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 792                 with m.If(iq.qlen_o != 0):
 793                     comb += iq.n_sub_i.eq(1)
 794             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 795                 with m.If(iq.qlen_o != 0):
 796                     comb += iq.n_sub_i.eq(1)
 797
 798         # see if some instruction(s) are here.  note that this is
 799         # "inspecting" the in-place queue.  note also that on the
 800         # cycle following "waiting" for fn_issue_o to be set, the
 801         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 802         with m.If(iq.qlen_o != 0):
 803             # get the operands and operation
 804             imm = iq.data_o[0].imm_i
 805             dest = iq.data_o[0].dest_i
 806             src1 = iq.data_o[0].src1_i
 807             src2 = iq.data_o[0].src2_i
 808             op = iq.data_o[0].oper_i
 809             opi = iq.data_o[0].opim_i # immediate set
 810
 811             # set the src/dest regs
 812             comb += sc.int_dest_i.eq(dest)
 813             comb += sc.int_src1_i.eq(src1)
 814             comb += sc.int_src2_i.eq(src2)
 815             comb += sc.reg_enable_i.eq(1) # enable the regfile
 816
 817             # choose a Function-Unit-Group
 818             with m.If((op & (0x3<<2)) != 0): # branch
 819                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 820                 comb += sc.br_imm_i.eq(imm)
 821                 comb += sc.brissue.insn_i.eq(1)
 822                 comb += wait_issue_br.eq(1)
 823             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 824                 # see compldst.py
 825                 # bit 0: ADD/SUB
 826                 # bit 1: immed
 827                 # bit 4: LD
 828                 # bit 5: ST
 829                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 830                 comb += sc.ls_imm_i.eq(imm)
 831                 comb += sc.lsissue.insn_i.eq(1)
 832                 comb += wait_issue_ls.eq(1)
 833             with m.Else(): # alu
 834                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 835                 comb += sc.alu_imm_i.eq(imm)
 836                 comb += sc.aluissue.insn_i.eq(1)
 837                 comb += wait_issue_alu.eq(1)
 838
 839             # XXX TODO
 840             # these indicate that the instruction is to be made
 841             # shadow-dependent on
 842             # (either) branch success or branch fail
 843             #yield sc.branch_fail_i.eq(branch_fail)
 844             #yield sc.branch_succ_i.eq(branch_success)
 845
 846         return m
 847
 848     def __iter__(self):
 849         yield self.p_ready_o
 850         for o in self.data_i:
 851             yield from list(o)
 852         yield self.p_add_i
 853
 854     def ports(self):
 855         return list(self)
 856
 857
# Operation numbers used by the test instructions in this file, with the
# meaning given to them by RegSim.op.  Values 4-7 have bit 2 set, which is
# what IssueToScoreboard and int_instr use to route them to the branch unit.
IADD = 0  # src1 + src2
ISUB = 1  # src1 - src2
IMUL = 2  # src1 * src2
ISHF = 3  # src1 >> src2
IBGT = 4  # 1 if src1 > src2 else 0
IBLT = 5  # 1 if src1 < src2 else 0
IBEQ = 6  # 1 if src1 == src2 else 0
IBNE = 7  # 1 if src1 != src2 else 0
 866
 867
 868 class RegSim:
 869     def __init__(self, rwidth, nregs):
 870         self.rwidth = rwidth
 871         self.regs = [0] * nregs
 872
 873     def op(self, op, op_imm, imm, src1, src2, dest):
 874         maxbits = (1 << self.rwidth) - 1
 875         src1 = self.regs[src1] & maxbits
 876         if op_imm:
 877             src2 = imm
 878         else:
 879             src2 = self.regs[src2] & maxbits
 880         if op == IADD:
 881             val = src1 + src2
 882         elif op == ISUB:
 883             val = src1 - src2
 884         elif op == IMUL:
 885             val = src1 * src2
 886         elif op == ISHF:
 887             val = src1 >> (src2 & maxbits)
 888         elif op == IBGT:
 889             val = int(src1 > src2)
 890         elif op == IBLT:
 891             val = int(src1 < src2)
 892         elif op == IBEQ:
 893             val = int(src1 == src2)
 894         elif op == IBNE:
 895             val = int(src1 != src2)
 896         else:
 897             return 0 # LD/ST TODO
 898         val &= maxbits
 899         self.setval(dest, val)
 900         return val
 901
 902     def setval(self, dest, val):
 903         print ("sim setval", dest, hex(val))
 904         self.regs[dest] = val
 905
 906     def dump(self, dut):
 907         for i, val in enumerate(self.regs):
 908             reg = yield dut.intregs.regs[i].reg
 909             okstr = "OK" if reg == val else "!ok"
 910             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 911
 912     def check(self, dut):
 913         for i, val in enumerate(self.regs):
 914             reg = yield dut.intregs.regs[i].reg
 915             if reg != val:
 916                 print("reg %d expected %x received %x\n" % (i, val, reg))
 917                 yield from self.dump(dut)
 918                 assert False
 919
 920 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 921             branch_success, branch_fail):
 922     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 923                'src1_i': src1, 'src2_i': src2}]
 924
 925     sendlen = 1
 926     for idx in range(sendlen):
 927         yield from eq(dut.data_i[idx], instrs[idx])
 928         di = yield dut.data_i[idx]
 929         print ("senddata %d %x" % (idx, di))
 930     yield dut.p_add_i.eq(sendlen)
 931     yield
 932     o_p_ready = yield dut.p_ready_o
 933     while not o_p_ready:
 934         yield
 935         o_p_ready = yield dut.p_ready_o
 936
 937     yield dut.p_add_i.eq(0)
 938
 939
 940 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 941     yield from disable_issue(dut)
 942     yield dut.int_dest_i.eq(dest)
 943     yield dut.int_src1_i.eq(src1)
 944     yield dut.int_src2_i.eq(src2)
 945     if (op & (0x3<<2)) != 0: # branch
 946         yield dut.brissue.insn_i.eq(1)
 947         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 948         yield dut.br_imm_i.eq(imm)
 949         dut_issue = dut.brissue
 950     else:
 951         yield dut.aluissue.insn_i.eq(1)
 952         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 953         yield dut.alu_imm_i.eq(imm)
 954         dut_issue = dut.aluissue
 955     yield dut.reg_enable_i.eq(1)
 956
 957     # these indicate that the instruction is to be made shadow-dependent on
 958     # (either) branch success or branch fail
 959     yield dut.branch_fail_i.eq(branch_fail)
 960     yield dut.branch_succ_i.eq(branch_success)
 961
 962     yield
 963     yield from wait_for_issue(dut, dut_issue)
 964
 965
 966 def print_reg(dut, rnums):
 967     rs = []
 968     for rnum in rnums:
 969         reg = yield dut.intregs.regs[rnum].reg
 970         rs.append("%x" % reg)
 971     rnums = map(str, rnums)
 972     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 973
 974
 975 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 976     insts = []
 977     for i in range(n_ops):
 978         src1 = randint(1, dut.n_regs-1)
 979         src2 = randint(1, dut.n_regs-1)
 980         imm = randint(1, (1<<dut.rwid)-1)
 981         dest = randint(1, dut.n_regs-1)
 982         op = randint(0, max_opnums)
 983         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 984
 985         if shadowing:
 986             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 987         else:
 988             insts.append((src1, src2, dest, op, opi, imm))
 989     return insts
 990
 991
 992 def wait_for_busy_clear(dut):
 993     while True:
 994         busy_o = yield dut.busy_o
 995         if not busy_o:
 996             break
 997         print ("busy",)
 998         yield
 999
def disable_issue(dut):
    """Simulation generator: clear insn_i on the ALU, branch and LD/ST issue units."""
    for unit_name in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, unit_name).insn_i.eq(0)
1004
1005
def wait_for_issue(dut, dut_issue):
    """Simulation generator: wait for ``dut_issue`` to accept an instruction.

    Steps the clock while ``dut_issue.fn_issue_o`` reads zero.  Once it is
    non-zero, all issue requests are withdrawn and the regfile enable is
    cleared.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy")
        #yield from print_reg(dut, [1,2,3])
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
    #yield from print_reg(dut, [1,2,3])
1017
def scoreboard_branch_sim(dut, alusim):
    """Simulation generator: branch/shadow test driven through int_instr.

    ``dut`` must provide ``branch_direction_o``, ``n_regs``, ``intregs`` and
    the signals used by int_instr(); ``alusim`` is the RegSim reference
    model.  Instructions here are 5-field tuples
    ``(src1, src2, dest, op, (shadow_on, shadow_off))``.  For a branch
    (op >= 4) the ``dest`` field instead holds a pair of instruction lists
    ``(branch_ok, branch_fail)``, and src2 is used as the destination.

    The instructions are first issued to the dut, then replayed on
    ``alusim`` (following only the branch path that the model takes), and
    finally the two register files are compared.

    int_instr() and RegSim.op() both take an immediate, which this
    instruction format does not carry: 0 is passed, with the
    "use immediate" flag off, so operands always come from registers.
    """
    iseed = 3

    for _ in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # create some instructions: branches create a tree
            # NOTE(review): create_random_ops returns entries that also
            # carry opi and imm fields, which the 5-field unpacking below
            # does not accept.  Adapt before enabling this block.
            insts = create_random_ops(dut, 1, True, 1)
            #insts.append((6, 6, 1, 2, (0, 0)))
            #insts.append((4, 3, 3, 0, (0, 0)))

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            #op = randint(4, 7)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            # fixed regression: one add, then a BGT with an empty
            # "success" path and a single instruction on the "fail" path
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            #branch_ok.append  ( (5, 7, 5, 1, (1, 0)) )
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            #branch_fail.append( None )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the issue loop consumes insts: keep a copy for the model replay
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        while insts:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = insts.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                # branch already resolved: issue without any shadow
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        insts.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        insts.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # immediate is 0: this instruction format has no imm field
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        # replay on the reference model
        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # op_imm=0, imm=0: second operand comes from register src2
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                # only the path the model actually takes gets replayed
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1130
1131
def scoreboard_sim(dut, alusim):
    """Simulation generator: run a list of instructions through the dut.

    ``dut`` is driven through instr_q() and must also provide ``n_regs``,
    ``intregs``, ``qlen_o`` and ``busy_o``; ``alusim`` is the RegSim
    reference model.  Registers 1..n_regs-1 are loaded with identical
    random values in both, each instruction is applied to ``alusim`` and
    queued on the dut, and once the queue has drained and the dut is no
    longer busy the two register files are compared.

    Instructions are 7-field tuples
    ``(src1, src2, dest, op, opi, imm, (br_ok, br_fail))``.  The
    ``if True`` / ``if False`` blocks are hand-edited switches selecting
    which regression vectors are run.
    """

    # fixed seed: the same register values on every run
    seed(0)

    for i in range(1):

        # set random values in the registers
        # (note: this reuses the outer loop's variable i)
        for i in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            #val = 31+i*3
            #val = i
            yield dut.intregs.regs[i].reg.eq(val)
            alusim.setval(i, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if False: # LD/ST test (with immediate)
            instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )
            #instrs.append( (1, 2, 0, 0x10, 1, 1, (0, 0)) )

        # subtract with immediate (op=1, opi=1, imm=20)
        if True:
            instrs.append( (1, 2, 2, 1, 1, 20, (0, 0)) )

        # op 4 followed by two multiplies
        if True:
            instrs.append( (7, 3, 2, 4, 0, 0, (0, 0)) )
            instrs.append( (7, 6, 6, 2, 0, 0, (0, 0)) )
            instrs.append( (1, 7, 2, 2, 0, 0, (0, 0)) )

        # chain of operations that read and write registers 3 and 5
        if True:
            instrs.append((2, 3, 3, 0, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 1, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 2, 0, 0, (0, 0)))
            instrs.append((5, 3, 3, 3, 0, 0, (0, 0)))
            instrs.append((3, 5, 5, 0, 0, 0, (0, 0)))

        if False:
            instrs.append( (3, 3, 4, 0, 0, 13979, (0, 0)))
            instrs.append( (6, 4, 1, 2, 0, 40976, (0, 0)))
            instrs.append( (1, 4, 7, 4, 1, 23652, (0, 0)))

        # NOTE(review): the disabled blocks from here down to the issue
        # loop use 4-, 5- and 6-field tuples.  The loop below unpacks
        # exactly 7 fields, so these would raise ValueError if enabled
        # without first being converted.
        if False:
            instrs.append((5, 6, 2, 1))
            instrs.append((2, 2, 4, 0))
            #instrs.append((2, 2, 3, 1))

        if False:
            instrs.append((2, 1, 2, 3))

        if False:
            instrs.append((2, 6, 2, 1))
            instrs.append((2, 1, 2, 0))

        if False:
            instrs.append((1, 2, 7, 2))
            instrs.append((7, 1, 5, 0))
            instrs.append((4, 4, 1, 1))

        if False:
            instrs.append((5, 6, 2, 2))
            instrs.append((1, 1, 4, 1))
            instrs.append((6, 5, 3, 0))

        if False:
            # Write-after-Write Hazard
            instrs.append( (3, 6, 7, 2) )
            instrs.append( (4, 4, 7, 1) )

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))

        if False:
            # self-read-write sandwich
            instrs.append((5, 6, 1, 2))
            instrs.append((1, 1, 1, 1))
            instrs.append((1, 5, 3, 0))

        if False:
            # very weird failure
            instrs.append( (5, 2, 5, 2) )
            instrs.append( (2, 6, 3, 0) )
            instrs.append( (4, 2, 2, 1) )

        if False:
            # op 4 on fixed register values (r5=4, r3=5), then an
            # instruction marked (0, 1)
            v1 = 4
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (0, 1)))

        if False:
            # as above but with r5=6 and the second instruction marked (1, 0)
            v1 = 6
            yield dut.intregs.regs[5].reg.eq(v1)
            alusim.setval(5, v1)
            yield dut.intregs.regs[3].reg.eq(5)
            alusim.setval(3, 5)
            instrs.append((5, 3, 3, 4, (0, 0)))
            instrs.append((4, 2, 1, 2, (1, 0)))

        if False:
            instrs.append( (4, 3, 5, 1, 0, (0, 0)) )
            instrs.append( (5, 2, 3, 1, 0, (0, 0)) )
            instrs.append( (7, 1, 5, 2, 0, (0, 0)) )
            instrs.append( (5, 6, 6, 4, 0, (0, 0)) )
            instrs.append( (7, 5, 2, 2, 0, (1, 0)) )
            instrs.append( (1, 7, 5, 0, 0, (0, 1)) )
            instrs.append( (1, 6, 1, 2, 0, (1, 0)) )
            instrs.append( (1, 6, 7, 3, 0, (0, 0)) )
            instrs.append( (6, 7, 7, 0, 0, (0, 0)) )

        # issue instruction(s), wait for issue to be free before proceeding
        for i, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % \
                    (i, src1, src2, dest, op, opi, imm))
            # update the reference model first, then queue on the dut
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking
        # (first for the instruction queue to become empty...)
        while True:
            iqlen = yield dut.qlen_o
            if iqlen == 0:
                break
            yield
        # (...then a few extra cycles before polling busy_o)
        yield
        yield
        yield
        yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1278
1279
def test_scoreboard():
    """Build the design, write its RTLIL to disk and run scoreboard_sim.

    Produces test_scoreboard6600.il and test_scoreboard6600.vcd in the
    current directory.
    """
    dut = IssueToScoreboard(qlen=2, n_in=1, n_out=1,
                            rwid=16, opwid=8, n_regs=8)
    alusim = RegSim(rwidth=16, nregs=8)
    memsim = MemSim(regwid=16, addrw=16)

    # dump the elaborated design as RTLIL
    il_text = rtlil.convert(dut, ports=dut.ports())
    with open("test_scoreboard6600.il", "w") as il_file:
        il_file.write(il_text)

    testbench = scoreboard_sim(dut, alusim)
    run_simulation(dut, testbench, vcd_name='test_scoreboard6600.vcd')

    #run_simulation(dut, scoreboard_branch_sim(dut, alusim),
    #                    vcd_name='test_scoreboard6600.vcd')
1293
1294
# run the scoreboard test when this file is executed as a script
if __name__ == '__main__':
    test_scoreboard()