src/experiment/score6600.py

   1 from nmigen.compat.sim import run_simulation
   2 from nmigen.cli import verilog, rtlil
   3 from nmigen import Module, Const, Signal, Array, Cat, Elaboratable, Memory
   4
   5 from regfile.regfile import RegFileArray, treereduce
   6 from scoreboard.fu_fu_matrix import FUFUDepMatrix
   7 from scoreboard.fu_reg_matrix import FURegDepMatrix
   8 from scoreboard.global_pending import GlobalPending
   9 from scoreboard.group_picker import GroupPicker
  10 from scoreboard.issue_unit import IssueUnitGroup, IssueUnitArray, RegDecode
  11 from scoreboard.shadow import ShadowMatrix, BranchSpeculationRecord
  12 from scoreboard.instruction_q import Instruction, InstructionQ
  13 from scoreboard.memfu import MemFunctionUnits
  14
  15 from compalu import ComputationUnitNoDelay
  16 from compldst import LDSTCompUnit
  17
  18 from alu_hier import ALU, BranchALU
  19 from nmutil.latch import SRLatch
  20 from nmutil.nmoperator import eq
  21
  22 from random import randint, seed
  23 from copy import deepcopy
  24 from math import log
  25
  26
  27 class TestMemory(Elaboratable):
  28     def __init__(self, regwid, addrw):
  29         self.ddepth = 1 # regwid //8
  30         depth = (1<<addrw) // self.ddepth
  31         self.adr   = Signal(addrw)
  32         self.dat_r = Signal(regwid)
  33         self.dat_w = Signal(regwid)
  34         self.we    = Signal()
  35         self.mem   = Memory(width=regwid, depth=depth, init=range(0, depth))
  36
  37     def elaborate(self, platform):
  38         m = Module()
  39         m.submodules.rdport = rdport = self.mem.read_port()
  40         m.submodules.wrport = wrport = self.mem.write_port()
  41         m.d.comb += [
  42             rdport.addr.eq(self.adr[self.ddepth:]), # ignore low bits
  43             self.dat_r.eq(rdport.data),
  44             wrport.addr.eq(self.adr),
  45             wrport.data.eq(self.dat_w),
  46             wrport.en.eq(self.we),
  47         ]
  48         return m
  49
  50
  51 class MemSim:
  52     def __init__(self, regwid, addrw):
  53         self.regwid = regwid
  54         self.ddepth = 1 # regwid//8
  55         depth = (1<<addrw) // self.ddepth
  56         self.mem = list(range(0, depth))
  57
  58     def ld(self, addr):
  59         return self.mem[addr>>self.ddepth]
  60
  61     def st(self, addr, data):
  62         self.mem[addr>>self.ddepth] = data & ((1<<self.regwid)-1)
  63
  64
  65 class CompUnitsBase(Elaboratable):
  66     """ Computation Unit Base class.
  67
  68         Amazingly, this class works recursively.  It's supposed to just
  69         look after some ALUs (that can handle the same operations),
  70         grouping them together, however it turns out that the same code
  71         can also group *groups* of Computation Units together as well.
  72
  73         Basically it was intended just to concatenate the ALU's issue,
  74         go_rd etc. signals together, which start out as bits and become
  75         sequences.  Turns out that the same trick works just as well
  76         on Computation Units!
  77
  78         So this class may be used recursively to present a top-level
  79         sequential concatenation of all the signals in and out of
  80         ALUs, whilst at the same time making it convenient to group
  81         ALUs together.
  82
  83         At the lower level, the intent is that groups of (identical)
  84         ALUs may be passed the same operation.  Even beyond that,
  85         the intent is that that group of (identical) ALUs actually
  86         share the *same pipeline* and as such become a "Concurrent
  87         Computation Unit" as defined by Mitch Alsup (see section
  88         11.4.9.3)
  89     """
  90     def __init__(self, rwid, units, ldstmode=False):
  91         """ Inputs:
  92
  93             * :rwid:   bit width of register file(s) - both FP and INT
  94             * :units: sequence of ALUs (or CompUnitsBase derivatives)
  95         """
  96         self.units = units
  97         self.ldstmode = ldstmode
  98         self.rwid = rwid
  99         self.rwid = rwid
 100         if units and isinstance(units[0], CompUnitsBase):
 101             self.n_units = 0
 102             for u in self.units:
 103                 self.n_units += u.n_units
 104         else:
 105             self.n_units = len(units)
 106
 107         n_units = self.n_units
 108
 109         # inputs
 110         self.issue_i = Signal(n_units, reset_less=True)
 111         self.go_rd_i = Signal(n_units, reset_less=True)
 112         self.go_wr_i = Signal(n_units, reset_less=True)
 113         self.shadown_i = Signal(n_units, reset_less=True)
 114         self.go_die_i = Signal(n_units, reset_less=True)
 115         if ldstmode:
 116             self.go_ad_i = Signal(n_units, reset_less=True)
 117             self.go_st_i = Signal(n_units, reset_less=True)
 118
 119         # outputs
 120         self.busy_o = Signal(n_units, reset_less=True)
 121         self.rd_rel_o = Signal(n_units, reset_less=True)
 122         self.req_rel_o = Signal(n_units, reset_less=True)
 123         if ldstmode:
 124             self.ld_o = Signal(n_units, reset_less=True) # op is LD
 125             self.st_o = Signal(n_units, reset_less=True) # op is ST
 126             self.adr_rel_o = Signal(n_units, reset_less=True)
 127             self.sto_rel_o = Signal(n_units, reset_less=True)
 128             self.req_rel_o = Signal(n_units, reset_less=True)
 129             self.load_mem_o = Signal(n_units, reset_less=True)
 130             self.stwd_mem_o = Signal(n_units, reset_less=True)
 131
 132         # in/out register data (note: not register#, actual data)
 133         self.data_o = Signal(rwid, reset_less=True)
 134         self.src1_i = Signal(rwid, reset_less=True)
 135         self.src2_i = Signal(rwid, reset_less=True)
 136         # input operand
 137
 138     def elaborate(self, platform):
 139         m = Module()
 140         comb = m.d.comb
 141
 142         for i, alu in enumerate(self.units):
 143             setattr(m.submodules, "comp%d" % i, alu)
 144
 145         go_rd_l = []
 146         go_wr_l = []
 147         issue_l = []
 148         busy_l = []
 149         req_rel_l = []
 150         rd_rel_l = []
 151         shadow_l = []
 152         godie_l = []
 153         for alu in self.units:
 154             req_rel_l.append(alu.req_rel_o)
 155             rd_rel_l.append(alu.rd_rel_o)
 156             shadow_l.append(alu.shadown_i)
 157             godie_l.append(alu.go_die_i)
 158             go_wr_l.append(alu.go_wr_i)
 159             go_rd_l.append(alu.go_rd_i)
 160             issue_l.append(alu.issue_i)
 161             busy_l.append(alu.busy_o)
 162         comb += self.rd_rel_o.eq(Cat(*rd_rel_l))
 163         comb += self.req_rel_o.eq(Cat(*req_rel_l))
 164         comb += self.busy_o.eq(Cat(*busy_l))
 165         comb += Cat(*godie_l).eq(self.go_die_i)
 166         comb += Cat(*shadow_l).eq(self.shadown_i)
 167         comb += Cat(*go_wr_l).eq(self.go_wr_i)
 168         comb += Cat(*go_rd_l).eq(self.go_rd_i)
 169         comb += Cat(*issue_l).eq(self.issue_i)
 170
 171         # connect data register input/output
 172
 173         # merge (OR) all integer FU / ALU outputs to a single value
 174         # bit of a hack: treereduce needs a list with an item named "data_o"
 175         if self.units:
 176             data_o = treereduce(self.units)
 177             comb += self.data_o.eq(data_o)
 178
 179         for i, alu in enumerate(self.units):
 180             comb += alu.src1_i.eq(self.src1_i)
 181             comb += alu.src2_i.eq(self.src2_i)
 182
 183         if not self.ldstmode:
 184             return m
 185
 186         ldmem_l = []
 187         stmem_l = []
 188         go_ad_l = []
 189         go_st_l = []
 190         ld_l = []
 191         st_l = []
 192         adr_rel_l = []
 193         sto_rel_l = []
 194         for alu in self.units:
 195             ld_l.append(alu.ld_o)
 196             st_l.append(alu.st_o)
 197             adr_rel_l.append(alu.adr_rel_o)
 198             sto_rel_l.append(alu.sto_rel_o)
 199             ldmem_l.append(alu.load_mem_o)
 200             stmem_l.append(alu.stwd_mem_o)
 201             go_ad_l.append(alu.go_ad_i)
 202             go_st_l.append(alu.go_st_i)
 203         comb += self.adr_rel_o.eq(Cat(*adr_rel_l))
 204         comb += self.sto_rel_o.eq(Cat(*sto_rel_l))
 205         comb += self.load_mem_o.eq(Cat(*ldmem_l))
 206         comb += self.stwd_mem_o.eq(Cat(*stmem_l))
 207         comb += Cat(*go_ad_l).eq(self.go_ad_i)
 208         comb += Cat(*go_st_l).eq(self.go_st_i)
 209
 210         return m
 211
 212
 213 class CompUnitLDSTs(CompUnitsBase):
 214
 215     def __init__(self, rwid, opwid, n_ldsts, mem):
 216         """ Inputs:
 217
 218             * :rwid:   bit width of register file(s) - both FP and INT
 219             * :opwid:  operand bit width
 220         """
 221         self.opwid = opwid
 222
 223         # inputs
 224         self.oper_i = Signal(opwid, reset_less=True)
 225         self.imm_i = Signal(rwid, reset_less=True)
 226
 227         # Int ALUs
 228         self.alus = []
 229         for i in range(n_ldsts):
 230             self.alus.append(ALU(rwid))
 231
 232         units = []
 233         for alu in self.alus:
 234             aluopwid = 4 # see compldst.py for "internal" opcode
 235             units.append(LDSTCompUnit(rwid, aluopwid, alu, mem))
 236
 237         CompUnitsBase.__init__(self, rwid, units, ldstmode=True)
 238
 239     def elaborate(self, platform):
 240         m = CompUnitsBase.elaborate(self, platform)
 241         comb = m.d.comb
 242
 243         # hand the same operation to all units, 4 lower bits though
 244         for alu in self.units:
 245             comb += alu.oper_i[0:4].eq(self.oper_i)
 246             comb += alu.imm_i.eq(self.imm_i)
 247             comb += alu.isalu_i.eq(0)
 248
 249         return m
 250
 251
 252 class CompUnitALUs(CompUnitsBase):
 253
 254     def __init__(self, rwid, opwid, n_alus):
 255         """ Inputs:
 256
 257             * :rwid:   bit width of register file(s) - both FP and INT
 258             * :opwid:  operand bit width
 259         """
 260         self.opwid = opwid
 261
 262         # inputs
 263         self.oper_i = Signal(opwid, reset_less=True)
 264         self.imm_i = Signal(rwid, reset_less=True)
 265
 266         # Int ALUs
 267         alus = []
 268         for i in range(n_alus):
 269             alus.append(ALU(rwid))
 270
 271         units = []
 272         for alu in alus:
 273             aluopwid = 3 # extra bit for immediate mode
 274             units.append(ComputationUnitNoDelay(rwid, aluopwid, alu))
 275
 276         CompUnitsBase.__init__(self, rwid, units)
 277
 278     def elaborate(self, platform):
 279         m = CompUnitsBase.elaborate(self, platform)
 280         comb = m.d.comb
 281
 282         # hand the same operation to all units, only lower 3 bits though
 283         for alu in self.units:
 284             comb += alu.oper_i[0:3].eq(self.oper_i)
 285             comb += alu.imm_i.eq(self.imm_i)
 286
 287         return m
 288
 289
 290 class CompUnitBR(CompUnitsBase):
 291
 292     def __init__(self, rwid, opwid):
 293         """ Inputs:
 294
 295             * :rwid:   bit width of register file(s) - both FP and INT
 296             * :opwid:  operand bit width
 297
 298             Note: bgt unit is returned so that a shadow unit can be created
 299             for it
 300         """
 301         self.opwid = opwid
 302
 303         # inputs
 304         self.oper_i = Signal(opwid, reset_less=True)
 305         self.imm_i = Signal(rwid, reset_less=True)
 306
 307         # Branch ALU and CU
 308         self.bgt = BranchALU(rwid)
 309         aluopwid = 3 # extra bit for immediate mode
 310         self.br1 = ComputationUnitNoDelay(rwid, aluopwid, self.bgt)
 311         CompUnitsBase.__init__(self, rwid, [self.br1])
 312
 313     def elaborate(self, platform):
 314         m = CompUnitsBase.elaborate(self, platform)
 315         comb = m.d.comb
 316
 317         # hand the same operation to all units
 318         for alu in self.units:
 319             comb += alu.oper_i.eq(self.oper_i)
 320             comb += alu.imm_i.eq(self.imm_i)
 321
 322         return m
 323
 324
 325 class FunctionUnits(Elaboratable):
 326
 327     def __init__(self, n_regs, n_int_alus):
 328         self.n_regs = n_regs
 329         self.n_int_alus = n_int_alus
 330
 331         self.dest_i = Signal(n_regs, reset_less=True) # Dest R# in
 332         self.src1_i = Signal(n_regs, reset_less=True) # oper1 R# in
 333         self.src2_i = Signal(n_regs, reset_less=True) # oper2 R# in
 334
 335         self.g_int_rd_pend_o = Signal(n_regs, reset_less=True)
 336         self.g_int_wr_pend_o = Signal(n_regs, reset_less=True)
 337
 338         self.dest_rsel_o = Signal(n_regs, reset_less=True) # dest reg (bot)
 339         self.src1_rsel_o = Signal(n_regs, reset_less=True) # src1 reg (bot)
 340         self.src2_rsel_o = Signal(n_regs, reset_less=True) # src2 reg (bot)
 341
 342         self.readable_o = Signal(n_int_alus, reset_less=True)
 343         self.writable_o = Signal(n_int_alus, reset_less=True)
 344
 345         self.go_rd_i = Signal(n_int_alus, reset_less=True)
 346         self.go_wr_i = Signal(n_int_alus, reset_less=True)
 347         self.go_die_i = Signal(n_int_alus, reset_less=True)
 348         self.fn_issue_i = Signal(n_int_alus, reset_less=True)
 349
 350         # Note: FURegs wr_pend_o is also outputted from here, for use in WaWGrid
 351
 352     def elaborate(self, platform):
 353         m = Module()
 354         comb = m.d.comb
 355         sync = m.d.sync
 356
 357         n_intfus = self.n_int_alus
 358
 359         # Integer FU-FU Dep Matrix
 360         intfudeps = FUFUDepMatrix(n_intfus, n_intfus)
 361         m.submodules.intfudeps = intfudeps
 362         # Integer FU-Reg Dep Matrix
 363         intregdeps = FURegDepMatrix(n_intfus, self.n_regs, 2)
 364         m.submodules.intregdeps = intregdeps
 365
 366         comb += self.g_int_rd_pend_o.eq(intregdeps.v_rd_rsel_o)
 367         comb += self.g_int_wr_pend_o.eq(intregdeps.v_wr_rsel_o)
 368
 369         comb += intregdeps.rd_pend_i.eq(intregdeps.v_rd_rsel_o)
 370         comb += intregdeps.wr_pend_i.eq(intregdeps.v_wr_rsel_o)
 371
 372         comb += intfudeps.rd_pend_i.eq(intregdeps.rd_pend_o)
 373         comb += intfudeps.wr_pend_i.eq(intregdeps.wr_pend_o)
 374         self.wr_pend_o = intregdeps.wr_pend_o # also output for use in WaWGrid
 375
 376         comb += intfudeps.issue_i.eq(self.fn_issue_i)
 377         comb += intfudeps.go_rd_i.eq(self.go_rd_i)
 378         comb += intfudeps.go_wr_i.eq(self.go_wr_i)
 379         comb += intfudeps.go_die_i.eq(self.go_die_i)
 380         comb += self.readable_o.eq(intfudeps.readable_o)
 381         comb += self.writable_o.eq(intfudeps.writable_o)
 382
 383         # Connect function issue / arrays, and dest/src1/src2
 384         comb += intregdeps.dest_i.eq(self.dest_i)
 385         comb += intregdeps.src_i[0].eq(self.src1_i)
 386         comb += intregdeps.src_i[1].eq(self.src2_i)
 387
 388         comb += intregdeps.go_rd_i.eq(self.go_rd_i)
 389         comb += intregdeps.go_wr_i.eq(self.go_wr_i)
 390         comb += intregdeps.go_die_i.eq(self.go_die_i)
 391         comb += intregdeps.issue_i.eq(self.fn_issue_i)
 392
 393         comb += self.dest_rsel_o.eq(intregdeps.dest_rsel_o)
 394         comb += self.src1_rsel_o.eq(intregdeps.src_rsel_o[0])
 395         comb += self.src2_rsel_o.eq(intregdeps.src_rsel_o[1])
 396
 397         return m
 398
 399
class Scoreboard(Elaboratable):
    """ Top-level wiring of register files, Computation Units (ALU, LD/ST
        and Branch), Function Unit dependency matrices, Memory Function
        Units, group picker, issue unit, shadow matrices and the branch
        speculation recorder.
    """
    def __init__(self, rwid, n_regs):
        """ Inputs:

            * :rwid:   bit width of register file(s) - both FP and INT
            * :n_regs: depth of register file(s) - number of FP and INT regs
        """
        self.rwid = rwid
        self.n_regs = n_regs

        # Register Files
        self.intregs = RegFileArray(rwid, n_regs)
        self.fpregs = RegFileArray(rwid, n_regs)

        # issue q needs to get at these
        self.aluissue = IssueUnitGroup(2)
        self.lsissue = IssueUnitGroup(2)
        self.brissue = IssueUnitGroup(1)
        # and these
        self.alu_oper_i = Signal(4, reset_less=True)
        self.alu_imm_i = Signal(rwid, reset_less=True)
        self.br_oper_i = Signal(4, reset_less=True)
        self.br_imm_i = Signal(rwid, reset_less=True)
        self.ls_oper_i = Signal(4, reset_less=True)
        self.ls_imm_i = Signal(rwid, reset_less=True)

        # inputs
        self.int_dest_i = Signal(max=n_regs, reset_less=True) # Dest R# in
        self.int_src1_i = Signal(max=n_regs, reset_less=True) # oper1 R# in
        self.int_src2_i = Signal(max=n_regs, reset_less=True) # oper2 R# in
        self.reg_enable_i = Signal(reset_less=True) # enable reg decode

        # outputs
        self.issue_o = Signal(reset_less=True) # instruction was accepted
        self.busy_o = Signal(reset_less=True) # at least one CU is busy

        # for branch speculation experiment.  branch_direction = 0 if
        # the branch hasn't been met yet.  1 indicates "success", 2 is "fail"
        # branch_succ and branch_fail are requests to have the current
        # instruction be dependent on the branch unit "shadow" capability.
        self.branch_succ_i = Signal(reset_less=True)
        self.branch_fail_i = Signal(reset_less=True)
        self.branch_direction_o = Signal(2, reset_less=True)

    def elaborate(self, platform):
        """ Builds all submodules and connects them.  Only the integer
            register file ports are wired up below; the FP ports are
            created but not otherwise used in this method.
        """
        m = Module()
        comb = m.d.comb
        sync = m.d.sync

        m.submodules.intregs = self.intregs
        m.submodules.fpregs = self.fpregs

        # register ports
        int_dest = self.intregs.write_port("dest")
        int_src1 = self.intregs.read_port("src1")
        int_src2 = self.intregs.read_port("src2")

        fp_dest = self.fpregs.write_port("dest")
        fp_src1 = self.fpregs.read_port("src1")
        fp_src2 = self.fpregs.read_port("src2")

        # Int ALUs and BR ALUs
        # NOTE(review): 5 presumably = 2 ALU + 2 LD/ST + 1 BR, matching the
        # IssueUnitGroup sizes set up in __init__ - confirm they stay in sync
        n_int_alus = 5
        cua = CompUnitALUs(self.rwid, 3, n_alus=self.aluissue.n_insns)
        cub = CompUnitBR(self.rwid, 3) # 1 BR ALUs

        # LDST Comp Units
        n_ldsts = 2
        # no memory is passed to the LD/ST units here (mem argument is None)
        cul = CompUnitLDSTs(self.rwid, 4, self.lsissue.n_insns, None)

        # Comp Units
        # the top-level group is created without ldstmode, so it does not
        # carry the LD/ST-specific signals: those are wired up directly
        # on cul further down
        m.submodules.cu = cu = CompUnitsBase(self.rwid, [cua, cul, cub])
        bgt = cub.bgt # get at the branch computation unit
        br1 = cub.br1

        # Int FUs
        m.submodules.intfus = intfus = FunctionUnits(self.n_regs, n_int_alus)

        # Memory FUs
        m.submodules.memfus = memfus = MemFunctionUnits(n_ldsts, 5)

        # Count of number of FUs
        n_intfus = n_int_alus
        n_fp_fus = 0 # for now

        # Integer Priority Picker 1: Adder + Subtractor (and LD/ST)
        intpick1 = GroupPicker(n_intfus) # picks 1 reader and 1 writer to intreg
        m.submodules.intpick1 = intpick1

        # INT/FP Issue Unit
        regdecode = RegDecode(self.n_regs)
        m.submodules.regdecode = regdecode
        issueunit = IssueUnitArray([self.aluissue, self.lsissue, self.brissue])
        m.submodules.issueunit = issueunit

        # Shadow Matrix.  currently n_intfus shadows, to be used for
        # write-after-write hazards.  NOTE: there is one extra for branches,
        # so the shadow width is increased by 1
        # NOTE(review): the branch shadow looks to be a separate ShadowMatrix
        # (bshadow) rather than an extra column on "shadows" - the note
        # above about the increased width may be stale, confirm
        m.submodules.shadows = shadows = ShadowMatrix(n_intfus, n_intfus, True)
        m.submodules.bshadow = bshadow = ShadowMatrix(n_intfus, 1, False)

        # record previous instruction to cast shadow on current instruction
        prev_shadow = Signal(n_intfus)

        # Branch Speculation recorder.  tracks the success/fail state as
        # each instruction is issued, so that when the branch occurs the
        # allow/cancel can be issued as appropriate.
        m.submodules.specrec = bspec = BranchSpeculationRecord(n_intfus)

        #---------
        # ok start wiring things together...
        # "now hear de word of de looord... dem bones dem bones dem dryy bones"
        # https://www.youtube.com/watch?v=pYb8Wm6-QfA
        #---------

        #---------
        # Issue Unit is where it starts.  set up some in/outs for this module
        #---------
        comb += [    regdecode.dest_i.eq(self.int_dest_i),
                     regdecode.src1_i.eq(self.int_src1_i),
                     regdecode.src2_i.eq(self.int_src2_i),
                     regdecode.enable_i.eq(self.reg_enable_i),
                     self.issue_o.eq(issueunit.issue_o)
                    ]

        # take these to outside (issue needs them)
        comb += cua.oper_i.eq(self.alu_oper_i)
        comb += cua.imm_i.eq(self.alu_imm_i)
        comb += cub.oper_i.eq(self.br_oper_i)
        comb += cub.imm_i.eq(self.br_imm_i)
        comb += cul.oper_i.eq(self.ls_oper_i)
        comb += cul.imm_i.eq(self.ls_imm_i)

        # TODO: issueunit.f (FP)

        # and int function issue / busy arrays, and dest/src1/src2
        comb += intfus.dest_i.eq(regdecode.dest_o)
        comb += intfus.src1_i.eq(regdecode.src1_o)
        comb += intfus.src2_i.eq(regdecode.src2_o)

        fn_issue_o = issueunit.fn_issue_o

        comb += intfus.fn_issue_i.eq(fn_issue_o)
        comb += issueunit.busy_i.eq(cu.busy_o)
        comb += self.busy_o.eq(cu.busy_o.bool())

        #---------
        # Memory Function Unit
        #---------
        # registered (one clock later) OR of the LD/ST units' go_st, go_wr
        # and go_die inputs, used to reset the memory FU address match
        reset_b = Signal(cul.n_units, reset_less=True)
        sync += reset_b.eq(cul.go_st_i | cul.go_wr_i | cul.go_die_i)


        comb += memfus.fn_issue_i.eq(cul.issue_i) # Comp Unit Issue -> Mem FUs
        comb += memfus.addr_en_i.eq(cul.adr_rel_o) # Match enable on adr rel
        comb += memfus.addr_rs_i.eq(reset_b) # reset same as LDSTCompUnit

        # LD/STs have to accumulate prior LD/STs (TODO: multi-issue as well,
        # in a transitive fashion).  This cycle activates based on LDSTCompUnit
        # issue_i.  multi-issue gets a bit more complex but not a lot.
        prior_ldsts = Signal(cul.n_units, reset_less=True)
        sync += prior_ldsts.eq(memfus.g_int_ld_pend_o | memfus.g_int_st_pend_o)
        with m.If(self.ls_oper_i[2]): # LD bit of operand
            comb += memfus.ld_i.eq(cul.issue_i | prior_ldsts)
        with m.If(self.ls_oper_i[3]): # ST bit of operand
            comb += memfus.st_i.eq(cul.issue_i | prior_ldsts)

        # TODO: adr_rel_o needs to go into L1 Cache.  for now,
        # just immediately activate go_adr
        comb += cul.go_ad_i.eq(cul.adr_rel_o)

        # connect up address data
        # (hard-coded for exactly two LD/ST units, i.e. n_ldsts above)
        comb += memfus.addrs_i[0].eq(cul.units[0].data_o)
        comb += memfus.addrs_i[1].eq(cul.units[1].data_o)

        # connect loadable / storable to go_ld/go_st.
        # XXX should only be done when the memory ld/st has actually happened!
        # NOTE(review): go_ld_i is gated on memfus.storable_o, exactly like
        # go_st_i, whereas the commented-out line below refers to
        # memfus.loadable_o - confirm which one is intended for loads
        go_st_i = Signal(cul.n_units, reset_less=True)
        go_ld_i = Signal(cul.n_units, reset_less=True)
        comb += go_ld_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
                                  cul.req_rel_o & cul.ld_o)
        comb += go_st_i.eq(memfus.storable_o & memfus.addr_nomatch_o &\
                                  cul.sto_rel_o & cul.st_o)
        comb += memfus.go_ld_i.eq(go_ld_i)
        comb += memfus.go_st_i.eq(go_st_i)
        #comb += cul.go_wr_i.eq(memfus.loadable_o & memfus.addr_nomatch_o)
        comb += cul.go_st_i.eq(go_st_i)

        #comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        #comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        #comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        #---------
        # merge shadow matrices outputs
        #---------

        # these are explained in ShadowMatrix docstring, and are to be
        # connected to the FUReg and FUFU Matrices, to get them to reset
        anydie = Signal(n_intfus, reset_less=True)
        allshadown = Signal(n_intfus, reset_less=True)
        shreset = Signal(n_intfus, reset_less=True)
        comb += allshadown.eq(shadows.shadown_o & bshadow.shadown_o)
        comb += anydie.eq(shadows.go_die_o | bshadow.go_die_o)
        comb += shreset.eq(bspec.match_g_o | bspec.match_f_o)

        #---------
        # connect fu-fu matrix
        #---------

        # Group Picker... done manually for now.
        go_rd_o = intpick1.go_rd_o
        go_wr_o = intpick1.go_wr_o
        go_rd_i = intfus.go_rd_i
        go_wr_i = intfus.go_wr_i
        go_die_i = intfus.go_die_i
        # NOTE: connect to the shadowed versions so that they can "die" (reset)
        comb += go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus]) # rd
        comb += go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus]) # wr
        comb += go_die_i[0:n_intfus].eq(anydie[0:n_intfus]) # die

        # Connect Picker
        #---------
        comb += intpick1.rd_rel_i[0:n_intfus].eq(cu.rd_rel_o[0:n_intfus])
        comb += intpick1.req_rel_i[0:n_intfus].eq(cu.req_rel_o[0:n_intfus])
        int_rd_o = intfus.readable_o
        int_wr_o = intfus.writable_o
        comb += intpick1.readable_i[0:n_intfus].eq(int_rd_o[0:n_intfus])
        comb += intpick1.writable_i[0:n_intfus].eq(int_wr_o[0:n_intfus])

        #---------
        # Shadow Matrix
        #---------

        comb += shadows.issue_i.eq(fn_issue_o)
        #comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        comb += shadows.reset_i[0:n_intfus].eq(bshadow.go_die_o[0:n_intfus])
        #---------
        # NOTE; this setup is for the instruction order preservation...

        # connect shadows / go_dies to Computation Units
        comb += cu.shadown_i[0:n_intfus].eq(allshadown)
        comb += cu.go_die_i[0:n_intfus].eq(anydie)

        # ok connect first n_int_fu shadows to busy lines, to create an
        # instruction-order linked-list-like arrangement, using a bit-matrix
        # (instead of e.g. a ring buffer).

        # when written, the shadow can be cancelled (and was good)
        for i in range(n_intfus):
            comb += shadows.s_good_i[i][0:n_intfus].eq(go_wr_o[0:n_intfus])

        # *previous* instruction shadows *current* instruction, and, obviously,
        # if the previous is completed (!busy) don't cast the shadow!
        comb += prev_shadow.eq(~fn_issue_o & cu.busy_o)
        for i in range(n_intfus):
            comb += shadows.shadow_i[i][0:n_intfus].eq(prev_shadow)

        #---------
        # ... and this is for branch speculation.  it uses the extra bit
        # tacked onto the ShadowMatrix (hence shadow_wid=n_intfus+1)
        # only needs to set shadow_i, s_fail_i and s_good_i

        # issue captures shadow_i (if enabled)
        comb += bshadow.reset_i[0:n_intfus].eq(shreset[0:n_intfus])

        # a branch is "active" from the cycle it is issued (or while the
        # recorder's active_i is set) until the branch unit's go_wr_i fires
        bactive = Signal(reset_less=True)
        comb += bactive.eq((bspec.active_i | br1.issue_i) & ~br1.go_wr_i)

        # instruction being issued (fn_issue_o) has a shadow cast by the branch
        with m.If(bactive & (self.branch_succ_i | self.branch_fail_i)):
            comb += bshadow.issue_i.eq(fn_issue_o)
            for i in range(n_intfus):
                with m.If(fn_issue_o & (Const(1<<i))):
                    comb += bshadow.shadow_i[i][0].eq(1)

        # finally, we need an indicator to the test infrastructure as to
        # whether the branch succeeded or failed, plus, link up to the
        # "recorder" of whether the instruction was under shadow or not

        # NOTE(review): 0x1f below is a 5-bit mask, presumably one bit per
        # function unit (n_int_alus == 5) - confirm, and keep in sync
        with m.If(br1.issue_i):
            sync += bspec.active_i.eq(1)
        with m.If(self.branch_succ_i):
            comb += bspec.good_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT
        with m.If(self.branch_fail_i):
            comb += bspec.fail_i.eq(fn_issue_o & 0x1f) # XXX MAGIC CONSTANT

        # branch is active (TODO: a better signal: this is over-using the
        # go_write signal - actually the branch should not be "writing")
        with m.If(br1.go_wr_i):
            # registered: branch unit result plus one, so that zero keeps
            # meaning "branch not met yet"
            sync += self.branch_direction_o.eq(br1.data_o+Const(1, 2))
            sync += bspec.active_i.eq(0)
            comb += bspec.br_i.eq(1)
            # branch occurs if data == 1, failed if data == 0
            comb += bspec.br_ok_i.eq(br1.data_o == 1)
            for i in range(n_intfus):
                # *expected* direction of the branch matched against *actual*
                comb += bshadow.s_good_i[i][0].eq(bspec.match_g_o[i])
                # ... or it didn't
                comb += bshadow.s_fail_i[i][0].eq(bspec.match_f_o[i])

        #---------
        # Connect Register File(s)
        #---------
        comb += int_dest.wen.eq(intfus.dest_rsel_o)
        comb += int_src1.ren.eq(intfus.src1_rsel_o)
        comb += int_src2.ren.eq(intfus.src2_rsel_o)

        # connect ALUs to regfile
        comb += int_dest.data_i.eq(cu.data_o)
        comb += cu.src1_i.eq(int_src1.data_o)
        comb += cu.src2_i.eq(int_src2.data_o)

        # connect ALU Computation Units
        comb += cu.go_rd_i[0:n_intfus].eq(go_rd_o[0:n_intfus])
        comb += cu.go_wr_i[0:n_intfus].eq(go_wr_o[0:n_intfus])
        comb += cu.issue_i[0:n_intfus].eq(fn_issue_o[0:n_intfus])

        return m

    def __iter__(self):
        """ Yields everything both register files yield, followed by the
            register-number inputs, issue_o and the branch speculation
            signals.
        """
        yield from self.intregs
        yield from self.fpregs
        yield self.int_dest_i
        yield self.int_src1_i
        yield self.int_src2_i
        yield self.issue_o
        yield self.branch_succ_i
        yield self.branch_fail_i
        yield self.branch_direction_o

    def ports(self):
        """ Returns everything __iter__ yields, as a list. """
        return list(self)
 732
 733
 734 class IssueToScoreboard(Elaboratable):
 735
 736     def __init__(self, qlen, n_in, n_out, rwid, opwid, n_regs):
 737         self.qlen = qlen
 738         self.n_in = n_in
 739         self.n_out = n_out
 740         self.rwid = rwid
 741         self.opw = opwid
 742         self.n_regs = n_regs
 743
 744         mqbits = (int(log(qlen) / log(2))+2, False)
 745         self.p_add_i = Signal(mqbits) # instructions to add (from data_i)
 746         self.p_ready_o = Signal() # instructions were added
 747         self.data_i = Instruction.nq(n_in, "data_i", rwid, opwid)
 748
 749         self.busy_o = Signal(reset_less=True) # at least one CU is busy
 750         self.qlen_o = Signal(mqbits, reset_less=True)
 751
 752     def elaborate(self, platform):
 753         m = Module()
 754         comb = m.d.comb
 755         sync = m.d.sync
 756
 757         iq = InstructionQ(self.rwid, self.opw, self.qlen, self.n_in, self.n_out)
 758         sc = Scoreboard(self.rwid, self.n_regs)
 759         mem = TestMemory(self.rwid, 8) # not too big, takes too long
 760         m.submodules.iq = iq
 761         m.submodules.sc = sc
 762         m.submodules.mem = mem
 763
 764         # get at the regfile for testing
 765         self.intregs = sc.intregs
 766
 767         # and the "busy" signal and instruction queue length
 768         comb += self.busy_o.eq(sc.busy_o)
 769         comb += self.qlen_o.eq(iq.qlen_o)
 770
 771         # link up instruction queue
 772         comb += iq.p_add_i.eq(self.p_add_i)
 773         comb += self.p_ready_o.eq(iq.p_ready_o)
 774         for i in range(self.n_in):
 775             comb += eq(iq.data_i[i], self.data_i[i])
 776
 777         # take instruction and process it.  note that it's possible to
 778         # "inspect" the queue contents *without* actually removing the
 779         # items.  items are only removed when the
 780
 781         # in "waiting" state
 782         wait_issue_br = Signal()
 783         wait_issue_alu = Signal()
 784         wait_issue_ls = Signal()
 785
 786         with m.If(wait_issue_br | wait_issue_alu | wait_issue_ls):
 787             # set instruction pop length to 1 if the unit accepted
 788             with m.If(wait_issue_ls & (sc.lsissue.fn_issue_o != 0)):
 789                 with m.If(iq.qlen_o != 0):
 790                     comb += iq.n_sub_i.eq(1)
 791             with m.If(wait_issue_br & (sc.brissue.fn_issue_o != 0)):
 792                 with m.If(iq.qlen_o != 0):
 793                     comb += iq.n_sub_i.eq(1)
 794             with m.If(wait_issue_alu & (sc.aluissue.fn_issue_o != 0)):
 795                 with m.If(iq.qlen_o != 0):
 796                     comb += iq.n_sub_i.eq(1)
 797
 798         # see if some instruction(s) are here.  note that this is
 799         # "inspecting" the in-place queue.  note also that on the
 800         # cycle following "waiting" for fn_issue_o to be set, the
 801         # "resetting" done above (insn_i=0) could be re-ASSERTed.
 802         with m.If(iq.qlen_o != 0):
 803             # get the operands and operation
 804             imm = iq.data_o[0].imm_i
 805             dest = iq.data_o[0].dest_i
 806             src1 = iq.data_o[0].src1_i
 807             src2 = iq.data_o[0].src2_i
 808             op = iq.data_o[0].oper_i
 809             opi = iq.data_o[0].opim_i # immediate set
 810
 811             # set the src/dest regs
 812             comb += sc.int_dest_i.eq(dest)
 813             comb += sc.int_src1_i.eq(src1)
 814             comb += sc.int_src2_i.eq(src2)
 815             comb += sc.reg_enable_i.eq(1) # enable the regfile
 816
 817             # choose a Function-Unit-Group
 818             with m.If((op & (0x3<<2)) != 0): # branch
 819                 comb += sc.br_oper_i.eq(Cat(op[0:2], opi))
 820                 comb += sc.br_imm_i.eq(imm)
 821                 comb += sc.brissue.insn_i.eq(1)
 822                 comb += wait_issue_br.eq(1)
 823             with m.Elif((op & (0x3<<4)) != 0): # ld/st
 824                 # see compldst.py
 825                 # bit 0: ADD/SUB
 826                 # bit 1: immed
 827                 # bit 4: LD
 828                 # bit 5: ST
 829                 comb += sc.ls_oper_i.eq(Cat(op[0], opi[0], op[4:6]))
 830                 comb += sc.ls_imm_i.eq(imm)
 831                 comb += sc.lsissue.insn_i.eq(1)
 832                 comb += wait_issue_ls.eq(1)
 833             with m.Else(): # alu
 834                 comb += sc.alu_oper_i.eq(Cat(op[0:2], opi))
 835                 comb += sc.alu_imm_i.eq(imm)
 836                 comb += sc.aluissue.insn_i.eq(1)
 837                 comb += wait_issue_alu.eq(1)
 838
 839             # XXX TODO
 840             # these indicate that the instruction is to be made
 841             # shadow-dependent on
 842             # (either) branch success or branch fail
 843             #yield sc.branch_fail_i.eq(branch_fail)
 844             #yield sc.branch_succ_i.eq(branch_success)
 845
 846         return m
 847
 848     def __iter__(self):
 849         yield self.p_ready_o
 850         for o in self.data_i:
 851             yield from list(o)
 852         yield self.p_add_i
 853
 854     def ports(self):
 855         return list(self)
 856
 857
 858 IADD = 0
 859 ISUB = 1
 860 IMUL = 2
 861 ISHF = 3
 862 IBGT = 4
 863 IBLT = 5
 864 IBEQ = 6
 865 IBNE = 7
 866
 867
 868 class RegSim:
 869     def __init__(self, rwidth, nregs):
 870         self.rwidth = rwidth
 871         self.regs = [0] * nregs
 872
 873     def op(self, op, op_imm, imm, src1, src2, dest):
 874         maxbits = (1 << self.rwidth) - 1
 875         src1 = self.regs[src1] & maxbits
 876         if op_imm:
 877             src2 = imm
 878         else:
 879             src2 = self.regs[src2] & maxbits
 880         if op == IADD:
 881             val = src1 + src2
 882         elif op == ISUB:
 883             val = src1 - src2
 884         elif op == IMUL:
 885             val = src1 * src2
 886         elif op == ISHF:
 887             val = src1 >> (src2 & maxbits)
 888         elif op == IBGT:
 889             val = int(src1 > src2)
 890         elif op == IBLT:
 891             val = int(src1 < src2)
 892         elif op == IBEQ:
 893             val = int(src1 == src2)
 894         elif op == IBNE:
 895             val = int(src1 != src2)
 896         else:
 897             return 0 # LD/ST TODO
 898         val &= maxbits
 899         self.setval(dest, val)
 900         return val
 901
 902     def setval(self, dest, val):
 903         print ("sim setval", dest, hex(val))
 904         self.regs[dest] = val
 905
 906     def dump(self, dut):
 907         for i, val in enumerate(self.regs):
 908             reg = yield dut.intregs.regs[i].reg
 909             okstr = "OK" if reg == val else "!ok"
 910             print("reg %d expected %x received %x %s" % (i, val, reg, okstr))
 911
 912     def check(self, dut):
 913         for i, val in enumerate(self.regs):
 914             reg = yield dut.intregs.regs[i].reg
 915             if reg != val:
 916                 print("reg %d expected %x received %x\n" % (i, val, reg))
 917                 yield from self.dump(dut)
 918                 assert False
 919
 920 def instr_q(dut, op, op_imm, imm, src1, src2, dest,
 921             branch_success, branch_fail):
 922     instrs = [{'oper_i': op, 'dest_i': dest, 'imm_i': imm, 'opim_i': op_imm,
 923                'src1_i': src1, 'src2_i': src2}]
 924
 925     sendlen = 1
 926     for idx in range(sendlen):
 927         yield from eq(dut.data_i[idx], instrs[idx])
 928         di = yield dut.data_i[idx]
 929         print ("senddata %d %x" % (idx, di))
 930     yield dut.p_add_i.eq(sendlen)
 931     yield
 932     o_p_ready = yield dut.p_ready_o
 933     while not o_p_ready:
 934         yield
 935         o_p_ready = yield dut.p_ready_o
 936
 937     yield dut.p_add_i.eq(0)
 938
 939
 940 def int_instr(dut, op, imm, src1, src2, dest, branch_success, branch_fail):
 941     yield from disable_issue(dut)
 942     yield dut.int_dest_i.eq(dest)
 943     yield dut.int_src1_i.eq(src1)
 944     yield dut.int_src2_i.eq(src2)
 945     if (op & (0x3<<2)) != 0: # branch
 946         yield dut.brissue.insn_i.eq(1)
 947         yield dut.br_oper_i.eq(Const(op & 0x3, 2))
 948         yield dut.br_imm_i.eq(imm)
 949         dut_issue = dut.brissue
 950     else:
 951         yield dut.aluissue.insn_i.eq(1)
 952         yield dut.alu_oper_i.eq(Const(op & 0x3, 2))
 953         yield dut.alu_imm_i.eq(imm)
 954         dut_issue = dut.aluissue
 955     yield dut.reg_enable_i.eq(1)
 956
 957     # these indicate that the instruction is to be made shadow-dependent on
 958     # (either) branch success or branch fail
 959     yield dut.branch_fail_i.eq(branch_fail)
 960     yield dut.branch_succ_i.eq(branch_success)
 961
 962     yield
 963     yield from wait_for_issue(dut, dut_issue)
 964
 965
 966 def print_reg(dut, rnums):
 967     rs = []
 968     for rnum in rnums:
 969         reg = yield dut.intregs.regs[rnum].reg
 970         rs.append("%x" % reg)
 971     rnums = map(str, rnums)
 972     print ("reg %s: %s" % (','.join(rnums), ','.join(rs)))
 973
 974
 975 def create_random_ops(dut, n_ops, shadowing=False, max_opnums=3):
 976     insts = []
 977     for i in range(n_ops):
 978         src1 = randint(1, dut.n_regs-1)
 979         src2 = randint(1, dut.n_regs-1)
 980         imm = randint(1, (1<<dut.rwid)-1)
 981         dest = randint(1, dut.n_regs-1)
 982         op = randint(0, max_opnums)
 983         opi = 0 if randint(0, 2) else 1 # set true if random is nonzero
 984
 985         if shadowing:
 986             insts.append((src1, src2, dest, op, opi, imm, (0, 0)))
 987         else:
 988             insts.append((src1, src2, dest, op, opi, imm))
 989     return insts
 990
 991
 992 def wait_for_busy_clear(dut):
 993     while True:
 994         busy_o = yield dut.busy_o
 995         if not busy_o:
 996             break
 997         print ("busy",)
 998         yield
 999
def disable_issue(dut):
    """Simulation generator: set insn_i to 0 on the ALU, branch and LD/ST
    issue groups of dut, in that order."""
    for group in ("aluissue", "brissue", "lsissue"):
        yield getattr(dut, group).insn_i.eq(0)
1004
1005
def wait_for_issue(dut, dut_issue):
    """Simulation generator: wait for dut_issue.fn_issue_o to become nonzero.

    While it reads as zero, "busy" is printed and the simulation is
    stepped.  Once it is nonzero, all issue requests are cleared via
    disable_issue() and dut.reg_enable_i is set to 0.
    """
    while not (yield dut_issue.fn_issue_o):
        print ("busy",)
        yield
    yield from disable_issue(dut)
    yield dut.reg_enable_i.eq(0)
1017
def scoreboard_branch_sim(dut, alusim):
    """Simulation generator: run a small branch test and check the result.

    dut is driven directly through int_instr(); alusim is the RegSim
    reference model.  Instructions are 5-element tuples

        (src1, src2, dest, op, (shadow_on, shadow_off))

    For a branch (op >= 4) the dest field instead holds a pair
    (branch_ok, branch_fail) of instruction lists; entries may be None.
    Entries of both lists are queued for issue behind the branch, and
    the model executes only the list selected by the branch result.
    """
    iseed = 3

    for _ in range(1):

        print ("rseed", iseed)
        seed(iseed)
        iseed += 1

        yield dut.branch_direction_o.eq(0)

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        if False:
            # NOTE(review): create_random_ops(..., shadowing=True) returns
            # 7-element tuples, which the loops below (5-element unpack)
            # cannot consume - this variant needs updating before use.
            # create some instructions: branches create a tree
            insts = create_random_ops(dut, 1, True, 1)

            src1 = randint(1, dut.n_regs-1)
            src2 = randint(1, dut.n_regs-1)
            op = 4 # only BGT at the moment

            branch_ok = create_random_ops(dut, 1, True, 1)
            branch_fail = create_random_ops(dut, 1, True, 1)

            insts.append((src1, src2, (branch_ok, branch_fail), op, (0, 0)))

        if True:
            insts = []
            insts.append( (3, 5, 2, 0, (0, 0)) )
            branch_ok = []
            branch_fail = []
            branch_ok.append( None )
            branch_fail.append( (1, 1, 2, 0, (0, 1)) )
            insts.append( (6, 4, (branch_ok, branch_fail), 4, (0, 0)) )

        # the model replays an untouched copy after the hardware run
        siminsts = deepcopy(insts)

        # issue instruction(s)
        i = -1
        instrs = insts
        branch_direction = 0
        while instrs:
            yield
            yield
            i += 1
            branch_direction = yield dut.branch_direction_o # way branch went
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instrs.pop(0)
            if branch_direction == 1 and shadow_on:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "success" and this is a "failed"... skip
            if branch_direction == 2 and shadow_off:
                print ("skip", i, src1, src2, dest, op, shadow_on, shadow_off)
                continue # branch was "fail" and this is a "success"... skip
            if branch_direction != 0:
                shadow_on = 0
                shadow_off = 0
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
                # ok zip up the branch success / fail instructions and
                # drop them into the queue, one marked "to have branch success"
                # the other to be marked shadow branch "fail".
                # one out of each of these will be cancelled
                for ok, fl in zip(branch_ok, branch_fail):
                    if ok:
                        instrs.append((ok[0], ok[1], ok[2], ok[3], (1, 0)))
                    if fl:
                        instrs.append((fl[0], fl[1], fl[2], fl[3], (0, 1)))
            print ("instr %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # these hand-written instructions carry no immediate, so 0 is
            # passed for int_instr's imm parameter
            yield from int_instr(dut, op, 0, src1, src2, dest,
                                 shadow_on, shadow_off)

        # wait for all instructions to stop before checking
        yield
        yield from wait_for_busy_clear(dut)

        i = -1
        while siminsts:
            instr = siminsts.pop(0)
            if instr is None:
                continue
            (src1, src2, dest, op, (shadow_on, shadow_off)) = instr
            i += 1
            is_branch = op >= 4
            if is_branch:
                branch_ok, branch_fail = dest
                dest = src2
            print ("sim %d: (%d, %d, %d, %d, (%d, %d))" % \
                            (i, src1, src2, dest, op, shadow_on, shadow_off))
            # RegSim.op takes (op, op_imm, imm, src1, src2, dest): no
            # immediate is used here, so op_imm and imm are both 0
            branch_res = alusim.op(op, 0, 0, src1, src2, dest)
            if is_branch:
                if branch_res:
                    siminsts += branch_ok
                else:
                    siminsts += branch_fail

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1130
1131
def scoreboard_sim(dut, alusim):
    """Simulation generator: queue a test program on dut and check it.

    Registers 1..n_regs-1 are preloaded with random values (seed 0) in
    both the hardware and the alusim reference model.  Each enabled
    instruction is applied to the model and pushed into the instruction
    queue with instr_q(); once the queue has drained and busy_o has
    cleared, the hardware registers are compared against the model.

    Instructions unpacked by the issue loop have seven fields:

        (src1, src2, dest, op, opi, imm, (br_ok, br_fail))

    The groups under "if False:" are a catalogue of earlier regression
    cases; several of them still use shorter tuple layouts and would
    need converting to the seven-field form before being re-enabled.
    """
    seed(0)

    for _ in range(1):

        # set random values in the registers
        for regnum in range(1, dut.n_regs):
            val = randint(0, (1<<alusim.rwidth)-1)
            yield dut.intregs.regs[regnum].reg.eq(val)
            alusim.setval(regnum, val)

        # create some instructions (some random, some regression tests)
        instrs = []
        if False:
            instrs = create_random_ops(dut, 15, True, 4)

        if True: # LD/ST test (with immediate)
            instrs += [
                (1, 2, 2, 0x10, 1, 1, (0, 0)),
                (1, 2, 7, 0x12, 1, 1, (0, 0)),
            ]

        if False:
            instrs += [
                (1, 2, 2, 1, 1, 20, (0, 0)),
            ]

        if False:
            instrs += [
                (7, 3, 2, 4, (0, 0)),
                (7, 6, 6, 2, (0, 0)),
                (1, 7, 2, 2, (0, 0)),
            ]

        if False:
            instrs += [
                (2, 3, 3, 0, 0, 0, (0, 0)),
                (5, 3, 3, 1, 0, 0, (0, 0)),
                (3, 5, 5, 2, 0, 0, (0, 0)),
                (5, 3, 3, 3, 0, 0, (0, 0)),
                (3, 5, 5, 0, 0, 0, (0, 0)),
            ]

        if False:
            instrs += [
                (3, 3, 4, 0, 0, 13979, (0, 0)),
                (6, 4, 1, 2, 0, 40976, (0, 0)),
                (1, 4, 7, 4, 1, 23652, (0, 0)),
            ]

        if False:
            instrs += [
                (5, 6, 2, 1),
                (2, 2, 4, 0),
                # (2, 2, 3, 1),
            ]

        if False:
            instrs += [
                (2, 1, 2, 3),
            ]

        if False:
            instrs += [
                (2, 6, 2, 1),
                (2, 1, 2, 0),
            ]

        if False:
            instrs += [
                (1, 2, 7, 2),
                (7, 1, 5, 0),
                (4, 4, 1, 1),
            ]

        if False:
            instrs += [
                (5, 6, 2, 2),
                (1, 1, 4, 1),
                (6, 5, 3, 0),
            ]

        if False:
            # Write-after-Write Hazard
            instrs += [
                (3, 6, 7, 2),
                (4, 4, 7, 1),
            ]

        if False:
            # self-read/write-after-write followed by Read-after-Write
            instrs += [
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ]

        if False:
            # Read-after-Write followed by self-read-after-write
            instrs += [
                (5, 6, 1, 2),
                (1, 1, 1, 1),
            ]

        if False:
            # self-read-write sandwich
            instrs += [
                (5, 6, 1, 2),
                (1, 1, 1, 1),
                (1, 5, 3, 0),
            ]

        if False:
            # very weird failure
            instrs += [
                (5, 2, 5, 2),
                (2, 6, 3, 0),
                (4, 2, 2, 1),
            ]

        if False:
            # force registers 5 and 3 to known values first
            for regnum, forced in ((5, 4), (3, 5)):
                yield dut.intregs.regs[regnum].reg.eq(forced)
                alusim.setval(regnum, forced)
            instrs += [
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (0, 1)),
            ]

        if False:
            # force registers 5 and 3 to known values first
            for regnum, forced in ((5, 6), (3, 5)):
                yield dut.intregs.regs[regnum].reg.eq(forced)
                alusim.setval(regnum, forced)
            instrs += [
                (5, 3, 3, 4, (0, 0)),
                (4, 2, 1, 2, (1, 0)),
            ]

        if False:
            instrs += [
                (4, 3, 5, 1, 0, (0, 0)),
                (5, 2, 3, 1, 0, (0, 0)),
                (7, 1, 5, 2, 0, (0, 0)),
                (5, 6, 6, 4, 0, (0, 0)),
                (7, 5, 2, 2, 0, (1, 0)),
                (1, 7, 5, 0, 0, (0, 1)),
                (1, 6, 1, 2, 0, (1, 0)),
                (1, 6, 7, 3, 0, (0, 0)),
                (6, 7, 7, 0, 0, (0, 0)),
            ]

        # issue instruction(s), wait for issue to be free before proceeding
        for idx, instr in enumerate(instrs):
            src1, src2, dest, op, opi, imm, (br_ok, br_fail) = instr

            summary = (idx, src1, src2, dest, op, opi, imm)
            print ("instr %d: (%d, %d, %d, %d, %d, %d)" % summary)
            # update the reference model first, then queue for the hardware
            alusim.op(op, opi, imm, src1, src2, dest)
            yield from instr_q(dut, op, opi, imm, src1, src2, dest,
                               br_ok, br_fail)

        # wait for all instructions to stop before checking:
        # first until the queue is empty...
        while (yield dut.qlen_o) != 0:
            yield
        # ...then four further cycles, then until nothing is busy
        for _ in range(4):
            yield
        yield from wait_for_busy_clear(dut)

        # check status
        yield from alusim.check(dut)
        yield from alusim.dump(dut)
1278
1279
def test_scoreboard():
    """Build the design, write its RTLIL to disk and run scoreboard_sim.

    Output files are written to the current directory:
    test_scoreboard6600.il and test_scoreboard6600.vcd.
    """
    il_filename = "test_scoreboard6600.il"
    vcd_filename = 'test_scoreboard6600.vcd'

    dut = IssueToScoreboard(2, 1, 1, 16, 8, 8)
    alusim = RegSim(16, 8)
    memsim = MemSim(16, 16)  # created but not passed to the simulation

    il_text = rtlil.convert(dut, ports=dut.ports())
    with open(il_filename, "w") as il_file:
        il_file.write(il_text)

    # scoreboard_branch_sim(dut, alusim) is the alternative process that
    # was previously run here
    sim_process = scoreboard_sim(dut, alusim)
    run_simulation(dut, sim_process, vcd_name=vcd_filename)
1293
1294
# run the scoreboard test when this file is executed as a script
if __name__ == "__main__":
    test_scoreboard()